my $bits1="THIRTY_TWO_BIT ";
my $bits2="SIXTY_FOUR_BIT ";
-my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt586.o:aes-586.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o";
+my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt586.o:aes-586.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:e_padlock-x86.o";
my $x86_elf_asm="$x86_asm:elf";
-my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o";
-my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o:void";
-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o:void";
-my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o::::::::::::void";
-my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o:void";
-my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::::::";
-my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o:::::::";
-my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o";
-my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void";
-my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32";
-my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64";
-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::";
-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o:::::::";
-my $no_asm="::::::::::::::void";
+my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o";
+my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
+my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
+my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
+my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::";
+my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::";
+my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:";
+my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
+my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
+my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
+my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
+my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
+my $no_asm=":::::::::::::::void";
# As for $BSDthreads. Idea is to maintain "collective" set of flags,
# which would cover all BSD flavors. -pthread applies to them all,
# seems to be sufficient?
my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT";
-#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $bn_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $modes_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib
+#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $bn_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $modes_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib
my %table=(
# File 'TABLE' (created by 'make TABLE') contains the data from this list,
"debug-linux-ppro","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -mcpu=pentiumpro -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn",
"debug-linux-elf","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -march=i486 -Wall::-D_REENTRANT::-lefence -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"debug-linux-elf-noefence","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -march=i486 -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes_x86core.o aes_cbc.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o::ghash-x86.o:elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes_x86core.o aes_cbc.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o::ghash-x86.o:e_padlock-x86.o:elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"debug-linux-generic32","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DTERMIO -g -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"debug-linux-generic64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DTERMIO -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"debug-linux-x86_64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -m64 -DL_ENDIAN -DTERMIO -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
"hpux-parisc-cc-o4","cc:-Ae +O4 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY::-D_REENTRANT::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:+Z:-b:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"hpux-parisc1_1-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${parisc11_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"hpux-parisc2-gcc","gcc:-march=2.0 -O3 -DB_ENDIAN -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL DES_RISC1::pa-risc2.o:::::::::::::void:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o:::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64",
+"hpux-parisc2-gcc","gcc:-march=2.0 -O3 -DB_ENDIAN -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL DES_RISC1::pa-risc2.o::::::::::::::void:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o::::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64",
# More attempts at unified 10.X and 11.X targets for HP C compiler.
#
# Kevin Steves <ks@hp.se>
"hpux-parisc-cc","cc:+O3 +Optrs_strongly_typed -Ae +ESlit -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY::-D_REENTRANT::-Wl,+s -ldld:MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT:${no_asm}:dl:hpux-shared:+Z:-b:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"hpux-parisc1_1-cc","cc:+DA1.1 +O3 +Optrs_strongly_typed -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY::-D_REENTRANT::-Wl,+s -ldld:MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT:${parisc11_asm}:dl:hpux-shared:+Z:-b:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa1.1",
-"hpux-parisc2-cc","cc:+DA2.0 +DS2.0 +O3 +Optrs_strongly_typed -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2.o:::::::::::::void:dl:hpux-shared:+Z:-b:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"hpux-parisc2-cc","cc:+DA2.0 +DS2.0 +O3 +Optrs_strongly_typed -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2.o::::::::::::::void:dl:hpux-shared:+Z:-b:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"hpux64-parisc2-cc","cc:+DD64 +O3 +Optrs_strongly_typed -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT:${parisc20_asm}:dlfcn:hpux-shared:+Z:+DD64 -b:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64",
# HP/UX IA-64 targets
# Visual C targets
#
# Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64
-"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o:ias:win32",
+"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32",
"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:".eval{my $asm=$x86_64_asm;$asm=~s/x86_64-gcc\.o/bn_asm.o/;$asm}.":auto:win32",
-"debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o:ias:win32",
+"debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32",
"debug-VC-WIN64A","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:".eval{my $asm=$x86_64_asm;$asm=~s/x86_64-gcc\.o/bn_asm.o/;$asm}.":auto:win32",
# x86 Win32 target defaults to ANSI API, if you want UNICODE, complement
# 'perl Configure VC-WIN32' with '-DUNICODE -D_UNICODE'
my $idx_wp_obj = $idx++;
my $idx_cmll_obj = $idx++;
my $idx_modes_obj = $idx++;
+my $idx_engines_obj = $idx++;
my $idx_perlasm_scheme = $idx++;
my $idx_dso_scheme = $idx++;
my $idx_shared_target = $idx++;
my $wp_obj = $fields[$idx_wp_obj];
my $cmll_obj = $fields[$idx_cmll_obj];
my $modes_obj = $fields[$idx_modes_obj];
+my $engines_obj = $fields[$idx_engines_obj];
my $perlasm_scheme = $fields[$idx_perlasm_scheme];
my $dso_scheme = $fields[$idx_dso_scheme];
my $shared_target = $fields[$idx_shared_target];
{
$cpuid_obj=$bn_obj=
$des_obj=$aes_obj=$bf_obj=$cast_obj=$rc4_obj=$rc5_obj=$cmll_obj=
- $modes_obj=$sha1_obj=$md5_obj=$rmd160_obj=$wp_obj="";
+ $modes_obj=$sha1_obj=$md5_obj=$rmd160_obj=$wp_obj=$engines_obj="";
$cflags=~s/\-D[BL]_ENDIAN// if ($fips);
$thread_cflags=~s/\-D[BL]_ENDIAN// if ($fips);
}
s/^WP_ASM_OBJ=.*$/WP_ASM_OBJ= $wp_obj/;
s/^CMLL_ENC=.*$/CMLL_ENC= $cmll_obj/;
s/^MODES_ASM_OBJ.=*$/MODES_ASM_OBJ= $modes_obj/;
+ s/^ENGINES_ASM_OBJ=.*$/ENGINES_ASM_OBJ= $engines_obj/;	# "=.*" replaces any existing value, not only an empty one
s/^PERLASM_SCHEME=.*$/PERLASM_SCHEME= $perlasm_scheme/;
s/^PROCESSOR=.*/PROCESSOR= $processor/;
s/^ARFLAGS=.*/ARFLAGS= $arflags/;
print "RMD160_OBJ_ASM=$rmd160_obj\n";
print "CMLL_ENC =$cmll_obj\n";
print "MODES_OBJ =$modes_obj\n";
+print "ENGINES_OBJ =$engines_obj\n";
print "PROCESSOR =$processor\n";
print "RANLIB =$ranlib\n";
print "ARFLAGS =$arflags\n";
(my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags,
my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj,
my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj,
- my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj,my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag,
+ my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj,
+ my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag,
my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multilib)=
split(/\s*:\s*/,$table{$target} . ":" x 30 , -1);
\$wp_obj = $wp_obj
\$cmll_obj = $cmll_obj
\$modes_obj = $modes_obj
+\$engines_obj = $engines_obj
\$perlasm_scheme = $perlasm_scheme
\$dso_scheme = $dso_scheme
\$shared_target= $shared_target
WP_ASM_OBJ=
CMLL_ENC=
MODES_ASM_OBJ=
+ENGINES_ASM_OBJ=
PERLASM_SCHEME=
# KRB5 stuff
RMD160_ASM_OBJ='$(RMD160_ASM_OBJ)' \
WP_ASM_OBJ='$(WP_ASM_OBJ)' \
MODES_ASM_OBJ='$(MODES_ASM_OBJ)' \
+ ENGINES_ASM_OBJ='$(ENGINES_ASM_OBJ)' \
PERLASM_SCHEME='$(PERLASM_SCHEME)' \
FIPSLIBDIR='${FIPSLIBDIR}' \
FIPSCANLIB="$${FIPSCANLIB:-$(FIPSCANLIB)}" \
build_ssl:
@dir=ssl; target=all; $(BUILD_ONE_CMD)
build_engines:
- @dir=engines; target=all; $(BUILD_ONE_CMD)
+ @dir=engines; target=all; AS='$(CC) -c'; export AS; $(BUILD_ONE_CMD)
build_apps:
@dir=apps; target=all; $(BUILD_ONE_CMD)
build_tests:
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = win32
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= bsd-gcc-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= bsd-gcc-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-ia64.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= bsd-gcc-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-sparcv9.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= bsd-gcc-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= bsd-gcc-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = a.out
$dso_scheme = dlfcn
$shared_target= bsd-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= bsd-shared
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= bsd-gcc-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = coff
$dso_scheme = dlfcn
$shared_target= cygwin-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = win32
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = a.out
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= bsd-gcc-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= bsd-gcc-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= reliantunix-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = win32
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = win32
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = win32n
$dso_scheme = win32
$shared_target=
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = auto
$dso_scheme = win32
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj = ghash-ia64.o
+$engines_obj =
$perlasm_scheme = ias
$dso_scheme = win32
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = aix32
$dso_scheme = dlfcn
$shared_target= aix-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = aix32
$dso_scheme = dlfcn
$shared_target= aix-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = aix64
$dso_scheme = dlfcn
$shared_target= aix-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = aix64
$dso_scheme = dlfcn
$shared_target= aix-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-armv4.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = beos
$shared_target= beos-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = beos
$shared_target= beos-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= bsd-gcc-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = macosx
$dso_scheme = dlfcn
$shared_target= darwin-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = osx32
$dso_scheme = dlfcn
$shared_target= darwin-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = osx64
$dso_scheme = dlfcn
$shared_target= darwin-shared
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = macosx
$dso_scheme = dlfcn
$shared_target= darwin-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= bsd-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= cygwin-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = win32n
$dso_scheme = win32
$shared_target=
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = auto
$dso_scheme = win32
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj = ghash-ia64.o
+$engines_obj =
$perlasm_scheme = ias
$dso_scheme = win32
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme =
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = macosx
$dso_scheme = dlfcn
$shared_target= darwin-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = osx32
$dso_scheme = dlfcn
$shared_target= darwin-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj =
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target=
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-sparcv9.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-sparcv9.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = win32
$shared_target= cygwin-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dl
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dl
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-ia64.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-ia64.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dl
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dl
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dl
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-parisc.o
+$engines_obj =
$perlasm_scheme = 32
$dso_scheme = dl
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-parisc.o
+$engines_obj =
$perlasm_scheme = 32
$dso_scheme = dl
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dl
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dl
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-ia64.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-ia64.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-parisc.o
+$engines_obj =
$perlasm_scheme = 64
$dso_scheme = dlfcn
$shared_target= hpux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= hpux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = o32
$dso_scheme = dlfcn
$shared_target= irix-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = o32
$dso_scheme = dlfcn
$shared_target= irix-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = n32
$dso_scheme = dlfcn
$shared_target= irix-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = n32
$dso_scheme = dlfcn
$shared_target= irix-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = 64
$dso_scheme = dlfcn
$shared_target= irix-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = 64
$dso_scheme = dlfcn
$shared_target= irix-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-alpha.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj = ghash-alpha.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-alpha.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj = ghash-alpha.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = a.out
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj = ghash-armv4.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-ia64.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-ia64.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-ia64.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = linux32
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = linux64
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-sparcv9.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-s390x.o
+$engines_obj =
$perlasm_scheme = 31
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-s390x.o
+$engines_obj =
$perlasm_scheme = 64
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-sparcv9.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= linux-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = coff
$dso_scheme = win32
$shared_target= cygwin-shared
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = mingw64
$dso_scheme = win32
$shared_target= cygwin-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj = ghash-alpha.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= alpha-osf1-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-alpha.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= alpha-osf1-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme =
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= svr3-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= svr3-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-sparcv9.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-sparcv9.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-sparcv9.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj = ghash-sparcv9.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
+$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= solaris-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj = ghash-alpha.o
+$engines_obj =
$perlasm_scheme = void
$dso_scheme = dlfcn
$shared_target= alpha-osf1-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme =
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme =
$shared_target= linux-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= svr5-shared
$wp_obj = wp_block.o wp-mmx.o
$cmll_obj = cmll-x86.o
$modes_obj = ghash-x86.o
+$engines_obj = e_padlock-x86.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= gnu-shared
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme = void
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
$wp_obj =
$cmll_obj =
$modes_obj =
+$engines_obj =
$perlasm_scheme =
$dso_scheme =
$shared_target=
sub ::dataseg
{ push(@out,"$segment\tENDS\n_DATA\tSEGMENT\n"); $segment="_DATA"; }
+# Emit a MASM .SAFESEH directive for the given handler symbol, guarded by
+# an assembler-version conditional (IF @Version GE 710).
+sub ::safeseh
+{ my ($handler)=@_;
+ my $decorated=$nmdecor.$handler;
+ push(@out,"IF \@Version GE 710\n");
+ push(@out,".SAFESEH ".&::LABEL($handler,$decorated)."\n");
+ push(@out,"ENDIF\n");
+}
+
1;
else { push(@out,"section\t.data align=4\n"); }
}
+# Emit a NASM "safeseh" directive for the given handler symbol, guarded by
+# a check on __NASM_VERSION_ID__ (>= 0x02030000).
+sub ::safeseh
+{ my ($handler)=@_;
+ my $decorated=$nmdecor.$handler;
+ push(@out,"%if __NASM_VERSION_ID__ >= 0x02030000\n");
+ push(@out,"safeseh ".&::LABEL($handler,$decorated)."\n");
+ push(@out,"%endif\n");
+}
+
1;
$(MAKE) -e TOP=../.. DIR=$$i $$target ) || exit 1; \
done;
+ENGINES_ASM_OBJ=
+
PEX_LIBS=
EX_LIBS=
CFLAGS= $(INCLUDES) $(CFLAG)
+ASFLAGS= $(INCLUDES) $(ASFLAG)
GENERAL=Makefile engines.com install.com engine_vector.mar
TEST=
e_sureware.o \
e_ubsec.o \
e_padlock.o \
- e_capi.o
+ e_capi.o \
+ $(ENGINES_ASM_OBJ)
SRC= $(LIBSRC)
set -e; \
for l in $(LIBNAMES); do \
$(MAKE) -f ../Makefile.shared -e \
- LIBNAME=$$l LIBEXTRAS=e_$$l.o \
+ LIBNAME=$$l LIBEXTRAS="e_$$l*.o" \
LIBDEPS='-L.. -lcrypto $(EX_LIBS)' \
link_o.$(SHLIB_TARGET); \
done; \
fi; \
touch lib
+e_padlock-x86.s: asm/e_padlock-x86.pl
+ $(PERL) asm/e_padlock-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
+e_padlock-x86_64.s: asm/e_padlock-x86_64.pl
+ $(PERL) asm/e_padlock-x86_64.pl $(PERLASM_SCHEME) > $@
+
subdirs:
echo $(EDIRS)
@target=all; $(RECURSIVE_MAKE)
--- /dev/null
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# September 2011
+#
+# Assembler helpers for Padlock engine. Compared to original engine
+# version relying on inline assembler and compiled with gcc 3.4.6 it
+# was measured to provide ~100% improvement on misaligned data in ECB
+# mode and ~75% in CBC mode. For aligned data improvement can be
+# observed for short inputs only, e.g. 45% for 64-byte messages in
+# ECB mode, 20% in CBC. Difference in performance for aligned vs.
+# misaligned data depends on misalignment and is either ~1.8x or
+# ~2.9x. These are approximately same factors as for hardware support,
+# so there is little reason to rely on the latter. It might actually
+# hurt performance in mixture of aligned and misaligned buffers,
+# because a) if you choose to flip 'align' flag on per-buffer basis,
+# then you'd have to reload key context; b) if you choose to set
+# 'align' flag permanently, it limits performance for aligned data
+# to ~1/2. All results were collected on 1.5GHz C7.
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../crypto/perlasm");
+require "x86asm.pl";
+
+&asm_init($ARGV[0],$0);
+
+$PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16
+
+$ctx="edx";
+$out="edi";
+$inp="esi";
+$len="ecx";
+$chunk="ebx";
+
+# padlock_capability: generated routine that probes the CPU and returns a
+# capability word in eax (0 when the probe fails at any step):
+#   1. toggle EFLAGS bit 21 to find out whether CPUID is usable;
+#   2. require the CPUID vendor string "CentaurHauls";
+#   3. require extended leaf 0xC0000000 to report at least 0xC0000001;
+#   4. return edx of leaf 0xC0000001 with bit 4 replaced by a "Nano" flag
+#      derived from the leaf 1 signature.
+# Every perlasm call is terminated with ';' -- without it Perl parses two
+# adjacent calls as operands of the binary '&' operator.
+&function_begin_B("padlock_capability");
+ &push ("ebx");
+ &pushf ();
+ &pop ("eax");
+ &mov ("ecx","eax");
+ &xor ("eax",1<<21);
+ &push ("eax");
+ &popf ();
+ &pushf ();
+ &pop ("eax");
+ &xor ("ecx","eax");
+ &xor ("eax","eax");
+ &bt ("ecx",21);
+ &jnc (&label("noluck"));
+ &cpuid ();
+ &xor ("eax","eax");
+ &cmp ("ebx","0x".unpack("H*",'tneC'));
+ &jne (&label("noluck"));
+ &cmp ("edx","0x".unpack("H*",'Hrua'));
+ &jne (&label("noluck"));
+ &cmp ("ecx","0x".unpack("H*",'slua'));
+ &jne (&label("noluck"));
+ &mov ("eax",0xC0000000);
+ &cpuid ();
+ &mov ("edx","eax");
+ &xor ("eax","eax");
+ &cmp ("edx",0xC0000001);
+ &jb (&label("noluck"));
+ &mov ("eax",1);
+ &cpuid ();
+ &or ("eax",0x0f);
+ &xor ("ebx","ebx");
+ &and ("eax",0x0fff);
+ &cmp ("eax",0x06ff); # check for Nano
+ &sete ("bl");
+ &mov ("eax",0xC0000001);
+ &push ("ebx");
+ &cpuid ();
+ &pop ("ebx");
+ &mov ("eax","edx");
+ &shl ("ebx",4); # bit#4 denotes Nano
+ &and ("eax",0xffffffef);
+ &or ("eax","ebx");
+&set_label("noluck");
+ &pop ("ebx");
+ &ret ();
+&function_end_B("padlock_capability");
+
+# padlock_key_bswap(ptr): byte-swaps consecutive 32-bit words in place,
+# starting at ptr. The word count is read from the dword at offset 240 of
+# ptr (presumably the rounds field of an AES_KEY -- TODO confirm against
+# the C caller). The loop decrements after the first swap, so at least one
+# word is always swapped.
+&function_begin_B("padlock_key_bswap");
+ &mov ("edx",&wparam(0));
+ &mov ("ecx",&DWP(240,"edx"));
+&set_label("bswap_loop");
+ &mov ("eax",&DWP(0,"edx"));
+ &bswap ("eax");
+ &mov (&DWP(0,"edx"),"eax");
+ &lea ("edx",&DWP(4,"edx"));
+ &sub ("ecx",1);
+ &jnz (&label("bswap_loop"));
+ &ret ();
+&function_end_B("padlock_key_bswap");
+
+# This is heuristic key context tracing. At first one
+# believes that one should use atomic swap instructions,
+# but it's not actually necessary. Point is that if
+# padlock_saved_context was changed by another thread
+# after we've read it and before we compare it with ctx,
+# our key *shall* be reloaded upon thread context switch
+# and we are therefore set in either case...
+&static_label("padlock_saved_context");
+
+# padlock_verify_context(ctx): public wrapper around _padlock_verify_ctx.
+# Loads ctx from the first argument, places the offset of
+# padlock_saved_context relative to verify_pic_point in eax, pushes EFLAGS
+# and calls the helper. The helper adds its return address (the address of
+# verify_pic_point) to eax to obtain the variable's absolute address.
+&function_begin_B("padlock_verify_context");
+ &mov ($ctx,&wparam(0));
+ &lea ("eax",&DWP("padlock_saved_context-".&label("verify_pic_point")));
+ &pushf ();
+ &call ("_padlock_verify_ctx");
+&set_label("verify_pic_point");
+ &lea ("esp",&DWP(4,"esp")); # discard the EFLAGS copy pushed above
+ &ret ();
+&function_end_B("padlock_verify_context");
+
+# _padlock_verify_ctx: internal helper. Expects, on entry:
+#   eax      - offset of padlock_saved_context relative to the return address
+#   $ctx     - context pointer to verify
+#   4(esp)   - copy of EFLAGS pushed by the caller
+# If bit 30 of the saved EFLAGS is set and the saved context differs from
+# $ctx, it executes pushf/popf (the same sequence padlock_reload_key uses);
+# in every case it records $ctx as the saved context.
+&function_begin_B("_padlock_verify_ctx");
+ &add ("eax",&DWP(0,"esp")); # &padlock_saved_context
+ &bt (&DWP(4,"esp"),30); # eflags
+ &jnc (&label("verified"));
+ &cmp ($ctx,&DWP(0,"eax"));
+ &je (&label("verified"));
+ &pushf ();
+ &popf ();
+&set_label("verified");
+ &mov (&DWP(0,"eax"),$ctx);
+ &ret ();
+&function_end_B("_padlock_verify_ctx");
+
+# padlock_reload_key(): takes no arguments; executes a pushf/popf pair and
+# returns. NOTE(review): presumably rewriting EFLAGS is what makes the
+# hardware reload the key on the next xcrypt -- confirm against the VIA
+# PadLock programming guide.
+&function_begin_B("padlock_reload_key");
+ &pushf ();
+ &popf ();
+ &ret ();
+&function_end_B("padlock_reload_key");
+
+# padlock_aes_block(out, inp, ctx): runs "rep xcryptecb" with a block count
+# of 1. The control word is taken from ctx+16 and the key from ctx+32;
+# edi, esi and ebx are saved and restored around the instruction.
+&function_begin_B("padlock_aes_block");
+ &push ("edi");
+ &push ("esi");
+ &push ("ebx");
+ &mov ($out,&wparam(0)); # must be 16-byte aligned
+ &mov ($inp,&wparam(1)); # must be 16-byte aligned
+ &mov ($ctx,&wparam(2));
+ &mov ($len,1);
+ &lea ("ebx",&DWP(32,$ctx)); # key
+ &lea ($ctx,&DWP(16,$ctx)); # control word
+ &data_byte(0xf3,0x0f,0xa7,0xc8); # rep xcryptecb
+ &pop ("ebx");
+ &pop ("esi");
+ &pop ("edi");
+ &ret ();
+&function_end_B("padlock_aes_block");
+
+# generate_mode($mode,$opcode): emits padlock_${mode}_encrypt for x86.
+#   $mode   - "ecb", "cbc", "cfb", "ofb" or "ctr16"; selects label names and
+#             the mode-specific code paths below
+#   $opcode - last byte of the "rep xcrypt*" instruction encoding
+# The generated function returns 1 in eax on success. It returns 0 when ctx
+# or len is not a multiple of 16: eax is cleared before those checks so the
+# abort path does not return whatever the caller left in eax.
+# Unless both buffers are 16-byte aligned (or the align bit is set in the
+# control word), data is processed in chunks of at most $PADLOCK_CHUNK
+# bytes through an aligned scratch area carved out of the stack; that area
+# is wiped before returning.
+# NOTE(review): the first pass uses chunk=len%PADLOCK_CHUNK, which is 0 when
+# len is a multiple of PADLOCK_CHUNK; the ctr16 prepare/xor loops test their
+# condition only after the first iteration -- confirm they tolerate that.
+sub generate_mode {
+my ($mode,$opcode) = @_;
+# int padlock_$mode_encrypt(void *out, const void *inp,
+# struct padlock_cipher_data *ctx, size_t len);
+&function_begin("padlock_${mode}_encrypt");
+ &mov ($out,&wparam(0));
+ &mov ($inp,&wparam(1));
+ &mov ($ctx,&wparam(2));
+ &mov ($len,&wparam(3));
+ &xor ("eax","eax"); # return 0 if we abort below
+ &test ($ctx,15);
+ &jnz (&label("${mode}_abort"));
+ &test ($len,15);
+ &jnz (&label("${mode}_abort"));
+ &lea ("eax",&DWP("padlock_saved_context-".&label("${mode}_pic_point")));
+ &pushf ();
+ &cld ();
+ &call ("_padlock_verify_ctx");
+&set_label("${mode}_pic_point");
+ &lea ($ctx,&DWP(16,$ctx)); # control word
+ &xor ("eax","eax");
+ if ($mode eq "ctr16") {
+ &movdqa ("xmm0",&QWP(-16,$ctx));# load iv
+ } else {
+ &xor ("ebx","ebx");
+ &test (&DWP(0,$ctx),1<<5); # align bit in control word
+ &jnz (&label("${mode}_aligned"));
+ &test ($out,0x0f);
+ &setz ("al"); # !out_misaligned
+ &test ($inp,0x0f);
+ &setz ("bl"); # !inp_misaligned
+ &test ("eax","ebx");
+ &jnz (&label("${mode}_aligned"));
+ &neg ("eax");
+ }
+ &mov ($chunk,$PADLOCK_CHUNK);
+ &not ("eax"); # out_misaligned?-1:0
+ &lea ("ebp",&DWP(-24,"esp"));
+ &cmp ($len,$chunk);
+ &cmovc ($chunk,$len); # chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
+ &and ("eax",$chunk); # out_misaligned?chunk:0
+ &mov ($chunk,$len);
+ &neg ("eax");
+ &and ($chunk,$PADLOCK_CHUNK-1); # chunk=len%PADLOCK_CHUNK
+ &lea ("esp",&DWP(0,"eax","ebp")); # alloca
+ &and ("esp",-16);
+ &jmp (&label("${mode}_loop"));
+
+&set_label("${mode}_loop",16);
+ &mov (&DWP(0,"ebp"),$out); # save parameters
+ &mov (&DWP(4,"ebp"),$inp);
+ &mov (&DWP(8,"ebp"),$len);
+ &mov ($len,$chunk);
+ &mov (&DWP(12,"ebp"),$chunk); # chunk
+ if ($mode eq "ctr16") {
+ &pextrw ("ecx","xmm0",7); # borrow $len
+ &mov ($inp,1);
+ &xor ($out,$out);
+ &xchg ("ch","cl");
+&set_label("${mode}_prepare");
+ &movdqa (&QWP(0,"esp",$out),"xmm0");
+ &lea ("eax",&DWP(0,"ecx",$inp));
+ &xchg ("ah","al");
+ &lea ($out,&DWP(16,$out));
+ &pinsrw ("xmm0","eax",7);
+ &lea ($inp,&DWP(1,$inp));
+ &cmp ($out,$chunk);
+ &jb (&label("${mode}_prepare"));
+
+ &lea ($inp,&DWP(0,"esp"));
+ &lea ($out,&DWP(0,"esp"));
+ &mov ($len,$chunk);
+ } else {
+ &test ($out,0x0f); # out_misaligned
+ &cmovnz ($out,"esp");
+ &test ($inp,0x0f); # inp_misaligned
+ &jz (&label("${mode}_inp_aligned"));
+ &shr ($len,2);
+ &data_byte(0xf3,0xa5); # rep movsl
+ &sub ($out,$chunk);
+ &mov ($len,$chunk);
+ &mov ($inp,$out);
+&set_label("${mode}_inp_aligned");
+ }
+ &lea ("eax",&DWP(-16,$ctx)); # ivp
+ &lea ("ebx",&DWP(16,$ctx)); # key
+ &shr ($len,4); # len/=AES_BLOCK_SIZE
+ &data_byte(0xf3,0x0f,0xa7,$opcode); # rep xcrypt*
+ if ($mode !~ /ecb|ctr/) {
+ &movdqa ("xmm0",&QWP(0,"eax"));
+ &movdqa (&DWP(-16,$ctx),"xmm0"); # copy [or refresh] iv
+ }
+ &mov ($out,&DWP(0,"ebp")); # restore parameters
+ &mov ($chunk,&DWP(12,"ebp"));
+ if ($mode eq "ctr16") {
+ &mov ($inp,&DWP(4,"ebp"));
+ &xor ($len,$len);
+&set_label("${mode}_xor");
+ &movdqu ("xmm1",&QWP(0,$inp,$len));
+ &lea ($len,&DWP(16,$len));
+ &pxor ("xmm1",&QWP(-16,"esp",$len));
+ &movdqu (&QWP(-16,$out,$len),"xmm1");
+ &cmp ($len,$chunk);
+ &jb (&label("${mode}_xor"));
+ } else {
+ &test ($out,0x0f);
+ &jz (&label("${mode}_out_aligned"));
+ &mov ($len,$chunk);
+ &shr ($len,2);
+ &lea ($inp,&DWP(0,"esp"));
+ &data_byte(0xf3,0xa5); # rep movsl
+ &sub ($out,$chunk);
+&set_label("${mode}_out_aligned");
+ &mov ($inp,&DWP(4,"ebp"));
+ }
+ &mov ($len,&DWP(8,"ebp"));
+ &add ($out,$chunk);
+ &add ($inp,$chunk);
+ &sub ($len,$chunk);
+ &mov ($chunk,$PADLOCK_CHUNK);
+ &jnz (&label("${mode}_loop"));
+ if ($mode eq "ctr16") {
+ &movdqa (&QWP(-16,$ctx),"xmm0"); # write out iv
+ &pxor ("xmm0","xmm0");
+ &pxor ("xmm1","xmm1");
+ } else {
+ &test ($out,0x0f); # out_misaligned
+ &jz (&label("${mode}_done"));
+ }
+ &mov ($len,"ebp");
+ &mov ($out,"esp");
+ &sub ($len,"esp");
+ &xor ("eax","eax");
+ &shr ($len,2);
+ &data_byte(0xf3,0xab); # rep stosl
+&set_label("${mode}_done");
+ &lea ("esp",&DWP(24,"ebp"));
+ if ($mode ne "ctr16") {
+ &jmp (&label("${mode}_exit"));
+
+&set_label("${mode}_aligned",16);
+ &lea ("eax",&DWP(-16,$ctx)); # ivp
+ &lea ("ebx",&DWP(16,$ctx)); # key
+ &shr ($len,4); # len/=AES_BLOCK_SIZE
+ &data_byte(0xf3,0x0f,0xa7,$opcode); # rep xcrypt*
+ if ($mode ne "ecb") {
+ &movdqa ("xmm0",&QWP(0,"eax"));
+ &movdqa (&DWP(-16,$ctx),"xmm0"); # copy [or refresh] iv
+ }
+&set_label("${mode}_exit"); }
+ &mov ("eax",1);
+ &lea ("esp",&DWP(4,"esp")); # popf
+&set_label("${mode}_abort");
+&function_end("padlock_${mode}_encrypt");
+}
+
+&generate_mode("ecb",0xc8);
+&generate_mode("cbc",0xd0);
+&generate_mode("cfb",0xe0);
+&generate_mode("ofb",0xe8);
+&generate_mode("ctr16",0xc8); # yes, it implements own ctr with ecb opcode,
+ # because hardware ctr was introduced later
+ # and even has errata on certain CPU stepping.
+ # own implementation *always* works...
+
+# padlock_xstore(arg0, arg1): loads arg0 into edi and arg1 into edx, then
+# executes the xstore instruction; edi is preserved for the caller. The
+# routine does not set eax itself, so the return value is whatever xstore
+# leaves there.
+&function_begin_B("padlock_xstore");
+ &push ("edi");
+ &mov ("edi",&wparam(0));
+ &mov ("edx",&wparam(1));
+ &data_byte(0x0f,0xa7,0xc0); # xstore
+ &pop ("edi");
+ &ret ();
+&function_end_B("padlock_xstore");
+
+# _win32_segv_handler: exception handler installed around "rep xsha*" on
+# win32/coff builds. For an exception whose code is 0xC0000005 it adds 4
+# to the dword at offset 184 of the context record (stepping over the
+# 4-byte rep xsha* instruction) and returns 0; for anything else it
+# returns 1. The handler is registered via safeseh on win32.
+# The &cmp call is terminated with ';' -- without it Perl parses &cmp and
+# the following &jne as operands of the binary '&' operator.
+&function_begin_B("_win32_segv_handler");
+ &mov ("eax",1); # ExceptionContinueSearch
+ &mov ("edx",&wparam(0)); # *ExceptionRecord
+ &mov ("ecx",&wparam(2)); # *ContextRecord
+ &cmp (&DWP(0,"edx"),0xC0000005); # ExceptionRecord->ExceptionCode == STATUS_ACCESS_VIOLATION
+ &jne (&label("ret"));
+ &add (&DWP(184,"ecx"),4); # skip over rep sha*
+ &mov ("eax",0); # ExceptionContinueExecution
+&set_label("ret");
+ &ret ();
+&function_end_B("_win32_segv_handler");
+&safeseh("_win32_segv_handler") if ($::win32);
+
+# padlock_sha1_oneshot(arg0, arg1, arg2): runs "rep xsha1" with eax=0,
+# edi=arg0, esi=arg1 and ecx=arg2; edi and esi are preserved.
+# On win32/coff targets an exception frame pointing at _win32_segv_handler
+# is linked in through %fs:0 before the instruction and unlinked after it.
+&function_begin_B("padlock_sha1_oneshot");
+ &push ("edi");
+ &push ("esi");
+ &xor ("eax","eax");
+ if ($::win32 or $::coff) {
+ &push (&::islabel("_win32_segv_handler"));
+ &data_byte(0x64,0xff,0x30); # push %fs:(%eax)
+ &data_byte(0x64,0x89,0x20); # mov %esp,%fs:(%eax)
+ }
+ &mov ("edi",&wparam(0));
+ &mov ("esi",&wparam(1));
+ &mov ("ecx",&wparam(2));
+ &data_byte(0xf3,0x0f,0xa6,0xc8); # rep xsha1
+ if ($::win32 or $::coff) {
+ &data_byte(0x64,0x8f,0x05,0,0,0,0); # pop %fs:0
+ &lea ("esp",&DWP(4,"esp"));
+ }
+ &pop ("esi");
+ &pop ("edi");
+ &ret ();
+&function_end_B("padlock_sha1_oneshot");
+
+# padlock_sha1(arg0, arg1, arg2): runs "rep xsha1" with eax=-1, edi=arg0,
+# esi=arg1 and ecx=arg2; edi and esi are preserved. Unlike the _oneshot
+# variant no exception frame is installed.
+&function_begin_B("padlock_sha1");
+ &push ("edi");
+ &push ("esi");
+ &mov ("eax",-1);
+ &mov ("edi",&wparam(0));
+ &mov ("esi",&wparam(1));
+ &mov ("ecx",&wparam(2));
+ &data_byte(0xf3,0x0f,0xa6,0xc8); # rep xsha1
+ &pop ("esi");
+ &pop ("edi");
+ &ret ();
+&function_end_B("padlock_sha1");
+
+# padlock_sha256_oneshot(arg0, arg1, arg2): runs "rep xsha256" with eax=0,
+# edi=arg0, esi=arg1 and ecx=arg2; edi and esi are preserved.
+# On win32/coff targets an exception frame pointing at _win32_segv_handler
+# is linked in through %fs:0 before the instruction and unlinked after it.
+&function_begin_B("padlock_sha256_oneshot");
+ &push ("edi");
+ &push ("esi");
+ &xor ("eax","eax");
+ if ($::win32 or $::coff) {
+ &push (&::islabel("_win32_segv_handler"));
+ &data_byte(0x64,0xff,0x30); # push %fs:(%eax)
+ &data_byte(0x64,0x89,0x20); # mov %esp,%fs:(%eax)
+ }
+ &mov ("edi",&wparam(0));
+ &mov ("esi",&wparam(1));
+ &mov ("ecx",&wparam(2));
+ &data_byte(0xf3,0x0f,0xa6,0xd0); # rep xsha256
+ if ($::win32 or $::coff) {
+ &data_byte(0x64,0x8f,0x05,0,0,0,0); # pop %fs:0
+ &lea ("esp",&DWP(4,"esp"));
+ }
+ &pop ("esi");
+ &pop ("edi");
+ &ret ();
+&function_end_B("padlock_sha256_oneshot");
+
+# padlock_sha256(arg0, arg1, arg2): runs "rep xsha256" with eax=-1,
+# edi=arg0, esi=arg1 and ecx=arg2; edi and esi are preserved. Unlike the
+# _oneshot variant no exception frame is installed.
+&function_begin_B("padlock_sha256");
+ &push ("edi");
+ &push ("esi");
+ &mov ("eax",-1);
+ &mov ("edi",&wparam(0));
+ &mov ("esi",&wparam(1));
+ &mov ("ecx",&wparam(2));
+ &data_byte(0xf3,0x0f,0xa6,0xd0); # rep xsha256
+ &pop ("esi");
+ &pop ("edi");
+ &ret ();
+&function_end_B("padlock_sha256");
+
+&asciz ("VIA Padlock x86 module, CRYPTOGAMS by <appro\@openssl.org>");
+&align (16);
+
+&dataseg();
+# Essentially this variable belongs in thread local storage.
+# Having this variable global on the other hand can only cause
+# few bogus key reloads [if any at all on single-CPU system],
+# so we accept the penalty...
+&set_label("padlock_saved_context",4);
+&data_word(0);
+
+&asm_finish();
--- /dev/null
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# September 2011
+#
+# Assembler helpers for Padlock engine.
+
+# Command line: <flavour> [<output>]. A lone first argument containing a
+# dot is taken to be the output file name, leaving the flavour undefined.
+$flavour = shift;
+$output = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+# Win64 calling convention is selected for nasm/masm/mingw64 flavours or
+# when the output file ends in .asm.
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+# Look for the translator next to this script first, then in crypto/perlasm.
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+# Everything printed from here on is piped through the translator; fail
+# loudly if it cannot be started instead of silently producing no output.
+open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
+
+$code=".text\n";
+
+$PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16
+
+$ctx="%rdx";
+$out="%rdi";
+$inp="%rsi";
+$len="%rcx";
+$chunk="%rbx";
+
+($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
+ ("%rdi","%rsi","%rdx","%rcx"); # Unix order
+
+$code.=<<___;
+.globl padlock_capability
+.type padlock_capability,\@abi-omnipotent
+.align 16
+padlock_capability:
+ mov %rbx,%r8
+ xor %eax,%eax
+ cpuid
+ xor %eax,%eax
+ cmp \$`"0x".unpack("H*",'tneC')`,%ebx
+ jne .Lnoluck
+ cmp \$`"0x".unpack("H*",'Hrua')`,%edx
+ jne .Lnoluck
+ cmp \$`"0x".unpack("H*",'slua')`,%ecx
+ jne .Lnoluck
+ mov \$0xC0000000,%eax
+ cpuid
+ mov %eax,%edx
+ xor %eax,%eax
+ cmp \$0xC0000001,%edx
+ jb .Lnoluck
+ mov \$0xC0000001,%eax
+ cpuid
+ mov %edx,%eax
+ and \$0xffffffef,%eax
+ or \$0x10,%eax # set Nano bit#4
+.Lnoluck:
+ mov %r8,%rbx
+ ret
+.size padlock_capability,.-padlock_capability
+
+.globl padlock_key_bswap
+.type padlock_key_bswap,\@abi-omnipotent,0
+.align 16
+padlock_key_bswap:
+ mov 240($arg1),%edx
+.Lbswap_loop:
+ mov ($arg1),%eax
+ bswap %eax
+ mov %eax,($arg1)
+ lea 4($arg1),$arg1
+ sub \$1,%edx
+ jnz .Lbswap_loop
+ ret
+.size padlock_key_bswap,.-padlock_key_bswap
+
+.globl padlock_verify_context
+.type padlock_verify_context,\@abi-omnipotent
+.align 16
+padlock_verify_context:
+ mov $arg1,$ctx
+ pushf
+ lea .Lpadlock_saved_context(%rip),%rax
+ call _padlock_verify_ctx
+ lea 8(%rsp),%rsp
+ ret
+.size padlock_verify_context,.-padlock_verify_context
+
+.type _padlock_verify_ctx,\@abi-omnipotent
+.align 16
+_padlock_verify_ctx:
+ mov 8(%rsp),%r8
+ bt \$30,%r8
+ jnc .Lverified
+ cmp (%rax),$ctx
+ je .Lverified
+ pushf
+ popf
+.Lverified:
+ mov $ctx,(%rax)
+ ret
+.size _padlock_verify_ctx,.-_padlock_verify_ctx
+
+.globl padlock_reload_key
+.type padlock_reload_key,\@abi-omnipotent
+.align 16
+padlock_reload_key:
+ pushf
+ popf
+ ret
+.size padlock_reload_key,.-padlock_reload_key
+
+.globl padlock_aes_block
+.type padlock_aes_block,\@function,3
+.align 16
+padlock_aes_block:
+ mov %rbx,%r8
+ mov \$1,$len
+ lea 32($ctx),%rbx # key
+ lea 16($ctx),$ctx # control word
+ .byte 0xf3,0x0f,0xa7,0xc8 # rep xcryptecb
+ mov %r8,%rbx
+ ret
+.size padlock_aes_block,.-padlock_aes_block
+
+.globl padlock_xstore
+.type padlock_xstore,\@function,2
+.align 16
+padlock_xstore:
+ mov %esi,%edx
+ .byte 0x0f,0xa7,0xc0 # xstore
+ ret
+.size padlock_xstore,.-padlock_xstore
+
+.globl padlock_sha1_oneshot
+.type padlock_sha1_oneshot,\@function,3
+.align 16
+padlock_sha1_oneshot:
+ xor %rax,%rax
+ mov %rdx,%rcx
+ .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1
+ ret
+.size padlock_sha1_oneshot,.-padlock_sha1_oneshot
+
+.globl padlock_sha1
+.type padlock_sha1,\@function,3
+.align 16
+padlock_sha1:
+ mov \$-1,%rax
+ mov %rdx,%rcx
+ .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1
+ ret
+.size padlock_sha1,.-padlock_sha1
+
+.globl padlock_sha256_oneshot
+.type padlock_sha256_oneshot,\@function,3
+.align 16
+padlock_sha256_oneshot:
+ xor %rax,%rax
+ mov %rdx,%rcx
+ .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256
+ ret
+.size padlock_sha256_oneshot,.-padlock_sha256_oneshot
+
+.globl padlock_sha256
+.type padlock_sha256,\@function,3
+.align 16
+padlock_sha256:
+ mov \$-1,%rax
+ mov %rdx,%rcx
+ .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256
+ ret
+.size padlock_sha256,.-padlock_sha256
+___
+
+# generate_mode($mode,$opcode): appends padlock_${mode}_encrypt to $code.
+#   $mode   - mode name, used in the symbol and local label names; modes
+#             matching /ecb|ctr/ skip the iv copy after xcrypt
+#   $opcode - last byte of the "rep xcrypt*" instruction encoding
+# The generated function returns 1 in eax on success and 0 when ctx or len
+# is not a multiple of 16.
+# When the align bit (bit 5) is set in the control word, or when both
+# buffers are 16-byte aligned, the whole input is handed to one xcrypt
+# (.L${mode}_aligned). Otherwise data goes through an on-stack bounce
+# buffer in chunks of at most $PADLOCK_CHUNK bytes, and the buffer is wiped
+# before returning.
+# The align-bit test uses an explicit 32-bit "testl" and is followed by the
+# branch to .L${mode}_aligned, matching the 32-bit x86 module; without the
+# branch the test result was overwritten by the next test.
+sub generate_mode {
+my ($mode,$opcode) = @_;
+# int padlock_$mode_encrypt(void *out, const void *inp,
+# struct padlock_cipher_data *ctx, size_t len);
+$code.=<<___;
+.globl padlock_${mode}_encrypt
+.type padlock_${mode}_encrypt,\@function,4
+.align 16
+padlock_${mode}_encrypt:
+ push %rbp
+ push %rbx
+
+ xor %eax,%eax
+ test \$15,$ctx
+ jnz .L${mode}_abort
+ test \$15,$len
+ jnz .L${mode}_abort
+ lea .Lpadlock_saved_context(%rip),%rax
+ pushf
+ cld
+ call _padlock_verify_ctx
+ lea 16($ctx),$ctx # control word
+ xor %eax,%eax
+ xor %ebx,%ebx
+ testl \$`1<<5`,($ctx) # align bit in control word
+ jnz .L${mode}_aligned
+ test \$0x0f,$out
+ setz %al # !out_misaligned
+ test \$0x0f,$inp
+ setz %bl # !inp_misaligned
+ test %ebx,%eax
+ jnz .L${mode}_aligned
+ neg %rax
+ mov \$$PADLOCK_CHUNK,$chunk
+ not %rax # out_misaligned?-1:0
+ lea (%rsp),%rbp
+ cmp $chunk,$len
+ cmovc $len,$chunk # chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
+ and $chunk,%rax # out_misaligned?chunk:0
+ mov $len,$chunk
+ neg %rax
+ and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK
+ lea (%rax,%rbp),%rsp
+ jmp .L${mode}_loop
+.align 16
+.L${mode}_loop:
+ mov $out,%r8 # save parameters
+ mov $inp,%r9
+ mov $len,%r10
+ mov $chunk,$len
+ mov $chunk,%r11
+ test \$0x0f,$out # out_misaligned
+ cmovnz %rsp,$out
+ test \$0x0f,$inp # inp_misaligned
+ jz .L${mode}_inp_aligned
+ shr \$3,$len
+ .byte 0xf3,0x48,0xa5 # rep movsq
+ sub $chunk,$out
+ mov $chunk,$len
+ mov $out,$inp
+.L${mode}_inp_aligned:
+ lea -16($ctx),%rax # ivp
+ lea 16($ctx),%rbx # key
+ shr \$4,$len
+ .byte 0xf3,0x0f,0xa7,$opcode # rep xcrypt*
+___
+$code.=<<___ if ($mode !~ /ecb|ctr/);
+ movdqa (%rax),%xmm0
+ movdqa %xmm0,-16($ctx) # copy [or refresh] iv
+___
+$code.=<<___;
+ mov %r8,$out # restore paramters
+ mov %r11,$chunk
+ test \$0x0f,$out
+ jz .L${mode}_out_aligned
+ mov $chunk,$len
+ shr \$3,$len
+ lea (%rsp),$inp
+ .byte 0xf3,0x48,0xa5 # rep movsq
+ sub $chunk,$out
+.L${mode}_out_aligned:
+ mov %r9,$inp
+ mov %r10,$len
+ add $chunk,$out
+ add $chunk,$inp
+ sub $chunk,$len
+ mov \$$PADLOCK_CHUNK,$chunk
+ jnz .L${mode}_loop
+
+ test \$0x0f,$out
+ jz .L${mode}_done
+
+ mov %rbp,$len
+ mov %rsp,$out
+ sub %rsp,$len
+ xor %rax,%rax
+ shr \$3,$len
+ .byte 0xf3,0x48,0xab # rep stosq
+.L${mode}_done:
+ lea (%rbp),%rsp
+ jmp .L${mode}_exit
+
+.align 16
+.L${mode}_aligned:
+ lea -16($ctx),%rax # ivp
+ lea 16($ctx),%rbx # key
+ shr \$4,$len # len/=AES_BLOCK_SIZE
+ .byte 0xf3,0x0f,0xa7,$opcode # rep xcrypt*
+___
+$code.=<<___ if ($mode !~ /ecb|ctr/);
+ movdqa (%rax),%xmm0
+ movdqa %xmm0,-16($ctx) # copy [or refresh] iv
+___
+$code.=<<___;
+.L${mode}_exit:
+ mov \$1,%eax
+ lea 8(%rsp),%rsp
+.L${mode}_abort:
+ pop %rbx
+ pop %rbp
+ ret
+.size padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
+___
+}
+
+&generate_mode("ecb",0xc8);
+&generate_mode("cbc",0xd0);
+&generate_mode("cfb",0xe0);
+&generate_mode("ofb",0xe8);
+&generate_mode("ctr16",0xd8);
+
+$code.=<<___;
+.asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
+.align 16
+.data
+.align 8
+.Lpadlock_saved_context:
+ .quad 0
+___
+# Evaluate every `...` expression embedded in the assembly template.
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+
+print $code;
+
+# STDOUT is a pipe to the translator: close() waits for it and reports a
+# failed or truncated translation, so do not ignore its result.
+close STDOUT or die "error closing STDOUT: $! (exit status $?)";
/* VIA PadLock AES is available *ONLY* on some x86 CPUs.
Not only that it doesn't exist elsewhere, but it
- even can't be compiled on other platforms!
+ even can't be compiled on other platforms! */
- In addition, because of the heavy use of inline assembler,
- compiler choice is limited to GCC and Microsoft C. */
#undef COMPILE_HW_PADLOCK
-#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
-# if (defined(__GNUC__) && __GNUC__>=2 && \
- (defined(__i386__) || defined(__i386) || \
- defined(__x86_64__) || defined(__x86_64)) \
- ) || \
- (defined(_MSC_VER) && defined(_M_IX86))
+#if !defined(I386_ONLY) && !defined(OPENSSL_NO_ASM)
+# if defined(__i386__) || defined(__i386) || \
+ defined(__x86_64__) || defined(__x86_64) || \
+ defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
+ defined(__INTEL__)
# define COMPILE_HW_PADLOCK
# ifdef OPENSSL_NO_DYNAMIC_ENGINE
static ENGINE *ENGINE_padlock (void);
#endif
#ifdef COMPILE_HW_PADLOCK
-/* We do these includes here to avoid header problems on platforms that
- do not have the VIA padlock anyway... */
-#include <stdlib.h>
-#ifdef _WIN32
-# include <malloc.h>
-# ifndef alloca
-# define alloca _alloca
-# endif
-#elif defined(__GNUC__)
-# ifndef alloca
-# define alloca(s) __builtin_alloca((s))
-# endif
-#endif
/* Function for ENGINE detection and control */
static int padlock_available(void);
/* Available features */
static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
static int padlock_use_rng = 0; /* Random Number Generator */
-#ifndef OPENSSL_NO_AES
-static int padlock_aes_align_required = 1;
-#endif
/* ===== Engine "management" functions ===== */
} cword; /* Control word */
AES_KEY ks; /* Encryption key */
};
-
-/*
- * Essentially this variable belongs in thread local storage.
- * Having this variable global on the other hand can only cause
- * few bogus key reloads [if any at all on single-CPU system],
- * so we accept the penatly...
- */
-static volatile struct padlock_cipher_data *padlock_saved_context;
-#endif
-
-/*
- * =======================================================
- * Inline assembler section(s).
- * =======================================================
- * Order of arguments is chosen to facilitate Windows port
- * using __fastcall calling convention. If you wish to add
- * more routines, keep in mind that first __fastcall
- * argument is passed in %ecx and second - in %edx.
- * =======================================================
- */
-#if defined(__GNUC__) && __GNUC__>=2
-#if defined(__i386__) || defined(__i386)
-/*
- * As for excessive "push %ebx"/"pop %ebx" found all over.
- * When generating position-independent code GCC won't let
- * us use "b" in assembler templates nor even respect "ebx"
- * in "clobber description." Therefore the trouble...
- */
-
-/* Helper function - check if a CPUID instruction
- is available on this CPU */
-static int
-padlock_insn_cpuid_available(void)
-{
- int result = -1;
-
- /* We're checking if the bit #21 of EFLAGS
- can be toggled. If yes = CPUID is available. */
- asm volatile (
- "pushf\n"
- "popl %%eax\n"
- "xorl $0x200000, %%eax\n"
- "movl %%eax, %%ecx\n"
- "andl $0x200000, %%ecx\n"
- "pushl %%eax\n"
- "popf\n"
- "pushf\n"
- "popl %%eax\n"
- "andl $0x200000, %%eax\n"
- "xorl %%eax, %%ecx\n"
- "movl %%ecx, %0\n"
- : "=r" (result) : : "eax", "ecx");
-
- return (result == 0);
-}
-
-/* Load supported features of the CPU to see if
- the PadLock is available. */
-static int
-padlock_available(void)
-{
- char vendor_string[16];
- unsigned int eax, edx;
-
- /* First check if the CPUID instruction is available at all... */
- if (! padlock_insn_cpuid_available())
- return 0;
-
- /* Are we running on the Centaur (VIA) CPU? */
- eax = 0x00000000;
- vendor_string[12] = 0;
- asm volatile (
- "pushl %%ebx\n"
- "cpuid\n"
- "movl %%ebx,(%%edi)\n"
- "movl %%edx,4(%%edi)\n"
- "movl %%ecx,8(%%edi)\n"
- "popl %%ebx"
- : "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
- if (strcmp(vendor_string, "CentaurHauls") != 0)
- return 0;
-
- /* Check for Centaur Extended Feature Flags presence */
- eax = 0xC0000000;
- asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
- : "+a"(eax) : : "ecx", "edx");
- if (eax < 0xC0000001)
- return 0;
-
- /* Read the Centaur Extended Feature Flags */
- eax = 0xC0000001;
- asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
- : "+a"(eax), "=d"(edx) : : "ecx");
-
- /* Fill up some flags */
- padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
- padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
-
- return padlock_use_ace + padlock_use_rng;
-}
-
-/* Force key reload from memory to the CPU microcode.
- Loading EFLAGS from the stack clears EFLAGS[30]
- which does the trick. */
-static inline void
-padlock_reload_key(void)
-{
- asm volatile ("pushfl; popfl");
-}
-
-#ifndef OPENSSL_NO_AES
-/*
- * This is heuristic key context tracing. At first one
- * believes that one should use atomic swap instructions,
- * but it's not actually necessary. Point is that if
- * padlock_saved_context was changed by another thread
- * after we've read it and before we compare it with cdata,
- * our key *shall* be reloaded upon thread context switch
- * and we are therefore set in either case...
- */
-static inline void
-padlock_verify_context(struct padlock_cipher_data *cdata)
-{
- asm volatile (
- "pushfl\n"
-" btl $30,(%%esp)\n"
-" jnc 1f\n"
-" cmpl %2,%1\n"
-" je 1f\n"
-" popfl\n"
-" subl $4,%%esp\n"
-"1: addl $4,%%esp\n"
-" movl %2,%0"
- :"+m"(padlock_saved_context)
- : "r"(padlock_saved_context), "r"(cdata) : "cc");
-}
-
-/* Template for padlock_xcrypt_* modes */
-/* BIG FAT WARNING:
- * The offsets used with 'leal' instructions
- * describe items of the 'padlock_cipher_data'
- * structure.
- */
-#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
-static inline void *name(size_t cnt, \
- struct padlock_cipher_data *cdata, \
- void *out, const void *inp) \
-{ void *iv; \
- asm volatile ( "pushl %%ebx\n" \
- " leal 16(%0),%%edx\n" \
- " leal 32(%0),%%ebx\n" \
- rep_xcrypt "\n" \
- " popl %%ebx" \
- : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
- : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
- : "edx", "cc", "memory"); \
- return iv; \
-}
#endif
-#elif defined(__x86_64__) || defined(__x86_64)
+/*
+ * Interface to assembler module.
+ *
+ * None of these routines is defined in this file. Callers in this file
+ * treat a zero return from the padlock_*_encrypt entry points as failure
+ * and a non-zero return as success.
+ *
+ * Routines that take no arguments are declared with (void): an empty
+ * parameter list is not a prototype in C, so stray arguments would go
+ * undiagnosed.
+ *
+ * NOTE(review): padlock_ctr32_encrypt has no caller in this file, and the
+ * x86_64 generator instantiates a "ctr16" mode (padlock_ctr16_encrypt).
+ * Confirm the symbol name before relying on this declaration.
+ */
+unsigned int padlock_capability(void);
+void padlock_key_bswap(AES_KEY *key);
+void padlock_verify_context(struct padlock_cipher_data *ctx);
+void padlock_reload_key(void);
+void padlock_aes_block(void *out, const void *inp,
+		struct padlock_cipher_data *ctx);
+int  padlock_ecb_encrypt(void *out, const void *inp,
+		struct padlock_cipher_data *ctx, size_t len);
+int  padlock_cbc_encrypt(void *out, const void *inp,
+		struct padlock_cipher_data *ctx, size_t len);
+int  padlock_cfb_encrypt(void *out, const void *inp,
+		struct padlock_cipher_data *ctx, size_t len);
+int  padlock_ofb_encrypt(void *out, const void *inp,
+		struct padlock_cipher_data *ctx, size_t len);
+int  padlock_ctr32_encrypt(void *out, const void *inp,
+		struct padlock_cipher_data *ctx, size_t len);
+int  padlock_xstore(void *out,int edx);
+void padlock_sha1_oneshot(void *ctx,const void *inp,size_t len);
+void padlock_sha1(void *ctx,const void *inp,size_t len);
+void padlock_sha256_oneshot(void *ctx,const void *inp,size_t len);
+void padlock_sha256(void *ctx,const void *inp,size_t len);
/* Load supported features of the CPU to see if
the PadLock is available. */
static int
padlock_available(void)
{
- char vendor_string[16];
- unsigned int eax, edx;
- size_t scratch;
-
- /* Are we running on the Centaur (VIA) CPU? */
- eax = 0x00000000;
- vendor_string[12] = 0;
- asm volatile (
- "movq %%rbx,%1\n"
- "cpuid\n"
- "movl %%ebx,(%2)\n"
- "movl %%edx,4(%2)\n"
- "movl %%ecx,8(%2)\n"
- "movq %1,%%rbx"
- : "+a"(eax), "=&r"(scratch) : "r"(vendor_string) : "rcx", "rdx");
- if (strcmp(vendor_string, "CentaurHauls") != 0)
- return 0;
-
- /* Check for Centaur Extended Feature Flags presence */
- eax = 0xC0000000;
- asm volatile ("movq %%rbx,%1; cpuid; movq %1,%%rbx"
- : "+a"(eax), "=&r"(scratch) : : "rcx", "rdx");
- if (eax < 0xC0000001)
- return 0;
-
- /* Read the Centaur Extended Feature Flags */
- eax = 0xC0000001;
- asm volatile ("movq %%rbx,%2; cpuid; movq %2,%%rbx"
- : "+a"(eax), "=d"(edx), "=&r"(scratch) : : "rcx");
+ unsigned int edx = padlock_capability();
/* Fill up some flags */
padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
return padlock_use_ace + padlock_use_rng;
}
-/* Force key reload from memory to the CPU microcode.
- Loading EFLAGS from the stack clears EFLAGS[30]
- which does the trick. */
-static inline void
-padlock_reload_key(void)
-{
- asm volatile ("pushfq; popfq");
-}
-
-#ifndef OPENSSL_NO_AES
-/*
- * This is heuristic key context tracing. At first one
- * believes that one should use atomic swap instructions,
- * but it's not actually necessary. Point is that if
- * padlock_saved_context was changed by another thread
- * after we've read it and before we compare it with cdata,
- * our key *shall* be reloaded upon thread context switch
- * and we are therefore set in either case...
- */
-static inline void
-padlock_verify_context(struct padlock_cipher_data *cdata)
-{
- asm volatile (
- "pushfq\n"
-" btl $30,(%%rsp)\n"
-" jnc 1f\n"
-" cmpq %2,%1\n"
-" je 1f\n"
-" popfq\n"
-" subq $8,%%rsp\n"
-"1: addq $8,%%rsp\n"
-" movq %2,%0"
- :"+m"(padlock_saved_context)
- : "r"(padlock_saved_context), "r"(cdata) : "cc");
-}
-
-/* Template for padlock_xcrypt_* modes */
-/* BIG FAT WARNING:
- * The offsets used with 'leal' instructions
- * describe items of the 'padlock_cipher_data'
- * structure.
- */
-#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
-static inline void *name(size_t cnt, \
- struct padlock_cipher_data *cdata, \
- void *out, const void *inp) \
-{ void *iv; \
- size_t scratch; \
- asm volatile ( "movq %%rbx,%4\n" \
- " leaq 16(%0),%%rdx\n" \
- " leaq 32(%0),%%rbx\n" \
- rep_xcrypt "\n" \
- " movq %4,%%rbx" \
- : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp), "=&r"(scratch) \
- : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
- : "rdx", "cc", "memory"); \
- return iv; \
-}
-#endif
-
-#endif /* cpu */
-
-#ifndef OPENSSL_NO_AES
-/* Generate all functions with appropriate opcodes */
-PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */
-PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */
-PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */
-PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */
-
-/* Our own htonl()/ntohl() */
-static inline void
-padlock_bswapl(AES_KEY *ks)
-{
- size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
- unsigned int *key = ks->rd_key;
-
- while (i--) {
- asm volatile ("bswapl %0" : "+r"(*key));
- key++;
- }
-}
-#endif
-
-/* The RNG call itself */
-static inline unsigned int
-padlock_xstore(void *addr, unsigned int edx_in)
-{
- unsigned int eax_out;
-
- asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
- : "=a"(eax_out),"=m"(*(unsigned *)addr)
- : "D"(addr), "d" (edx_in)
- );
-
- return eax_out;
-}
-
-/* Why not inline 'rep movsd'? I failed to find information on what
- * value in Direction Flag one can expect and consequently have to
- * apply "better-safe-than-sorry" approach and assume "undefined."
- * I could explicitly clear it and restore the original value upon
- * return from padlock_aes_cipher, but it's presumably too much
- * trouble for too little gain...
- *
- * In case you wonder 'rep xcrypt*' instructions above are *not*
- * affected by the Direction Flag and pointers advance toward
- * larger addresses unconditionally.
- */
-static inline unsigned char *
-padlock_memcpy(void *dst,const void *src,size_t n)
-{
- size_t *d=dst;
- const size_t *s=src;
-
- n /= sizeof(*d);
- do { *d++ = *s++; } while (--n);
-
- return dst;
-}
-
-#elif defined(_MSC_VER)
-/*
- * Unlike GCC these are real functions. In order to minimize impact
- * on performance we adhere to __fastcall calling convention in
- * order to get two first arguments passed through %ecx and %edx.
- * Which kind of suits very well, as instructions in question use
- * both %ecx and %edx as input:-)
- */
-#define REP_XCRYPT(code) \
- _asm _emit 0xf3 \
- _asm _emit 0x0f _asm _emit 0xa7 \
- _asm _emit code
-
-/* BIG FAT WARNING:
- * The offsets used with 'lea' instructions
- * describe items of the 'padlock_cipher_data'
- * structure.
- */
-#define PADLOCK_XCRYPT_ASM(name,code) \
-static void * __fastcall \
- name (size_t cnt, void *cdata, \
- void *outp, const void *inp) \
-{ _asm mov eax,edx \
- _asm lea edx,[eax+16] \
- _asm lea ebx,[eax+32] \
- _asm mov edi,outp \
- _asm mov esi,inp \
- REP_XCRYPT(code) \
-}
-
-PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
-PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
-PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
-PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
-
-static int __fastcall
-padlock_xstore(void *outp,unsigned int code)
-{ _asm mov edi,ecx
- _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
-}
-
-static void __fastcall
-padlock_reload_key(void)
-{ _asm pushfd _asm popfd }
-
-static void __fastcall
-padlock_verify_context(void *cdata)
-{ _asm {
- pushfd
- bt DWORD PTR[esp],30
- jnc skip
- cmp ecx,padlock_saved_context
- je skip
- popfd
- sub esp,4
- skip: add esp,4
- mov padlock_saved_context,ecx
- }
-}
-
-static int
-padlock_available(void)
-{ _asm {
- pushfd
- pop eax
- mov ecx,eax
- xor eax,1<<21
- push eax
- popfd
- pushfd
- pop eax
- xor eax,ecx
- bt eax,21
- jnc noluck
- mov eax,0
- cpuid
- xor eax,eax
- cmp ebx,'tneC'
- jne noluck
- cmp edx,'Hrua'
- jne noluck
- cmp ecx,'slua'
- jne noluck
- mov eax,0xC0000000
- cpuid
- mov edx,eax
- xor eax,eax
- cmp edx,0xC0000001
- jb noluck
- mov eax,0xC0000001
- cpuid
- xor eax,eax
- bt edx,6
- jnc skip_a
- bt edx,7
- jnc skip_a
- mov padlock_use_ace,1
- inc eax
- skip_a: bt edx,2
- jnc skip_r
- bt edx,3
- jnc skip_r
- mov padlock_use_rng,1
- inc eax
- skip_r:
- noluck:
- }
-}
-
-static void __fastcall
-padlock_bswapl(void *key)
-{ _asm {
- pushfd
- cld
- mov esi,ecx
- mov edi,ecx
- mov ecx,60
- up: lodsd
- bswap eax
- stosd
- loop up
- popfd
- }
-}
-
-/* MS actually specifies status of Direction Flag and compiler even
- * manages to compile following as 'rep movsd' all by itself...
- */
-#define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
-#endif
-
/* ===== AES encryption/decryption ===== */
#ifndef OPENSSL_NO_AES
/* Function prototypes ... */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
const unsigned char *iv, int enc);
-static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
- const unsigned char *in, size_t nbytes);
#define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
NEAREST_ALIGNED(ctx->cipher_data))
+/*
+ * ECB handler for the EVP cipher table: forwards the whole request to the
+ * assembler routine, using the 16-byte aligned cipher data kept inside
+ * ctx->cipher_data. Returns whatever padlock_ecb_encrypt returns.
+ */
+static int
+padlock_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
+		const unsigned char *in_arg, size_t nbytes)
+{
+	struct padlock_cipher_data *cdata = ALIGNED_CIPHER_DATA(ctx);
+
+	return padlock_ecb_encrypt(out_arg, in_arg, cdata, nbytes);
+}
+/*
+ * CBC handler for the EVP cipher table. The chaining value travels in
+ * ctx->iv between calls: it is copied into the aligned cipher data before
+ * the assembler routine runs and copied back only when that routine
+ * reports success (non-zero). The routine's return value is passed on.
+ */
+static int
+padlock_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
+		const unsigned char *in_arg, size_t nbytes)
+{
+	struct padlock_cipher_data *cdata = ALIGNED_CIPHER_DATA(ctx);
+	int ok;
+
+	memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
+
+	ok = padlock_cbc_encrypt(out_arg, in_arg, cdata, nbytes);
+	if (ok != 0)
+		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
+
+	return ok;
+}
+/*
+ * CFB handler for the EVP cipher table; accepts any byte count.
+ *
+ * Work is done in up to three stages:
+ *  1. If a previous call stopped mid-block (ctx->num != 0), finish that
+ *     block byte by byte against the data held in ctx->iv.
+ *  2. Pass all remaining whole blocks to padlock_cfb_encrypt.
+ *  3. For a trailing partial block, run one padlock_aes_block call on the
+ *     IV in place, XOR the leftover bytes against it and record the count
+ *     in ctx->num so that the next call can resume at stage 1.
+ *
+ * Returns 1 on success; 0 if ctx->num is out of range or the assembler
+ * routine reports failure.
+ */
+static int
+padlock_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
+ const unsigned char *in_arg, size_t nbytes)
+{
+ struct padlock_cipher_data *cdata = ALIGNED_CIPHER_DATA(ctx);
+ size_t chunk;
+
+ if ((chunk = ctx->num)) { /* borrow chunk variable */
+ unsigned char *ivp=ctx->iv; /* the partially used block is kept in ctx->iv */
+
+ if (chunk >= AES_BLOCK_SIZE)
+ return 0; /* bogus value */
+
+ if (ctx->encrypt)
+ while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
+ ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk]; /* feedback is the output byte */
+ chunk++, nbytes--;
+ }
+ else while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
+ unsigned char c = *(in_arg++); /* read first so that in == out works */
+ *(out_arg++) = c ^ ivp[chunk];
+ ivp[chunk++] = c, nbytes--; /* feedback is the input byte */
+ }
+
+ ctx->num = chunk%AES_BLOCK_SIZE; /* becomes 0 once the block is used up */
+ }
+
+ if (nbytes == 0)
+ return 1;
+
+ memcpy (cdata->iv, ctx->iv, AES_BLOCK_SIZE); /* hand the IV to the aligned cipher data */
+
+ if ((chunk = nbytes & ~(AES_BLOCK_SIZE-1))) { /* chunk = whole-block portion of nbytes */
+ if (!padlock_cfb_encrypt(out_arg,in_arg,cdata,chunk))
+ return 0;
+ nbytes -= chunk;
+ }
+
+ if (nbytes) { /* fewer than AES_BLOCK_SIZE bytes left */
+ unsigned char *ivp = cdata->iv;
+
+ out_arg += chunk; /* skip what stage 2 handled (chunk may be 0) */
+ in_arg += chunk;
+ ctx->num = nbytes; /* bytes of this block consumed so far */
+ if (cdata->cword.b.encdec) { /* this branch feeds the input byte back into the IV */
+ cdata->cword.b.encdec=0; /* cleared only for the single-block call below */
+ padlock_reload_key();
+ padlock_aes_block(ivp,ivp,cdata); /* transform the IV in place */
+ cdata->cword.b.encdec=1; /* restore the control word */
+ padlock_reload_key();
+ while(nbytes) {
+ unsigned char c = *(in_arg++);
+ *(out_arg++) = c ^ *ivp;
+ *(ivp++) = c, nbytes--;
+ }
+ }
+ else { padlock_reload_key();
+ padlock_aes_block(ivp,ivp,cdata); /* transform the IV in place */
+ padlock_reload_key();
+ while (nbytes) {
+ *ivp = *(out_arg++) = *(in_arg++) ^ *ivp; /* feedback is the output byte */
+ ivp++, nbytes--;
+ }
+ }
+ }
+
+ memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); /* publish the updated IV for the next call */
+
+ return 1;
+}
+/*
+ * OFB handler for the EVP cipher table; accepts any byte count.
+ *
+ * Same three stages as the CFB handler, except that the data bytes are
+ * never written back into the IV: first finish a block left over from the
+ * previous call (ctx->num != 0), then pass whole blocks to
+ * padlock_ofb_encrypt, then cover a trailing partial block with one
+ * padlock_aes_block call on the IV and record the count in ctx->num.
+ *
+ * Returns 1 on success; 0 if ctx->num is out of range or the assembler
+ * routine reports failure.
+ */
+static int
+padlock_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
+ const unsigned char *in_arg, size_t nbytes)
+{
+ struct padlock_cipher_data *cdata = ALIGNED_CIPHER_DATA(ctx);
+ size_t chunk;
+
+ /* ctx->num is maintained in byte-oriented modes,
+ such as CFB and OFB... */
+ if ((chunk = ctx->num)) { /* borrow chunk variable */
+ unsigned char *ivp=ctx->iv; /* the partially used block is kept in ctx->iv */
+
+ if (chunk >= AES_BLOCK_SIZE)
+ return 0; /* bogus value */
+
+ while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
+ *(out_arg++) = *(in_arg++) ^ ivp[chunk]; /* the IV bytes are only read here */
+ chunk++, nbytes--;
+ }
+
+ ctx->num = chunk%AES_BLOCK_SIZE; /* becomes 0 once the block is used up */
+ }
+
+ if (nbytes == 0)
+ return 1;
+
+ memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); /* hand the IV to the aligned cipher data */
+
+ if ((chunk = nbytes & ~(AES_BLOCK_SIZE-1))) { /* chunk = whole-block portion of nbytes */
+ if (!padlock_ofb_encrypt(out_arg,in_arg,cdata,chunk))
+ return 0;
+ nbytes -= chunk;
+ }
+
+ if (nbytes) { /* fewer than AES_BLOCK_SIZE bytes left */
+ unsigned char *ivp = cdata->iv;
+
+ out_arg += chunk; /* skip what the whole-block stage handled (chunk may be 0) */
+ in_arg += chunk;
+ ctx->num = nbytes; /* bytes of this block consumed so far */
+ padlock_reload_key(); /* empirically found */
+ padlock_aes_block(ivp,ivp,cdata); /* transform the IV in place */
+ padlock_reload_key(); /* empirically found */
+ while (nbytes) {
+ *(out_arg++) = *(in_arg++) ^ *ivp;
+ ivp++, nbytes--;
+ }
+ }
+
+ memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); /* publish the updated IV for the next call */
+
+ return 1;
+}
+
#define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB 1
AES_BLOCK_SIZE, \
0 | EVP_CIPH_##umode##_MODE, \
padlock_aes_init_key, \
- padlock_aes_cipher, \
+ padlock_##lmode##_cipher, \
NULL, \
sizeof(struct padlock_cipher_data) + 16, \
EVP_CIPHER_set_asn1_iv, \
and is listed as hardware errata. They most
likely will fix it at some point and then
a check for stepping would be due here. */
- if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
- EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
- enc)
- AES_set_encrypt_key(key, key_len, &cdata->ks);
- else
+ if ((EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_ECB_MODE ||
+ EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CBC_MODE)
+ && !enc)
AES_set_decrypt_key(key, key_len, &cdata->ks);
+ else
+ AES_set_encrypt_key(key, key_len, &cdata->ks);
#ifndef AES_ASM
/* OpenSSL C functions use byte-swapped extended key. */
- padlock_bswapl(&cdata->ks);
+ padlock_key_bswap(&cdata->ks);
#endif
cdata->cword.b.keygen = 1;
break;
return 1;
}
-/*
- * Simplified version of padlock_aes_cipher() used when
- * 1) both input and output buffers are at aligned addresses.
- * or when
- * 2) running on a newer CPU that doesn't require aligned buffers.
- */
-static int
-padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
- const unsigned char *in_arg, size_t nbytes)
-{
- struct padlock_cipher_data *cdata;
- void *iv;
-
- cdata = ALIGNED_CIPHER_DATA(ctx);
- padlock_verify_context(cdata);
-
- switch (EVP_CIPHER_CTX_mode(ctx)) {
- case EVP_CIPH_ECB_MODE:
- padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
- break;
-
- case EVP_CIPH_CBC_MODE:
- memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
- iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
- memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
- break;
-
- case EVP_CIPH_CFB_MODE:
- memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
- iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
- memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
- break;
-
- case EVP_CIPH_OFB_MODE:
- memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
- padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
- memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
- break;
-
- default:
- return 0;
- }
-
- memset(cdata->iv, 0, AES_BLOCK_SIZE);
-
- return 1;
-}
-
-#ifndef PADLOCK_CHUNK
-# define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */
-#endif
-#if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
-# error "insane PADLOCK_CHUNK..."
-#endif
-
-/* Re-align the arguments to 16-Bytes boundaries and run the
- encryption function itself. This function is not AES-specific. */
-static int
-padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
- const unsigned char *in_arg, size_t nbytes)
-{
- struct padlock_cipher_data *cdata;
- const void *inp;
- unsigned char *out;
- void *iv;
- int inp_misaligned, out_misaligned, realign_in_loop;
- size_t chunk, allocated=0;
-
- /* ctx->num is maintained in byte-oriented modes,
- such as CFB and OFB... */
- if ((chunk = ctx->num)) { /* borrow chunk variable */
- unsigned char *ivp=ctx->iv;
-
- switch (EVP_CIPHER_CTX_mode(ctx)) {
- case EVP_CIPH_CFB_MODE:
- if (chunk >= AES_BLOCK_SIZE)
- return 0; /* bogus value */
-
- if (ctx->encrypt)
- while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
- ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
- chunk++, nbytes--;
- }
- else while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
- unsigned char c = *(in_arg++);
- *(out_arg++) = c ^ ivp[chunk];
- ivp[chunk++] = c, nbytes--;
- }
-
- ctx->num = chunk%AES_BLOCK_SIZE;
- break;
- case EVP_CIPH_OFB_MODE:
- if (chunk >= AES_BLOCK_SIZE)
- return 0; /* bogus value */
-
- while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
- *(out_arg++) = *(in_arg++) ^ ivp[chunk];
- chunk++, nbytes--;
- }
-
- ctx->num = chunk%AES_BLOCK_SIZE;
- break;
- }
- }
-
- if (nbytes == 0)
- return 1;
-#if 0
- if (nbytes % AES_BLOCK_SIZE)
- return 0; /* are we expected to do tail processing? */
-#else
- /* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
- modes and arbitrary value in byte-oriented modes, such as
- CFB and OFB... */
-#endif
-
- /* VIA promises CPUs that won't require alignment in the future.
- For now padlock_aes_align_required is initialized to 1 and
- the condition is never met... */
- /* C7 core is capable to manage unaligned input in non-ECB[!]
- mode, but performance penalties appear to be approximately
- same as for software alignment below or ~3x. They promise to
- improve it in the future, but for now we can just as well
- pretend that it can only handle aligned input... */
- if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
- return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
-
- inp_misaligned = (((size_t)in_arg) & 0x0F);
- out_misaligned = (((size_t)out_arg) & 0x0F);
-
- /* Note that even if output is aligned and input not,
- * I still prefer to loop instead of copy the whole
- * input and then encrypt in one stroke. This is done
- * in order to improve L1 cache utilization... */
- realign_in_loop = out_misaligned|inp_misaligned;
-
- if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
- return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
-
- /* this takes one "if" out of the loops */
- chunk = nbytes;
- chunk %= PADLOCK_CHUNK;
- if (chunk==0) chunk = PADLOCK_CHUNK;
-
- if (out_misaligned) {
- /* optmize for small input */
- allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
- out = alloca(0x10 + allocated);
- out = NEAREST_ALIGNED(out);
- }
- else
- out = out_arg;
-
- cdata = ALIGNED_CIPHER_DATA(ctx);
- padlock_verify_context(cdata);
-
- switch (EVP_CIPHER_CTX_mode(ctx)) {
- case EVP_CIPH_ECB_MODE:
- do {
- if (inp_misaligned)
- inp = padlock_memcpy(out, in_arg, chunk);
- else
- inp = in_arg;
- in_arg += chunk;
-
- padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
-
- if (out_misaligned)
- out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
- else
- out = out_arg+=chunk;
-
- nbytes -= chunk;
- chunk = PADLOCK_CHUNK;
- } while (nbytes);
- break;
-
- case EVP_CIPH_CBC_MODE:
- memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
- goto cbc_shortcut;
- do {
- if (iv != cdata->iv)
- memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
- chunk = PADLOCK_CHUNK;
- cbc_shortcut: /* optimize for small input */
- if (inp_misaligned)
- inp = padlock_memcpy(out, in_arg, chunk);
- else
- inp = in_arg;
- in_arg += chunk;
-
- iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);
-
- if (out_misaligned)
- out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
- else
- out = out_arg+=chunk;
-
- } while (nbytes -= chunk);
- memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
- break;
-
- case EVP_CIPH_CFB_MODE:
- memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
- chunk &= ~(AES_BLOCK_SIZE-1);
- if (chunk) goto cfb_shortcut;
- else goto cfb_skiploop;
- do {
- if (iv != cdata->iv)
- memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
- chunk = PADLOCK_CHUNK;
- cfb_shortcut: /* optimize for small input */
- if (inp_misaligned)
- inp = padlock_memcpy(out, in_arg, chunk);
- else
- inp = in_arg;
- in_arg += chunk;
-
- iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
-
- if (out_misaligned)
- out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
- else
- out = out_arg+=chunk;
-
- nbytes -= chunk;
- } while (nbytes >= AES_BLOCK_SIZE);
-
- cfb_skiploop:
- if (nbytes) {
- unsigned char *ivp = cdata->iv;
-
- if (iv != ivp) {
- memcpy(ivp, iv, AES_BLOCK_SIZE);
- iv = ivp;
- }
- ctx->num = nbytes;
- if (cdata->cword.b.encdec) {
- cdata->cword.b.encdec=0;
- padlock_reload_key();
- padlock_xcrypt_ecb(1,cdata,ivp,ivp);
- cdata->cword.b.encdec=1;
- padlock_reload_key();
- while(nbytes) {
- unsigned char c = *(in_arg++);
- *(out_arg++) = c ^ *ivp;
- *(ivp++) = c, nbytes--;
- }
- }
- else { padlock_reload_key();
- padlock_xcrypt_ecb(1,cdata,ivp,ivp);
- padlock_reload_key();
- while (nbytes) {
- *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
- ivp++, nbytes--;
- }
- }
- }
-
- memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
- break;
-
- case EVP_CIPH_OFB_MODE:
- memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
- chunk &= ~(AES_BLOCK_SIZE-1);
- if (chunk) do {
- if (inp_misaligned)
- inp = padlock_memcpy(out, in_arg, chunk);
- else
- inp = in_arg;
- in_arg += chunk;
-
- padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
-
- if (out_misaligned)
- out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
- else
- out = out_arg+=chunk;
-
- nbytes -= chunk;
- chunk = PADLOCK_CHUNK;
- } while (nbytes >= AES_BLOCK_SIZE);
-
- if (nbytes) {
- unsigned char *ivp = cdata->iv;
-
- ctx->num = nbytes;
- padlock_reload_key(); /* empirically found */
- padlock_xcrypt_ecb(1,cdata,ivp,ivp);
- padlock_reload_key(); /* empirically found */
- while (nbytes) {
- *(out_arg++) = *(in_arg++) ^ *ivp;
- ivp++, nbytes--;
- }
- }
-
- memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
- break;
-
- default:
- return 0;
- }
-
- /* Clean the realign buffer if it was used */
- if (out_misaligned) {
- volatile unsigned long *p=(void *)out;
- size_t n = allocated/sizeof(*p);
- while (n--) *p++=0;
- }
-
- memset(cdata->iv, 0, AES_BLOCK_SIZE);
-
- return 1;
-}
-
#endif /* OPENSSL_NO_AES */
/* ===== Random Number Generator ===== */