From 0ec55604c0aa0574fbb04bef9e9617d9ea980568 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Tue, 28 Jun 2011 13:31:58 +0000 Subject: [PATCH] Expand OPENSSL_ia32cap_P to 64 bits. It might appear controversial, because such an operation can be considered as breaking binary compatibility. However! OPENSSL_ia32cap_P is accessed by applications through the pointer returned by OPENSSL_ia32cap_loc() and such a change of the *internal* OPENSSL_ia32cap_P declaration is possible specifically on little-endian platforms, such as the x86[_64] ones in question. In addition, if a 32-bit application calls OPENSSL_ia32cap_loc(), it clears the upper half of the capability vector, maintaining the illusion that it's still 32 bits wide. --- crypto/cryptlib.c | 31 ++++++++++++++++++++++++++----- crypto/cryptlib.h | 2 +- crypto/perlasm/x86gas.pl | 2 +- crypto/perlasm/x86masm.pl | 2 +- crypto/perlasm/x86nasm.pl | 2 +- crypto/whrlpool/wp_block.c | 4 ++-- 6 files changed, 32 insertions(+), 11 deletions(-) diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c index 072b341ca2..8384b5d339 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -665,28 +665,49 @@ const char *CRYPTO_get_lock_name(int type) defined(__INTEL__) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) -unsigned long OPENSSL_ia32cap_P=0; -unsigned long *OPENSSL_ia32cap_loc(void) { return &OPENSSL_ia32cap_P; } +unsigned int OPENSSL_ia32cap_P[2]; +unsigned long *OPENSSL_ia32cap_loc(void) +{ if (sizeof(long)==4) + /* + * If 32-bit application pulls address of OPENSSL_ia32cap_P[0] + * clear second element to maintain the illusion that vector + * is 32-bit. 
+ */ + OPENSSL_ia32cap_P[1]=0; + return (unsigned long *)OPENSSL_ia32cap_P; +} #if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY) #define OPENSSL_CPUID_SETUP +#if defined(_WIN32) +typedef unsigned __int64 IA32CAP; +#else +typedef unsigned long long IA32CAP; +#endif void OPENSSL_cpuid_setup(void) { static int trigger=0; - unsigned long OPENSSL_ia32_cpuid(void); + IA32CAP OPENSSL_ia32_cpuid(void); + IA32CAP vec; char *env; if (trigger) return; trigger=1; if ((env=getenv("OPENSSL_ia32cap"))) - OPENSSL_ia32cap_P = strtoul(env,NULL,0)|(1<<10); +#if defined(_WIN32) + { if (!sscanf(env,"%I64i",&vec)) vec = strtoul(env,NULL,0); } +#else + vec = strtoull(env,NULL,0); +#endif else - OPENSSL_ia32cap_P = OPENSSL_ia32_cpuid()|(1<<10); + vec = OPENSSL_ia32_cpuid(); /* * |(1<<10) sets a reserved bit to signal that variable * was initialized already... This is to avoid interference * with cpuid snippets in ELF .init segment. */ + OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10); + OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32); } #endif diff --git a/crypto/cryptlib.h b/crypto/cryptlib.h index fc249c57f3..1761f6b668 100644 --- a/crypto/cryptlib.h +++ b/crypto/cryptlib.h @@ -99,7 +99,7 @@ extern "C" { #define HEX_SIZE(type) (sizeof(type)*2) void OPENSSL_cpuid_setup(void); -extern unsigned long OPENSSL_ia32cap_P; +extern unsigned int OPENSSL_ia32cap_P[]; void OPENSSL_showfatal(const char *,...); void *OPENSSL_stderr(void); extern int OPENSSL_NONPIC_relocated; diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl index 6eab727fd4..b470507730 100644 --- a/crypto/perlasm/x86gas.pl +++ b/crypto/perlasm/x86gas.pl @@ -150,7 +150,7 @@ sub ::public_label sub ::file_end { if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { - my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,4"; + my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; if ($::elf) { push (@out,"$tmp,4\n"); } else { push (@out,"$tmp\n"); } } diff --git a/crypto/perlasm/x86masm.pl 
b/crypto/perlasm/x86masm.pl index 3d50e4a786..03e7ba66ae 100644 --- a/crypto/perlasm/x86masm.pl +++ b/crypto/perlasm/x86masm.pl @@ -129,7 +129,7 @@ ___ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { my $comm=<<___; .bss SEGMENT 'BSS' -COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD +COMM ${nmdecor}OPENSSL_ia32cap_P:QWORD .bss ENDS ___ # comment out OPENSSL_ia32cap_P declarations diff --git a/crypto/perlasm/x86nasm.pl b/crypto/perlasm/x86nasm.pl index ce2bed9bb2..1a384582bf 100644 --- a/crypto/perlasm/x86nasm.pl +++ b/crypto/perlasm/x86nasm.pl @@ -114,7 +114,7 @@ sub ::file_end { if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { my $comm=<<___; ${drdecor}segment .bss -${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 4 +${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 8 ___ # comment out OPENSSL_ia32cap_P declarations grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out; diff --git a/crypto/whrlpool/wp_block.c b/crypto/whrlpool/wp_block.c index 221f6cc59f..824ed1827c 100644 --- a/crypto/whrlpool/wp_block.c +++ b/crypto/whrlpool/wp_block.c @@ -68,9 +68,9 @@ typedef unsigned long long u64; CPUs this is actually faster! */ # endif # define GO_FOR_MMX(ctx,inp,num) do { \ - extern unsigned long OPENSSL_ia32cap_P; \ + extern unsigned int OPENSSL_ia32cap_P[]; \ void whirlpool_block_mmx(void *,const void *,size_t); \ - if (!(OPENSSL_ia32cap_P & (1<<23))) break; \ + if (!(OPENSSL_ia32cap_P[0] & (1<<23))) break; \ whirlpool_block_mmx(ctx->H.c,inp,num); return; \ } while (0) # endif -- 2.40.0