From 48b7b96bd0ddca21e7cce8f1f7e904cbc361005a Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 20 May 2013 00:16:18 +0200 Subject: [PATCH] sparcv9cap.c: update from master. --- crypto/sparc_arch.h | 101 +++++++++++++++++++++++++++++++++++ crypto/sparccpuid.S | 127 ++++++++++++++++++++++++++++++++++++++++++++ crypto/sparcv9cap.c | 111 ++++++++++++++++++++++++++++++-------- 3 files changed, 316 insertions(+), 23 deletions(-) create mode 100644 crypto/sparc_arch.h diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h new file mode 100644 index 0000000000..c3843c01b8 --- /dev/null +++ b/crypto/sparc_arch.h @@ -0,0 +1,101 @@ +#ifndef __SPARC_ARCH_H__ +#define __SPARC_ARCH_H__ + +#define SPARCV9_TICK_PRIVILEGED (1<<0) +#define SPARCV9_PREFER_FPU (1<<1) +#define SPARCV9_VIS1 (1<<2) +#define SPARCV9_VIS2 (1<<3) /* reserved */ +#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ +#define SPARCV9_BLK (1<<5) /* VIS1 block copy */ +#define SPARCV9_VIS3 (1<<6) +#define SPARCV9_RANDOM (1<<7) +#define SPARCV9_64BIT_STACK (1<<8) + +/* + * OPENSSL_sparcv9cap_P[1] is copy of Compatibility Feature Register, + * %asr26, SPARC-T4 and later. There is no SPARCV9_CFR bit in + * OPENSSL_sparcv9cap_P[0], as %cfr copy is sufficient... + */ +#define CFR_AES 0x00000001 /* Supports AES opcodes */ +#define CFR_DES 0x00000002 /* Supports DES opcodes */ +#define CFR_KASUMI 0x00000004 /* Supports KASUMI opcodes */ +#define CFR_CAMELLIA 0x00000008 /* Supports CAMELLIA opcodes*/ +#define CFR_MD5 0x00000010 /* Supports MD5 opcodes */ +#define CFR_SHA1 0x00000020 /* Supports SHA1 opcodes */ +#define CFR_SHA256 0x00000040 /* Supports SHA256 opcodes */ +#define CFR_SHA512 0x00000080 /* Supports SHA512 opcodes */ +#define CFR_MPMUL 0x00000100 /* Supports MPMUL opcodes */ +#define CFR_MONTMUL 0x00000200 /* Supports MONTMUL opcodes */ +#define CFR_MONTSQR 0x00000400 /* Supports MONTSQR opcodes */ +#define CFR_CRC32C 0x00000800 /* Supports CRC32C opcodes */ + +#if defined(OPENSSL_PIC) && !defined(__PIC__) +# define __PIC__ +#endif + +#if defined(__SUNPRO_C) && defined(__sparcv9) && !defined(__arch64__) +# define __arch64__ +#endif + +#define SPARC_PIC_THUNK(reg) \ + .align 32; \ +.Lpic_thunk: \ + jmp %o7 + 8; \ + add %o7, reg, reg; + +#define SPARC_PIC_THUNK_CALL(reg) \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ + call .Lpic_thunk; \ + or reg, %lo(_GLOBAL_OFFSET_TABLE_+4), reg; + +#if 1 +# define SPARC_SETUP_GOT_REG(reg) SPARC_PIC_THUNK_CALL(reg) +#else +# define SPARC_SETUP_GOT_REG(reg) \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ + call .+8; \ + or reg,%lo(_GLOBAL_OFFSET_TABLE_+4), reg; \ + add %o7, reg, reg +#endif + +#if defined(__arch64__) + +# define SPARC_LOAD_ADDRESS(SYM, reg) \ + setx SYM, %o7, reg; +# define LDPTR ldx +# define SIZE_T_CC %xcc +# define STACK_FRAME 192 +# define STACK_BIAS 2047 +# define STACK_7thARG (STACK_BIAS+176) + +#else + +# define SPARC_LOAD_ADDRESS(SYM, reg) \ + set SYM, reg; +# define LDPTR ld +# define SIZE_T_CC %icc +# define STACK_FRAME 112 +# define STACK_BIAS 0 +# define STACK_7thARG 92 +# define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg) + +#endif + +#ifdef __PIC__ +# undef SPARC_LOAD_ADDRESS +# undef SPARC_LOAD_ADDRESS_LEAF +# define SPARC_LOAD_ADDRESS(SYM, reg) \ + SPARC_SETUP_GOT_REG(reg); \ + sethi %hi(SYM), %o7; \ + or %o7, %lo(SYM), %o7; \ + LDPTR [reg + %o7], reg; +#endif + +#ifndef SPARC_LOAD_ADDRESS_LEAF +# define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) \ + mov %o7, tmp; \ + SPARC_LOAD_ADDRESS(SYM, reg) \ + mov tmp, %o7; +#endif + +#endif /* __SPARC_ARCH_H__ */ diff --git a/crypto/sparccpuid.S b/crypto/sparccpuid.S index 0cc247e489..eea2006fba 100644 --- a/crypto/sparccpuid.S +++ b/crypto/sparccpuid.S @@ -251,6 +251,11 @@ _sparcv9_vis1_probe: ! UltraSPARC IIe 7 ! UltraSPARC III 7 ! UltraSPARC T1 24 +! SPARC T4 65(*) +! +! (*) result has lesser to do with VIS instruction latencies, rdtick +! appears that slow, but it does the trick in sense that FP and +! VIS code paths are still slower than integer-only ones. ! ! Numbers for T2 and SPARC64 V-VII are more than welcomed. ! @@ -260,6 +265,8 @@ _sparcv9_vis1_probe: .global _sparcv9_vis1_instrument .align 8 _sparcv9_vis1_instrument: + .word 0x81b00d80 !fxor %f0,%f0,%f0 + .word 0x85b08d82 !fxor %f2,%f2,%f2 .word 0x91410000 !rd %tick,%o0 .word 0x81b00d80 !fxor %f0,%f0,%f0 .word 0x85b08d82 !fxor %f2,%f2,%f2 @@ -314,6 +321,30 @@ _sparcv9_fmadd_probe: .type _sparcv9_fmadd_probe,#function .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe +.global _sparcv9_rdcfr +.align 8 +_sparcv9_rdcfr: + retl + .word 0x91468000 !rd %asr26,%o0 +.type _sparcv9_rdcfr,#function +.size _sparcv9_rdcfr,.-_sparcv9_rdcfr + +.global _sparcv9_vis3_probe +.align 8 +_sparcv9_vis3_probe: + retl + .word 0x81b022a0 !xmulx %g0,%g0,%g0 +.type _sparcv9_vis3_probe,#function +.size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe + +.global _sparcv9_random +.align 8 +_sparcv9_random: + retl + .word 0x91b002a0 !random %o0 +.type _sparcv9_random,#function +.size _sparcv9_random,.-_sparcv9_vis3_probe + .global OPENSSL_cleanse .align 32 OPENSSL_cleanse: @@ -397,6 +428,102 @@ OPENSSL_cleanse: .type OPENSSL_cleanse,#function .size OPENSSL_cleanse,.-OPENSSL_cleanse +.global _sparcv9_vis1_instrument_bus +.align 8 +_sparcv9_vis1_instrument_bus: + mov %o1,%o3 ! save cnt + .word 0x99410000 !rd %tick,%o4 ! tick + mov %o4,%o5 ! lasttick = tick + set 0,%g4 ! diff + + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + +.Loop: .word 0x99410000 !rd %tick,%o4 + sub %o4,%o5,%g4 ! diff=tick-lasttick + mov %o4,%o5 ! lasttick=tick + + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + subcc %o1,1,%o1 ! --$cnt + bnz .Loop + add %o0,4,%o0 ! ++$out + + retl + mov %o3,%o0 +.type _sparcv9_vis1_instrument_bus,#function +.size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus + +.global _sparcv9_vis1_instrument_bus2 +.align 8 +_sparcv9_vis1_instrument_bus2: + mov %o1,%o3 ! save cnt + sll %o1,2,%o1 ! cnt*=4 + + .word 0x99410000 !rd %tick,%o4 ! tick + mov %o4,%o5 ! lasttick = tick + set 0,%g4 ! diff + + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + + .word 0x99410000 !rd %tick,%o4 ! tick + sub %o4,%o5,%g4 ! diff=tick-lasttick + mov %o4,%o5 ! lasttick=tick + mov %g4,%g5 ! lastdiff=diff +.Loop2: + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + + subcc %o2,1,%o2 ! --max + bz .Ldone2 + nop + + .word 0x99410000 !rd %tick,%o4 ! tick + sub %o4,%o5,%g4 ! diff=tick-lasttick + mov %o4,%o5 ! lasttick=tick + cmp %g4,%g5 + mov %g4,%g5 ! lastdiff=diff + + .word 0x83408000 !rd %ccr,%g1 + and %g1,4,%g1 ! isolate zero flag + xor %g1,4,%g1 ! flip zero flag + + subcc %o1,%g1,%o1 ! conditional --$cnt + bnz .Loop2 + add %o0,%g1,%o0 ! conditional ++$out + +.Ldone2: + srl %o1,2,%o1 + retl + sub %o3,%o1,%o0 +.type _sparcv9_vis1_instrument_bus2,#function +.size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2 + .section ".init",#alloc,#execinstr call OPENSSL_cpuid_setup nop diff --git a/crypto/sparcv9cap.c b/crypto/sparcv9cap.c index 43b3ac6f81..70a581992d 100644 --- a/crypto/sparcv9cap.c +++ b/crypto/sparcv9cap.c @@ -4,15 +4,15 @@ #include #include #include +#include #include -#define SPARCV9_TICK_PRIVILEGED (1<<0) -#define SPARCV9_PREFER_FPU (1<<1) -#define SPARCV9_VIS1 (1<<2) -#define SPARCV9_VIS2 (1<<3) /* reserved */ -#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ +#include "sparc_arch.h" -static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED; +#if defined(__GNUC__) && defined(__linux) +__attribute__((visibility("hidden"))) +#endif +unsigned int OPENSSL_sparcv9cap_P[2]={SPARCV9_TICK_PRIVILEGED,0}; int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) { @@ -20,7 +20,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); if (num>=8 && !(num&1) && - (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == + (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); else @@ -32,10 +32,15 @@ void _sparcv9_vis1_probe(void); unsigned long _sparcv9_vis1_instrument(void); void _sparcv9_vis2_probe(void); void _sparcv9_fmadd_probe(void); +unsigned long _sparcv9_rdcfr(void); +void _sparcv9_vis3_probe(void); +unsigned long _sparcv9_random(void); +size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t); +size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t); unsigned long OPENSSL_rdtsc(void) { - if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED) + if (OPENSSL_sparcv9cap_P[0]&SPARCV9_TICK_PRIVILEGED) #if defined(__sun) && defined(__SVR4) return gethrtime(); #else @@ -45,6 +50,24 @@ unsigned long OPENSSL_rdtsc(void) return _sparcv9_rdtick(); } +size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt) + { + if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == + SPARCV9_BLK) + return _sparcv9_vis1_instrument_bus(out,cnt); + else + return 0; + } + +size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max) + { + if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == + SPARCV9_BLK) + return _sparcv9_vis1_instrument_bus2(out,cnt,max); + else + return 0; + } + #if 0 && defined(__sun) && defined(__SVR4) /* This code path is disabled, because of incompatibility of * libdevinfo.so.1 and libmalloc.so.1 (see below for details) @@ -69,18 +92,18 @@ static int walk_nodename(di_node_t node, di_node_name_t di_node_name) if (!strcmp (name,"SUNW,UltraSPARC") || !strncmp(name,"SUNW,UltraSPARC-I",17)) /* covers II,III,IV */ { - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; /* %tick is privileged only on UltraSPARC-I/II, but not IIe */ if (name[14]!='\0' && name[17]!='\0' && name[18]!='\0') - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } /* This is expected to catch remaining UltraSPARCs, such as T1 */ else if (!strncmp(name,"SUNW,UltraSPARC",15)) { - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } @@ -99,7 +122,7 @@ void OPENSSL_cpuid_setup(void) if ((e=getenv("OPENSSL_sparcv9cap"))) { - OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); + OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0); return; } @@ -107,17 +130,17 @@ void OPENSSL_cpuid_setup(void) { if (strcmp(si,"sun4v")) /* FPU is preferred for all CPUs, but US-T1/2 */ - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU; } if (sysinfo(SI_ISALIST,si,sizeof(si))>0) { if (strstr(si,"+vis")) - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK; if (strstr(si,"+vis2")) { - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return; } } @@ -177,12 +200,14 @@ void OPENSSL_cpuid_setup(void) if ((e=getenv("OPENSSL_sparcv9cap"))) { - OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); + OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0); + if ((e=strchr(e,':'))) + OPENSSL_sparcv9cap_P[1]=strtoul(e+1,NULL,0); return; } /* Initial value, fits UltraSPARC-I&II... */ - OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED; sigfillset(&all_masked); sigdelset(&all_masked,SIGILL); @@ -205,33 +230,73 @@ void OPENSSL_cpuid_setup(void) if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_rdtick(); - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; } if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_vis1_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK; /* detect UltraSPARC-Tx, see sparccpud.S for details... */ if (_sparcv9_vis1_instrument() >= 12) - OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); + OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); else { _sparcv9_vis2_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; } } if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_fmadd_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_FMADD; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD; + } + + /* + * VIS3 flag is tested independently from VIS1, unlike VIS2 that is, + * because VIS3 defines even integer instructions. + */ + if (sigsetjmp(common_jmp,1) == 0) + { + _sparcv9_vis3_probe(); + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3; + } + + if (sigsetjmp(common_jmp,1) == 0) + { + (void)_sparcv9_random(); + OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM; + } + + /* + * In wait for better solution _sparcv9_rdcfr is masked by + * VIS3 flag, because it goes to uninterruptable endless + * loop on UltraSPARC II running Solaris. Things might be + * different on Linux... + */ + if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) && + sigsetjmp(common_jmp,1) == 0) + { + OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr(); } sigaction(SIGBUS,&bus_oact,NULL); sigaction(SIGILL,&ill_oact,NULL); sigprocmask(SIG_SETMASK,&oset,NULL); + + if (sizeof(size_t)==8) + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; +#ifdef __linux + else + { + int ret = syscall(340); + + if (ret>=0 && ret&1) + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; + } +#endif } #endif -- 2.40.0