From 1fda639ae756eb87f585b7d5114490d4ffe649a1 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 23 Sep 2012 20:29:03 +0000 Subject: [PATCH] sparcv9cap.c: add SPARC-T4 feature detection. Submitted by: David Miller --- crypto/sparc_arch.h | 35 +++++++++++++++++++ crypto/sparccpuid.S | 31 +++++++++++++++++ crypto/sparcv9cap.c | 83 +++++++++++++++++++++++++++++++-------------- 3 files changed, 123 insertions(+), 26 deletions(-) create mode 100644 crypto/sparc_arch.h diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h new file mode 100644 index 0000000000..f675710307 --- /dev/null +++ b/crypto/sparc_arch.h @@ -0,0 +1,35 @@ +#ifndef __SPARC_ARCH_H__ +#define __SPARC_ARCH_H__ + +#if !__ASSEMBLER__ +extern unsigned int OPENSSL_sparcv9cap_P[]; +#endif + +#define SPARCV9_TICK_PRIVILEGED (1<<0) +#define SPARCV9_PREFER_FPU (1<<1) +#define SPARCV9_VIS1 (1<<2) +#define SPARCV9_VIS2 (1<<3) /* reserved */ +#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ +#define SPARCV9_BLK (1<<5) /* VIS1 block copy */ +#define SPARCV9_VIS3 (1<<6) /* set by OPENSSL_cpuid_setup after the xmulx probe runs */ +#define SPARCV9_RANDOM (1<<7) /* set by OPENSSL_cpuid_setup after the "random" probe runs */ + +/* + * OPENSSL_sparcv9cap_P[1] is a copy of the Compatibility Feature Register, + * %asr26, present on SPARC-T4 and later. There is no SPARCV9_CFR bit in + * OPENSSL_sparcv9cap_P[0], as the %cfr copy is sufficient... 
+ */ +#define CFR_AES 0x00000001 /* Supports AES opcodes */ +#define CFR_DES 0x00000002 /* Supports DES opcodes */ +#define CFR_KASUMI 0x00000004 /* Supports KASUMI opcodes */ +#define CFR_CAMELLIA 0x00000008 /* Supports CAMELLIA opcodes */ +#define CFR_MD5 0x00000010 /* Supports MD5 opcodes */ +#define CFR_SHA1 0x00000020 /* Supports SHA1 opcodes */ +#define CFR_SHA256 0x00000040 /* Supports SHA256 opcodes */ +#define CFR_SHA512 0x00000080 /* Supports SHA512 opcodes */ +#define CFR_MPMUL 0x00000100 /* Supports MPMUL opcodes */ +#define CFR_MONTMUL 0x00000200 /* Supports MONTMUL opcodes */ +#define CFR_MONTSQR 0x00000400 /* Supports MONTSQR opcodes */ +#define CFR_CRC32C 0x00000800 /* Supports CRC32C opcodes */ + +#endif /* __SPARC_ARCH_H__ */ diff --git a/crypto/sparccpuid.S b/crypto/sparccpuid.S index ccdd31e041..8baded0d40 100644 --- a/crypto/sparccpuid.S +++ b/crypto/sparccpuid.S @@ -255,6 +255,11 @@ _sparcv9_vis1_probe: ! UltraSPARC IIe 7 ! UltraSPARC III 7 ! UltraSPARC T1 24 +! SPARC T4 65(*) +! +! (*) result has lesser to do with VIS instruction latencies, rdtick +! appears that slow, but it does the trick in sense that FP and +! VIS code paths are still slower than integer-only ones. ! ! Numbers for T2 and SPARC64 V-VII are more than welcomed. ! 
@@ -264,6 +269,8 @@ _sparcv9_vis1_probe: .global _sparcv9_vis1_instrument .align 8 _sparcv9_vis1_instrument: + .word 0x81b00d80 !fxor %f0,%f0,%f0 + .word 0x85b08d82 !fxor %f2,%f2,%f2 .word 0x91410000 !rd %tick,%o0 .word 0x81b00d80 !fxor %f0,%f0,%f0 .word 0x85b08d82 !fxor %f2,%f2,%f2 @@ -318,6 +325,30 @@ _sparcv9_fmadd_probe: .type _sparcv9_fmadd_probe,#function .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe +.global _sparcv9_rdcfr +.align 8 +_sparcv9_rdcfr: + retl + .word 0x91468000 !rd %asr26,%o0 +.type _sparcv9_rdcfr,#function +.size _sparcv9_rdcfr,.-_sparcv9_rdcfr + +.global _sparcv9_vis3_probe +.align 8 +_sparcv9_vis3_probe: + retl + .word 0x81b022a0 !xmulx %g0,%g0,%g0 +.type _sparcv9_vis3_probe,#function +.size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe + +.global _sparcv9_random +.align 8 +_sparcv9_random: + retl + .word 0x91b002a0 !random %o0 +.type _sparcv9_random,#function +.size _sparcv9_random,.-_sparcv9_random + .global OPENSSL_cleanse .align 32 OPENSSL_cleanse: diff --git a/crypto/sparcv9cap.c b/crypto/sparcv9cap.c index b961cbe3fa..149eb5232d 100644 --- a/crypto/sparcv9cap.c +++ b/crypto/sparcv9cap.c @@ -6,14 +6,12 @@ #include #include -#define SPARCV9_TICK_PRIVILEGED (1<<0) -#define SPARCV9_PREFER_FPU (1<<1) -#define SPARCV9_VIS1 (1<<2) -#define SPARCV9_VIS2 (1<<3) /* reserved */ -#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ -#define SPARCV9_BLK (1<<5) /* VIS1 block copy */ +#include "sparc_arch.h" -static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED; +#if defined(__GNUC__) && defined(__linux) +__attribute__((visibility("hidden"))) +#endif +unsigned int OPENSSL_sparcv9cap_P[2]={SPARCV9_TICK_PRIVILEGED,0}; int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) { @@ -21,7 +19,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG 
*n0, int num); if (num>=8 && !(num&1) && - (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == + (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); else @@ -33,12 +31,15 @@ void _sparcv9_vis1_probe(void); unsigned long _sparcv9_vis1_instrument(void); void _sparcv9_vis2_probe(void); void _sparcv9_fmadd_probe(void); +unsigned long _sparcv9_rdcfr(void); +void _sparcv9_vis3_probe(void); +unsigned long _sparcv9_random(void); size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t); -size_t _sparcv8_vis1_instrument_bus2(unsigned int *,size_t,size_t); +size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t); unsigned long OPENSSL_rdtsc(void) { - if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED) + if (OPENSSL_sparcv9cap_P[0]&SPARCV9_TICK_PRIVILEGED) #if defined(__sun) && defined(__SVR4) return gethrtime(); #else @@ -50,7 +51,7 @@ unsigned long OPENSSL_rdtsc(void) size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt) { - if (OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK) == + if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == SPARCV9_BLK) return _sparcv9_vis1_instrument_bus(out,cnt); else @@ -59,7 +60,7 @@ size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt) size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max) { - if (OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK) == + if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == SPARCV9_BLK) return _sparcv9_vis1_instrument_bus2(out,cnt,max); else @@ -90,18 +91,18 @@ static int walk_nodename(di_node_t node, di_node_name_t di_node_name) if (!strcmp (name,"SUNW,UltraSPARC") || !strncmp(name,"SUNW,UltraSPARC-I",17)) /* covers II,III,IV */ { - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; /* %tick is privileged only on UltraSPARC-I/II, but not IIe */ if (name[14]!='\0' && name[17]!='\0' && name[18]!='\0') - OPENSSL_sparcv9cap_P &= 
~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } /* This is expected to catch remaining UltraSPARCs, such as T1 */ else if (!strncmp(name,"SUNW,UltraSPARC",15)) { - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } @@ -120,7 +121,7 @@ void OPENSSL_cpuid_setup(void) if ((e=getenv("OPENSSL_sparcv9cap"))) { - OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); + OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0); return; } @@ -128,17 +129,17 @@ void OPENSSL_cpuid_setup(void) { if (strcmp(si,"sun4v")) /* FPU is preferred for all CPUs, but US-T1/2 */ - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU; } if (sysinfo(SI_ISALIST,si,sizeof(si))>0) { if (strstr(si,"+vis")) - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK; if (strstr(si,"+vis2")) { - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return; } } @@ -198,12 +199,14 @@ void OPENSSL_cpuid_setup(void) if ((e=getenv("OPENSSL_sparcv9cap"))) { - OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); + OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0); + if ((e=strchr(e,':'))) + OPENSSL_sparcv9cap_P[1]=strtoul(e+1,NULL,0); return; } /* Initial value, fits UltraSPARC-I&II... 
*/ - OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED; sigfillset(&all_masked); sigdelset(&all_masked,SIGILL); @@ -226,27 +229,55 @@ void OPENSSL_cpuid_setup(void) if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_rdtick(); - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; } if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_vis1_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK; /* detect UltraSPARC-Tx, see sparccpud.S for details... */ if (_sparcv9_vis1_instrument() >= 12) - OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); + OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); else { _sparcv9_vis2_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; } } if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_fmadd_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_FMADD; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD; + } + + /* + * Unlike VIS2, the VIS3 flag is tested independently of VIS1, + * because VIS3 also defines integer instructions. + */ + if (sigsetjmp(common_jmp,1) == 0) + { + _sparcv9_vis3_probe(); + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3; + } + + if (sigsetjmp(common_jmp,1) == 0) + { + (void)_sparcv9_random(); + OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM; + } + + /* + * Pending a better solution, _sparcv9_rdcfr is gated by the + * VIS3 flag, because it enters an uninterruptible endless + * loop on UltraSPARC II running Solaris. Things might be + * different on Linux... + */ + if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) && + sigsetjmp(common_jmp,1) == 0) + { + OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr(); } sigaction(SIGBUS,&bus_oact,NULL); -- 2.40.0