x86[_64]cpuid.pl: harmonize usage of reserved bits #20 and #30.

author Andy Polyakov <appro@openssl.org>

Fri, 27 May 2011 15:32:43 +0000 (15:32 +0000)

committer Andy Polyakov <appro@openssl.org>

Fri, 27 May 2011 15:32:43 +0000 (15:32 +0000)
author Andy Polyakov <appro@openssl.org>
Fri, 27 May 2011 15:32:43 +0000 (15:32 +0000)
committer Andy Polyakov <appro@openssl.org>
Fri, 27 May 2011 15:32:43 +0000 (15:32 +0000)
diff --git a/crypto/rc4/asm/rc4-x86_64.pl b/crypto/rc4/asm/rc4-x86_64.pl

index b08cc256566c05d39f35b0e84f8d7b3d53ea3298..44466ee97a001c31b3086789025b2b98e1c8c9dd 100755 (executable)
--- a/crypto/rc4/asm/rc4-x86_64.pl
+++ b/crypto/rc4/asm/rc4-x86_64.pl
@@ -159,8 +159,8 @@ $code.=<<___;
         movl    ($dat,$XX[0],4),$TX[0]#d
         test    \$-16,$len
         jz      .Lloop1
-       bt      \$30,%r8d       # Intel CPU Family 6
-       jc      .L16x
+       bt      \$30,%r8d       # Intel CPU?
+       jc      .Lintel
         and     \$7,$TX[1]
         lea     1($XX[0]),$XX[1]
         jz      .Loop8
@@ -217,7 +217,7 @@ $code.=<<___;
         jmp     .Lexit
  
  .align 16
-.L16x:
+.Lintel:
         test    \$-32,$len
         jz      .Lloop1
         and     \$15,$TX[1]
@@ -438,10 +438,8 @@ RC4_set_key:
         xor     %r11,%r11
  
         mov     OPENSSL_ia32cap_P(%rip),$idx#d
-       bt      \$20,$idx#d     # Intel CPU
-       jnc     .Lw1stloop
-       bt      \$30,$idx#d     # Intel CPU Family 6
-       jnc     .Lc1stloop
+       bt      \$20,$idx#d     # RC4_CHAR?
+       jc      .Lc1stloop
         jmp     .Lw1stloop
  
  .align 16
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl

index 84b1cbe85f59984d99e5ecda20b0449c057d1729..7b76522bd881f8eac8a53939a07941d19f957185 100644 (file)
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -124,13 +124,14 @@ OPENSSL_ia32_cpuid:
  .Lnocacheinfo:
         mov     \$1,%eax
         cpuid
+       and     \$0xbfefffff,%edx       # force reserved bits to 0
         cmp     \$0,%r9d
         jne     .Lnotintel
-       or      \$0x00100000,%edx       # use reserved 20th bit to engage RC4_CHAR
+       or      \$0x40000000,%edx       # set reserved bit#30 on Intel CPUs
         and     \$15,%ah
         cmp     \$15,%ah                # examine Family ID
-       je      .Lnotintel
-       or      \$0x40000000,%edx       # use reserved bit to skip unrolled loop
+       jne     .Lnotintel
+       or      \$0x00100000,%edx       # set reserved bit#20 to engage RC4_CHAR
  .Lnotintel:
         bt      \$28,%edx               # test hyper-threading bit
         jnc     .Lgeneric
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl

index f424c2debeed80da5038f4ec0740e918322a9a8a..716f44da92a5d75f2d55c77801ab2e311bcdedd8 100644 (file)
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -92,13 +92,15 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
  &set_label("nocacheinfo");
         &mov    ("eax",1);
         &cpuid  ();
+       &and    ("edx",~(1<<20|1<<30)); # force reserved bits to 0
         &cmp    ("ebp",0);
-       &jne    (&label("notP4"));
+       &jne    (&label("notintel"));
+       &or     ("edx",1<<30);          # set reserved bit#30 on Intel CPUs
         &and    (&HB("eax"),15);        # familiy ID
         &cmp    (&HB("eax"),15);        # P4?
-       &jne    (&label("notP4"));
-       &or     ("edx",1<<20);          # use reserved bit to engage RC4_CHAR
-&set_label("notP4");
+       &jne    (&label("notintel"));
+       &or     ("edx",1<<20);          # set reserved bit#20 to engage RC4_CHAR
+&set_label("notintel");
         &bt     ("edx",28);             # test hyper-threading bit
         &jnc    (&label("generic"));
         &and    ("edx",0xefffffff);
diff --git a/doc/crypto/OPENSSL_ia32cap.pod b/doc/crypto/OPENSSL_ia32cap.pod

index af6b4f3a4d1d87fa4a16ef76c84c4cd8abb0b8e9..3f6458c6bd1e2cfbf6b74421de4622ce09117e36 100644 (file)
--- a/doc/crypto/OPENSSL_ia32cap.pod
+++ b/doc/crypto/OPENSSL_ia32cap.pod
@@ -37,14 +37,13 @@ moment of this writing following bits are significant:
  =item bit #28 denoting Hyperthreading, which is used to distiguish
        cores with shared cache;
  
-=item bit #30, reserved by Intel, is used to choose among RC4 code
-      paths;
+=item bit #30, reserved by Intel, denotes specifically Intel CPUs;
  
  =item bit #33 denoting availability of PCLMULQDQ instruction;
  
  =item bit #41 denoting SSSE3, Supplemental SSE3, support;
  
-=item bit #43 denoting AMD XOP support (forced to zero on Intel);
+=item bit #43 denoting AMD XOP support (forced to zero on non-AMD CPUs);
  
  =item bit #57 denoting AES-NI instruction set extension;
author	Andy Polyakov <appro@openssl.org>
	Fri, 27 May 2011 15:32:43 +0000 (15:32 +0000)
committer	Andy Polyakov <appro@openssl.org>
	Fri, 27 May 2011 15:32:43 +0000 (15:32 +0000)
crypto/rc4/asm/rc4-x86_64.pl		patch | blob | history
crypto/x86_64cpuid.pl		patch | blob | history
crypto/x86cpuid.pl		patch | blob | history
doc/crypto/OPENSSL_ia32cap.pod		patch | blob | history