# Sandy Bridge 5.05[+5.0(6.1)] 10.06(11.15) 5.98(7.05) +68%(+58%)
# Ivy Bridge 5.05[+4.6] 9.65 5.54 +74%
# Haswell 4.43[+3.6(4.2)] 8.00(8.58) 4.55(5.21) +75%(+65%)
+# Skylake 2.63[+3.5(4.1)] 6.17(6.69) 4.23(4.44) +46%(+51%)
# Bulldozer 5.77[+6.0] 11.72 6.37 +84%
#
# AES-192-CBC
# Sandy Bridge 7.05 12.06(13.15) 7.12(7.72) +69%(+70%)
# Ivy Bridge 7.05 11.65 7.12 +64%
# Haswell 6.19 9.76(10.34) 6.21(6.25) +57%(+65%)
+# Skylake 3.62 7.16(7.68) 4.56(4.76) +57%(+61$)
# Bulldozer 8.00 13.95 8.25 +69%
#
# (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for
# Sandy Bridge 5.05/6.05/7.05+11.6 13.0 +28%/36%/43%
# Ivy Bridge 5.05/6.05/7.05+10.3 11.6 +32%/41%/50%
# Haswell 4.43/5.29/6.19+7.80 8.79 +39%/49%/59%
+# Skylake 2.62/3.14/3.62+7.70 8.10 +27%/34%/40%
# Bulldozer 5.77/6.89/8.00+13.7 13.7 +42%/50%/58%
#
# (*) there are XOP, AVX1 and AVX2 code pathes, meaning that
# Westmere 3.77/1.25 1.25 1.25 1.26
# * Bridge 5.07/0.74 0.75 0.90 0.85
# Haswell 4.44/0.63 0.63 0.73 0.63
+# Skylake 2.62/0.63 0.63 0.63 0.63
# Silvermont 5.75/3.54 3.56 4.12 3.87(*)
# Bulldozer 5.77/0.70 0.72 0.90 0.70
#
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
# Locktyukhin of Intel Corp. who verified that it reduces shuffles
# pressure with notable relative improvement, achieving 1.0 cycle per
-# byte processed with 128-bit key on Haswell processor, and 0.74 -
-# on Broadwell. [Mentioned results are raw profiled measurements for
-# favourable packet size, one divisible by 96. Applications using the
-# EVP interface will observe a few percent worse performance.]
+# byte processed with 128-bit key on Haswell processor, 0.74 - on
+# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled
+# measurements for favourable packet size, one divisible by 96.
+# Applications using the EVP interface will observe a few percent
+# worse performance.]
#
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
# Ivy Bridge 1.80(+7%)
# Haswell 0.55(+93%) (if system doesn't support AVX)
# Broadwell 0.45(+110%)(if system doesn't support AVX)
+# Skylake 0.44(+110%)(if system doesn't support AVX)
# Bulldozer 1.49(+27%)
# Silvermont 2.88(+13%)
# CPUs such as Sandy and Ivy Bridge can execute it, the code performs
# sub-optimally in comparison to above mentioned version. But thanks
# to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that
-# it performs in 0.41 cycles per byte on Haswell processor, and in
-# 0.29 on Broadwell.
+# it performs in 0.41 cycles per byte on Haswell processor, in
+# 0.29 on Broadwell, and in 0.36 on Skylake.
#
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
# Core2 6.5 5.8 12.3 7.7 +60%
# Westmere 4.3 5.2 9.5 7.0 +36%
# Sandy Bridge 4.2 5.5 9.7 6.8 +43%
+# Ivy Bridge 4.1 5.2 9.3 6.0 +54%
+# Haswell 4.0 5.0 9.0 5.7 +60%
+# Skylake 6.3(**) 5.0 11.3 5.3 +110%
# Atom 9.3 6.5 15.8 11.1 +42%
# VIA Nano 6.3 5.4 11.7 8.6 +37%
-# Ivy Bridge 4.1 5.2 9.3 6.0 +54%
# Bulldozer 4.5 5.4 9.9 7.7 +29%
#
# (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
# is +53%...
+# (**) unidentified anomaly;
my ($rc4,$md5)=(1,1); # what to generate?
my $D="#" if (!$md5); # if set to "#", MD5 is stitched into RC4(),
# Sandy Bridge (8.16 +5.15=13.3)/n 4.99 5.98 +80%
# Ivy Bridge (8.08 +5.14=13.2)/n 4.60 5.54 +68%
# Haswell(iii) (8.96 +5.00=14.0)/n 3.57 4.55 +160%
+# Skylake (8.70 +5.00=13.7)/n 3.64 4.20 +145%
# Bulldozer (9.76 +5.76=15.5)/n 5.95 6.37 +64%
#
# (i) multi-block CBC encrypt with 128-bit key;
# Sandy Bridge 7.70 6.10/+26% 4.99/+54%
# Ivy Bridge 6.06 4.67/+30% 4.60/+32%
# Haswell 5.45 4.15/+31% 3.57/+53%
+# Skylake 5.18 4.06/+28% 3.54/+46%
# Bulldozer 9.11 5.95/+53%
# VIA Nano 9.32 7.15/+30%
# Atom 10.3 9.17/+12%
# Sandy Bridge (20.5 +5.15=25.7)/n 11.6 13.0 +103%
# Ivy Bridge (20.4 +5.14=25.5)/n 10.3 11.6 +82%
# Haswell(iii) (21.0 +5.00=26.0)/n 7.80 8.79 +170%
+# Skylake (18.9 +5.00=23.9)/n 7.70 8.17 +170%
# Bulldozer (21.6 +5.76=27.4)/n 13.6 13.7 +100%
#
# (i) multi-block CBC encrypt with 128-bit key;
# Sandy Bridge 17.4 14.2(+23%) 11.6(+50%(**)) 11.2 8.10(+38%(**))
# Ivy Bridge 12.6 10.5(+20%) 10.3(+22%) 8.17 7.22(+13%)
# Haswell 12.2 9.28(+31%) 7.80(+56%) 7.66 5.40(+42%)
+# Skylake 11.4 9.03(+26%) 7.70(+48%) 7.25 5.20(+40%)
# Bulldozer 21.1 13.6(+54%) 13.6(+54%(***)) 13.5 8.58(+57%)
# VIA Nano 23.0 16.5(+39%) - 14.7 -
# Atom 23.0 18.9(+22%) - 14.7 -