[Midnightbsd-cvs] src [12154] trunk/secure/lib/libcrypto/i386: update
laffer1@midnightbsd.org
Sun Jan 20 00:38:27 EST 2019
Revision: 12154
http://svnweb.midnightbsd.org/src/?rev=12154
Author: laffer1
Date: 2019-01-20 00:38:27 -0500 (Sun, 20 Jan 2019)
Log Message:
-----------
update the i386 libcrypto assembly files to the FreeBSD stable/11 generated versions (r305153; previously stable/10 r299966)
Modified Paths:
--------------
trunk/secure/lib/libcrypto/i386/aes-586.S
trunk/secure/lib/libcrypto/i386/aesni-x86.S
trunk/secure/lib/libcrypto/i386/bf-586.S
trunk/secure/lib/libcrypto/i386/bf-686.S
trunk/secure/lib/libcrypto/i386/bn-586.S
trunk/secure/lib/libcrypto/i386/cmll-x86.S
trunk/secure/lib/libcrypto/i386/co-586.S
trunk/secure/lib/libcrypto/i386/crypt586.S
trunk/secure/lib/libcrypto/i386/des-586.S
trunk/secure/lib/libcrypto/i386/ghash-x86.S
trunk/secure/lib/libcrypto/i386/md5-586.S
trunk/secure/lib/libcrypto/i386/rc4-586.S
trunk/secure/lib/libcrypto/i386/rc5-586.S
trunk/secure/lib/libcrypto/i386/rmd-586.S
trunk/secure/lib/libcrypto/i386/sha1-586.S
trunk/secure/lib/libcrypto/i386/sha256-586.S
trunk/secure/lib/libcrypto/i386/sha512-586.S
trunk/secure/lib/libcrypto/i386/vpaes-x86.S
trunk/secure/lib/libcrypto/i386/wp-mmx.S
trunk/secure/lib/libcrypto/i386/x86-gf2m.S
trunk/secure/lib/libcrypto/i386/x86-mont.S
trunk/secure/lib/libcrypto/i386/x86cpuid.S
Modified: trunk/secure/lib/libcrypto/i386/aes-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/aes-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/aes-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/aes-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from aes-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/aes-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from aes-586.pl. */
#ifdef PIC
.file "aes-586.S"
.text
@@ -104,74 +104,78 @@
xorl %ecx,%edx
movl %esi,%ecx
- movl %ecx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
+ leal (%ecx,%ecx,1),%edi
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ecx,%ecx,1),%edi
+ andl $4278124286,%edi
subl %ebp,%esi
- andl $4278124286,%edi
+ movl %ecx,%ebp
andl $454761243,%esi
- movl %ecx,%ebp
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %ecx,%edi
xorl %esi,%ecx
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%ecx
+ xorl %edi,%esi
+ movl $2155905152,%ebp
xorl %esi,%ecx
- rorl $16,%ebp
- xorl %ebp,%ecx
- rorl $8,%ebp
- xorl %ebp,%ecx
- movl %edx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ andl %edx,%ebp
+ leal (%edx,%edx,1),%edi
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%edx,%edx,1),%edi
+ andl $4278124286,%edi
subl %ebp,%esi
- andl $4278124286,%edi
+ movl %edx,%ebp
andl $454761243,%esi
- movl %edx,%ebp
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %edx,%edi
xorl %esi,%edx
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%edx
+ xorl %edi,%esi
+ movl $2155905152,%ebp
xorl %esi,%edx
- rorl $16,%ebp
- xorl %ebp,%edx
- rorl $8,%ebp
- xorl %ebp,%edx
- movl %eax,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ andl %eax,%ebp
+ leal (%eax,%eax,1),%edi
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%eax,%eax,1),%edi
+ andl $4278124286,%edi
subl %ebp,%esi
- andl $4278124286,%edi
+ movl %eax,%ebp
andl $454761243,%esi
- movl %eax,%ebp
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %eax,%edi
xorl %esi,%eax
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%eax
+ xorl %edi,%esi
+ movl $2155905152,%ebp
xorl %esi,%eax
- rorl $16,%ebp
- xorl %ebp,%eax
- rorl $8,%ebp
- xorl %ebp,%eax
- movl %ebx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ andl %ebx,%ebp
+ leal (%ebx,%ebx,1),%edi
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ebx,%ebx,1),%edi
+ andl $4278124286,%edi
subl %ebp,%esi
- andl $4278124286,%edi
+ movl %ebx,%ebp
andl $454761243,%esi
- movl %ebx,%ebp
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %ebx,%edi
xorl %esi,%ebx
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%ebx
+ xorl %edi,%esi
xorl %esi,%ebx
- rorl $16,%ebp
- xorl %ebp,%ebx
- rorl $8,%ebp
- xorl %ebp,%ebx
movl 20(%esp),%edi
movl 28(%esp),%ebp
addl $16,%edi
@@ -293,74 +297,76 @@
pshufw $13,%mm4,%mm5
movd %mm1,%eax
movd %mm5,%ebx
+ movl %edi,20(%esp)
movzbl %al,%esi
+ movzbl %ah,%edx
+ pshufw $13,%mm0,%mm2
movzbl -128(%ebp,%esi,1),%ecx
- pshufw $13,%mm0,%mm2
- movzbl %ah,%edx
+ movzbl %bl,%edi
movzbl -128(%ebp,%edx,1),%edx
+ shrl $16,%eax
shll $8,%edx
- shrl $16,%eax
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $16,%esi
+ pshufw $8,%mm4,%mm6
orl %esi,%ecx
- pshufw $8,%mm4,%mm6
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %ah,%edi
shll $24,%esi
+ shrl $16,%ebx
orl %esi,%edx
- shrl $16,%ebx
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $8,%esi
orl %esi,%ecx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
shll $24,%esi
orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
+ movd %mm2,%eax
movd %ecx,%mm0
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%ecx
- movd %mm2,%eax
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
- shll $16,%esi
+ movzbl -128(%ebp,%edi,1),%ecx
+ movzbl %ah,%edi
+ shll $16,%ecx
+ movd %mm6,%ebx
orl %esi,%ecx
- movd %mm6,%ebx
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $24,%esi
orl %esi,%ecx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
shll $8,%esi
+ shrl $16,%ebx
orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
+ shrl $16,%eax
movd %ecx,%mm1
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%ecx
- shrl $16,%ebx
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%esi
- shll $16,%esi
+ movzbl -128(%ebp,%edi,1),%ecx
+ movzbl %ah,%edi
+ shll $16,%ecx
+ andl $255,%eax
orl %esi,%ecx
- shrl $16,%eax
punpckldq %mm1,%mm0
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $24,%esi
+ andl $255,%ebx
+ movzbl -128(%ebp,%eax,1),%eax
orl %esi,%ecx
- andl $255,%eax
- movzbl -128(%ebp,%eax,1),%eax
shll $16,%eax
+ movzbl -128(%ebp,%edi,1),%esi
orl %eax,%edx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
shll $8,%esi
+ movzbl -128(%ebp,%ebx,1),%ebx
orl %esi,%ecx
+ orl %ebx,%edx
+ movl 20(%esp),%edi
movd %ecx,%mm4
- andl $255,%ebx
- movzbl -128(%ebp,%ebx,1),%ebx
- orl %ebx,%edx
movd %edx,%mm5
punpckldq %mm5,%mm4
addl $16,%edi
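
The MMX path above is reorganized along the same lines: %edi is spilled to 20(%esp) so that both %esi and %edi can serve as byte indices into the 256-entry table addressed at -128(%ebp), and more of the table loads can be interleaved with the pshufw shuffles. The recurring movzbl/shll/orl pattern simply gathers four table bytes into one 32-bit word; roughly, in C (the real byte selection also folds in ShiftRows, so treat this as a sketch):

    #include <stdint.h>

    /* Gather four lookups from a 256-byte table into one word, the
     * shape of the movzbl -128(%ebp,...),%esi / shll / orl runs. */
    static uint32_t gather4(const uint8_t t[256], uint8_t b0, uint8_t b1,
                            uint8_t b2, uint8_t b3)
    {
        return  (uint32_t)t[b0]
             | ((uint32_t)t[b1] << 8)
             | ((uint32_t)t[b2] << 16)
             | ((uint32_t)t[b3] << 24);
    }
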
@@ -998,8 +1004,7 @@
call .L004pic_point
.L004pic_point:
popl %ebp
- leal _GLOBAL_OFFSET_TABLE_+[.-.L004pic_point](%ebp),%eax
- movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ leal OPENSSL_ia32cap_P-.L004pic_point(%ebp),%eax
leal .LAES_Te-.L004pic_point(%ebp),%ebp
leal 764(%esp),%ebx
subl %ebp,%ebx
@@ -1134,18 +1139,18 @@
movzbl -128(%ebp,%eax,1),%eax
shll $24,%eax
xorl %eax,%edx
- movl %ecx,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%eax
subl %edi,%esi
andl $4278124286,%eax
andl $454761243,%esi
- xorl %eax,%esi
- movl %esi,%eax
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%eax
+ movl $2155905152,%edi
+ andl %eax,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%eax,%eax,1),%ebx
subl %edi,%esi
@@ -1152,10 +1157,10 @@
andl $4278124286,%ebx
andl $454761243,%esi
xorl %ecx,%eax
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ebx
+ movl $2155905152,%edi
+ andl %ebx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ebx,%ebx,1),%ebp
subl %edi,%esi
@@ -1166,29 +1171,29 @@
xorl %esi,%ebp
xorl %eax,%ecx
xorl %ebp,%eax
- roll $24,%eax
xorl %ebx,%ecx
xorl %ebp,%ebx
+ roll $24,%eax
+ xorl %ebp,%ecx
roll $16,%ebx
- xorl %ebp,%ecx
+ xorl %eax,%ecx
roll $8,%ebp
- xorl %eax,%ecx
xorl %ebx,%ecx
movl 4(%esp),%eax
xorl %ebp,%ecx
movl %ecx,12(%esp)
- movl %edx,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %edx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%edx,%edx,1),%ebx
subl %edi,%esi
andl $4278124286,%ebx
andl $454761243,%esi
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ebx
+ movl $2155905152,%edi
+ andl %ebx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ebx,%ebx,1),%ecx
subl %edi,%esi
@@ -1195,10 +1200,10 @@
andl $4278124286,%ecx
andl $454761243,%esi
xorl %edx,%ebx
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ecx
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%ebp
subl %edi,%esi
@@ -1209,29 +1214,29 @@
xorl %esi,%ebp
xorl %ebx,%edx
xorl %ebp,%ebx
- roll $24,%ebx
xorl %ecx,%edx
xorl %ebp,%ecx
+ roll $24,%ebx
+ xorl %ebp,%edx
roll $16,%ecx
- xorl %ebp,%edx
+ xorl %ebx,%edx
roll $8,%ebp
- xorl %ebx,%edx
xorl %ecx,%edx
movl 8(%esp),%ebx
xorl %ebp,%edx
movl %edx,16(%esp)
- movl %eax,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %eax,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%eax,%eax,1),%ecx
subl %edi,%esi
andl $4278124286,%ecx
andl $454761243,%esi
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ecx
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%edx
subl %edi,%esi
@@ -1238,10 +1243,10 @@
andl $4278124286,%edx
andl $454761243,%esi
xorl %eax,%ecx
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%edx
+ movl $2155905152,%edi
+ andl %edx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%edx,%edx,1),%ebp
subl %edi,%esi
@@ -1252,27 +1257,27 @@
xorl %esi,%ebp
xorl %ecx,%eax
xorl %ebp,%ecx
- roll $24,%ecx
xorl %edx,%eax
xorl %ebp,%edx
+ roll $24,%ecx
+ xorl %ebp,%eax
roll $16,%edx
- xorl %ebp,%eax
+ xorl %ecx,%eax
roll $8,%ebp
- xorl %ecx,%eax
xorl %edx,%eax
xorl %ebp,%eax
- movl %ebx,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %ebx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ebx,%ebx,1),%ecx
subl %edi,%esi
andl $4278124286,%ecx
andl $454761243,%esi
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ecx
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%edx
subl %edi,%esi
@@ -1279,10 +1284,10 @@
andl $4278124286,%edx
andl $454761243,%esi
xorl %ebx,%ecx
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%edx
+ movl $2155905152,%edi
+ andl %edx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%edx,%edx,1),%ebp
subl %edi,%esi
@@ -1293,13 +1298,13 @@
xorl %esi,%ebp
xorl %ecx,%ebx
xorl %ebp,%ecx
- roll $24,%ecx
xorl %edx,%ebx
xorl %ebp,%edx
+ roll $24,%ecx
+ xorl %ebp,%ebx
roll $16,%edx
- xorl %ebp,%ebx
+ xorl %ecx,%ebx
roll $8,%ebp
- xorl %ecx,%ebx
xorl %edx,%ebx
movl 12(%esp),%ecx
xorl %ebp,%ebx
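
The longer runs above chain the same doubling three times: with e = 2*a, f = 4*a, g = 8*a in GF(2^8), the InvMixColumns multipliers fall out as 9*a = g^a, 11*a = g^e^a, 13*a = g^f^a and 14*a = g^f^e, and the roll $24/$16/$8 at the end of each block distribute them across the column. Per byte, in C (a worked restatement of the math, not code from the file):

    #include <stdint.h>

    static uint8_t xtime(uint8_t a)
    {
        return (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1b : 0x00));
    }

    /* InvMixColumns multipliers for one input byte via repeated doubling. */
    static void invmix_factors(uint8_t a, uint8_t out[4])
    {
        uint8_t e = xtime(a), f = xtime(e), g = xtime(f);
        out[0] = (uint8_t)(g ^ f ^ e);   /* 14*a */
        out[1] = (uint8_t)(g ^ f ^ a);   /* 13*a */
        out[2] = (uint8_t)(g ^ e ^ a);   /* 11*a */
        out[3] = (uint8_t)(g ^ a);       /*  9*a */
    }
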
@@ -1418,77 +1423,79 @@
.align 16
.L007loop:
pshufw $12,%mm0,%mm1
+ pshufw $9,%mm4,%mm5
movd %mm1,%eax
- pshufw $9,%mm4,%mm5
+ movd %mm5,%ebx
+ movl %edi,20(%esp)
movzbl %al,%esi
+ movzbl %ah,%edx
+ pshufw $6,%mm0,%mm2
movzbl -128(%ebp,%esi,1),%ecx
- movd %mm5,%ebx
- movzbl %ah,%edx
+ movzbl %bl,%edi
movzbl -128(%ebp,%edx,1),%edx
+ shrl $16,%eax
shll $8,%edx
- pshufw $6,%mm0,%mm2
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $16,%esi
+ pshufw $3,%mm4,%mm6
orl %esi,%ecx
- shrl $16,%eax
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %ah,%edi
shll $24,%esi
+ shrl $16,%ebx
orl %esi,%edx
- shrl $16,%ebx
- pshufw $3,%mm4,%mm6
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $24,%esi
orl %esi,%ecx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
shll $8,%esi
+ movd %mm2,%eax
orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
+ shll $16,%esi
+ movd %mm6,%ebx
movd %ecx,%mm0
- movzbl %al,%esi
- movd %mm2,%eax
- movzbl -128(%ebp,%esi,1),%ecx
- shll $16,%ecx
- movzbl %bl,%esi
- movd %mm6,%ebx
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%ecx
+ movzbl %al,%edi
orl %esi,%ecx
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
orl %esi,%edx
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %ah,%edi
shll $16,%esi
+ shrl $16,%eax
orl %esi,%edx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
+ shrl $16,%ebx
+ shll $8,%esi
movd %edx,%mm1
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%edx
- shll $8,%edx
- movzbl %bh,%esi
- shrl $16,%eax
- movzbl -128(%ebp,%esi,1),%esi
- shll $24,%esi
+ movzbl -128(%ebp,%edi,1),%edx
+ movzbl %bh,%edi
+ shll $24,%edx
+ andl $255,%ebx
orl %esi,%edx
- shrl $16,%ebx
punpckldq %mm1,%mm0
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
shll $8,%esi
+ movzbl %ah,%eax
+ movzbl -128(%ebp,%ebx,1),%ebx
orl %esi,%ecx
- andl $255,%ebx
- movzbl -128(%ebp,%ebx,1),%ebx
+ movzbl -128(%ebp,%edi,1),%esi
orl %ebx,%edx
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%esi
shll $16,%esi
+ movzbl -128(%ebp,%eax,1),%eax
orl %esi,%edx
- movd %edx,%mm4
- movzbl %ah,%eax
- movzbl -128(%ebp,%eax,1),%eax
shll $24,%eax
orl %eax,%ecx
+ movl 20(%esp),%edi
+ movd %edx,%mm4
movd %ecx,%mm5
punpckldq %mm5,%mm4
addl $16,%edi
@@ -2189,8 +2196,7 @@
call .L010pic_point
.L010pic_point:
popl %ebp
- leal _GLOBAL_OFFSET_TABLE_+[.-.L010pic_point](%ebp),%eax
- movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ leal OPENSSL_ia32cap_P-.L010pic_point(%ebp),%eax
leal .LAES_Td-.L010pic_point(%ebp),%ebp
leal 764(%esp),%ebx
subl %ebp,%ebx
@@ -2246,8 +2252,7 @@
call .L013pic_point
.L013pic_point:
popl %ebp
- leal _GLOBAL_OFFSET_TABLE_+[.-.L013pic_point](%ebp),%eax
- movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ leal OPENSSL_ia32cap_P-.L013pic_point(%ebp),%eax
cmpl $0,40(%esp)
leal .LAES_Te-.L013pic_point(%ebp),%ebp
jne .L014picked_te
@@ -3052,30 +3057,30 @@
.align 4
.L056permute:
addl $16,%edi
- movl %eax,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ movl $2155905152,%ebp
+ andl %eax,%ebp
+ leal (%eax,%eax,1),%ebx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%eax,%eax,1),%ebx
subl %ebp,%esi
andl $4278124286,%ebx
andl $454761243,%esi
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%ebx
+ movl $2155905152,%ebp
+ andl %ebx,%ebp
+ leal (%ebx,%ebx,1),%ecx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ebx,%ebx,1),%ecx
subl %ebp,%esi
andl $4278124286,%ecx
andl $454761243,%esi
xorl %eax,%ebx
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%ecx
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
+ leal (%ecx,%ecx,1),%edx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ecx,%ecx,1),%edx
xorl %eax,%ecx
subl %ebp,%esi
andl $4278124286,%edx
@@ -3096,30 +3101,30 @@
movl %ebp,%ebx
xorl %edx,%eax
movl %eax,(%edi)
- movl %ebx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ movl $2155905152,%ebp
+ andl %ebx,%ebp
+ leal (%ebx,%ebx,1),%ecx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ebx,%ebx,1),%ecx
subl %ebp,%esi
andl $4278124286,%ecx
andl $454761243,%esi
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%ecx
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
+ leal (%ecx,%ecx,1),%edx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ecx,%ecx,1),%edx
subl %ebp,%esi
andl $4278124286,%edx
andl $454761243,%esi
xorl %ebx,%ecx
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%edx
+ movl $2155905152,%ebp
+ andl %edx,%ebp
+ leal (%edx,%edx,1),%eax
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%edx,%edx,1),%eax
xorl %ebx,%edx
subl %ebp,%esi
andl $4278124286,%eax
@@ -3140,30 +3145,30 @@
movl %ebp,%ecx
xorl %eax,%ebx
movl %ebx,4(%edi)
- movl %ecx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
+ leal (%ecx,%ecx,1),%edx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ecx,%ecx,1),%edx
subl %ebp,%esi
andl $4278124286,%edx
andl $454761243,%esi
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%edx
+ movl $2155905152,%ebp
+ andl %edx,%ebp
+ leal (%edx,%edx,1),%eax
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%edx,%edx,1),%eax
subl %ebp,%esi
andl $4278124286,%eax
andl $454761243,%esi
xorl %ecx,%edx
- xorl %eax,%esi
- movl %esi,%eax
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%eax
+ movl $2155905152,%ebp
+ andl %eax,%ebp
+ leal (%eax,%eax,1),%ebx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%eax,%eax,1),%ebx
xorl %ecx,%eax
subl %ebp,%esi
andl $4278124286,%ebx
@@ -3184,30 +3189,30 @@
movl %ebp,%edx
xorl %ebx,%ecx
movl %ecx,8(%edi)
- movl %edx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ movl $2155905152,%ebp
+ andl %edx,%ebp
+ leal (%edx,%edx,1),%eax
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%edx,%edx,1),%eax
subl %ebp,%esi
andl $4278124286,%eax
andl $454761243,%esi
- xorl %eax,%esi
- movl %esi,%eax
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%eax
+ movl $2155905152,%ebp
+ andl %eax,%ebp
+ leal (%eax,%eax,1),%ebx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%eax,%eax,1),%ebx
subl %ebp,%esi
andl $4278124286,%ebx
andl $454761243,%esi
xorl %edx,%eax
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%ebx
+ movl $2155905152,%ebp
+ andl %ebx,%ebp
+ leal (%ebx,%ebx,1),%ecx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ebx,%ebx,1),%ecx
xorl %edx,%ebx
subl %ebp,%esi
andl $4278124286,%ecx
@@ -3240,7 +3245,7 @@
.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#else
.file "aes-586.S"
.text
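
Apart from the regenerated round code, the remaining change at the end of the PIC half is the .comm bump: OPENSSL_ia32cap_P grows from 8 to 16 bytes, i.e. from two to four 32-bit capability words, matching the wider vector the newer perlasm output expects. A hedged C view of what that reserves (the AES-NI bit position is the one documented for OPENSSL_ia32cap; verify against the headers in this tree):

    /* Capability vector reserved by the .comm directive above and
     * filled in by OPENSSL_cpuid_setup() via x86cpuid.S. */
    extern unsigned int OPENSSL_ia32cap_P[4];

    /* Bit 57 of the vector (word 1, bit 25) is documented as AES-NI;
     * illustrative check only, not copied from the library. */
    static int have_aesni(void)
    {
        return (OPENSSL_ia32cap_P[1] >> 25) & 1;
    }
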
@@ -3344,74 +3349,78 @@
xorl %ecx,%edx
movl %esi,%ecx
- movl %ecx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
+ leal (%ecx,%ecx,1),%edi
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ecx,%ecx,1),%edi
+ andl $4278124286,%edi
subl %ebp,%esi
- andl $4278124286,%edi
+ movl %ecx,%ebp
andl $454761243,%esi
- movl %ecx,%ebp
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %ecx,%edi
xorl %esi,%ecx
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%ecx
+ xorl %edi,%esi
+ movl $2155905152,%ebp
xorl %esi,%ecx
- rorl $16,%ebp
- xorl %ebp,%ecx
- rorl $8,%ebp
- xorl %ebp,%ecx
- movl %edx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ andl %edx,%ebp
+ leal (%edx,%edx,1),%edi
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%edx,%edx,1),%edi
+ andl $4278124286,%edi
subl %ebp,%esi
- andl $4278124286,%edi
+ movl %edx,%ebp
andl $454761243,%esi
- movl %edx,%ebp
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %edx,%edi
xorl %esi,%edx
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%edx
+ xorl %edi,%esi
+ movl $2155905152,%ebp
xorl %esi,%edx
- rorl $16,%ebp
- xorl %ebp,%edx
- rorl $8,%ebp
- xorl %ebp,%edx
- movl %eax,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ andl %eax,%ebp
+ leal (%eax,%eax,1),%edi
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%eax,%eax,1),%edi
+ andl $4278124286,%edi
subl %ebp,%esi
- andl $4278124286,%edi
+ movl %eax,%ebp
andl $454761243,%esi
- movl %eax,%ebp
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %eax,%edi
xorl %esi,%eax
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%eax
+ xorl %edi,%esi
+ movl $2155905152,%ebp
xorl %esi,%eax
- rorl $16,%ebp
- xorl %ebp,%eax
- rorl $8,%ebp
- xorl %ebp,%eax
- movl %ebx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ andl %ebx,%ebp
+ leal (%ebx,%ebx,1),%edi
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ebx,%ebx,1),%edi
+ andl $4278124286,%edi
subl %ebp,%esi
- andl $4278124286,%edi
+ movl %ebx,%ebp
andl $454761243,%esi
- movl %ebx,%ebp
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %ebx,%edi
xorl %esi,%ebx
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%ebx
+ xorl %edi,%esi
xorl %esi,%ebx
- rorl $16,%ebp
- xorl %ebp,%ebx
- rorl $8,%ebp
- xorl %ebp,%ebx
movl 20(%esp),%edi
movl 28(%esp),%ebp
addl $16,%edi
@@ -3533,74 +3542,76 @@
pshufw $13,%mm4,%mm5
movd %mm1,%eax
movd %mm5,%ebx
+ movl %edi,20(%esp)
movzbl %al,%esi
+ movzbl %ah,%edx
+ pshufw $13,%mm0,%mm2
movzbl -128(%ebp,%esi,1),%ecx
- pshufw $13,%mm0,%mm2
- movzbl %ah,%edx
+ movzbl %bl,%edi
movzbl -128(%ebp,%edx,1),%edx
+ shrl $16,%eax
shll $8,%edx
- shrl $16,%eax
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $16,%esi
+ pshufw $8,%mm4,%mm6
orl %esi,%ecx
- pshufw $8,%mm4,%mm6
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %ah,%edi
shll $24,%esi
+ shrl $16,%ebx
orl %esi,%edx
- shrl $16,%ebx
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $8,%esi
orl %esi,%ecx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
shll $24,%esi
orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
+ movd %mm2,%eax
movd %ecx,%mm0
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%ecx
- movd %mm2,%eax
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
- shll $16,%esi
+ movzbl -128(%ebp,%edi,1),%ecx
+ movzbl %ah,%edi
+ shll $16,%ecx
+ movd %mm6,%ebx
orl %esi,%ecx
- movd %mm6,%ebx
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $24,%esi
orl %esi,%ecx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
shll $8,%esi
+ shrl $16,%ebx
orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
+ shrl $16,%eax
movd %ecx,%mm1
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%ecx
- shrl $16,%ebx
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%esi
- shll $16,%esi
+ movzbl -128(%ebp,%edi,1),%ecx
+ movzbl %ah,%edi
+ shll $16,%ecx
+ andl $255,%eax
orl %esi,%ecx
- shrl $16,%eax
punpckldq %mm1,%mm0
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $24,%esi
+ andl $255,%ebx
+ movzbl -128(%ebp,%eax,1),%eax
orl %esi,%ecx
- andl $255,%eax
- movzbl -128(%ebp,%eax,1),%eax
shll $16,%eax
+ movzbl -128(%ebp,%edi,1),%esi
orl %eax,%edx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
shll $8,%esi
+ movzbl -128(%ebp,%ebx,1),%ebx
orl %esi,%ecx
+ orl %ebx,%edx
+ movl 20(%esp),%edi
movd %ecx,%mm4
- andl $255,%ebx
- movzbl -128(%ebp,%ebx,1),%ebx
- orl %ebx,%edx
movd %edx,%mm5
punpckldq %mm5,%mm4
addl $16,%edi
@@ -4373,18 +4384,18 @@
movzbl -128(%ebp,%eax,1),%eax
shll $24,%eax
xorl %eax,%edx
- movl %ecx,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%eax
subl %edi,%esi
andl $4278124286,%eax
andl $454761243,%esi
- xorl %eax,%esi
- movl %esi,%eax
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%eax
+ movl $2155905152,%edi
+ andl %eax,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%eax,%eax,1),%ebx
subl %edi,%esi
@@ -4391,10 +4402,10 @@
andl $4278124286,%ebx
andl $454761243,%esi
xorl %ecx,%eax
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ebx
+ movl $2155905152,%edi
+ andl %ebx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ebx,%ebx,1),%ebp
subl %edi,%esi
@@ -4405,29 +4416,29 @@
xorl %esi,%ebp
xorl %eax,%ecx
xorl %ebp,%eax
- roll $24,%eax
xorl %ebx,%ecx
xorl %ebp,%ebx
+ roll $24,%eax
+ xorl %ebp,%ecx
roll $16,%ebx
- xorl %ebp,%ecx
+ xorl %eax,%ecx
roll $8,%ebp
- xorl %eax,%ecx
xorl %ebx,%ecx
movl 4(%esp),%eax
xorl %ebp,%ecx
movl %ecx,12(%esp)
- movl %edx,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %edx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%edx,%edx,1),%ebx
subl %edi,%esi
andl $4278124286,%ebx
andl $454761243,%esi
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ebx
+ movl $2155905152,%edi
+ andl %ebx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ebx,%ebx,1),%ecx
subl %edi,%esi
@@ -4434,10 +4445,10 @@
andl $4278124286,%ecx
andl $454761243,%esi
xorl %edx,%ebx
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ecx
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%ebp
subl %edi,%esi
@@ -4448,29 +4459,29 @@
xorl %esi,%ebp
xorl %ebx,%edx
xorl %ebp,%ebx
- roll $24,%ebx
xorl %ecx,%edx
xorl %ebp,%ecx
+ roll $24,%ebx
+ xorl %ebp,%edx
roll $16,%ecx
- xorl %ebp,%edx
+ xorl %ebx,%edx
roll $8,%ebp
- xorl %ebx,%edx
xorl %ecx,%edx
movl 8(%esp),%ebx
xorl %ebp,%edx
movl %edx,16(%esp)
- movl %eax,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %eax,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%eax,%eax,1),%ecx
subl %edi,%esi
andl $4278124286,%ecx
andl $454761243,%esi
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ecx
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%edx
subl %edi,%esi
@@ -4477,10 +4488,10 @@
andl $4278124286,%edx
andl $454761243,%esi
xorl %eax,%ecx
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%edx
+ movl $2155905152,%edi
+ andl %edx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%edx,%edx,1),%ebp
subl %edi,%esi
@@ -4491,27 +4502,27 @@
xorl %esi,%ebp
xorl %ecx,%eax
xorl %ebp,%ecx
- roll $24,%ecx
xorl %edx,%eax
xorl %ebp,%edx
+ roll $24,%ecx
+ xorl %ebp,%eax
roll $16,%edx
- xorl %ebp,%eax
+ xorl %ecx,%eax
roll $8,%ebp
- xorl %ecx,%eax
xorl %edx,%eax
xorl %ebp,%eax
- movl %ebx,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %ebx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ebx,%ebx,1),%ecx
subl %edi,%esi
andl $4278124286,%ecx
andl $454761243,%esi
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ecx
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%edx
subl %edi,%esi
@@ -4518,10 +4529,10 @@
andl $4278124286,%edx
andl $454761243,%esi
xorl %ebx,%ecx
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%edx
+ movl $2155905152,%edi
+ andl %edx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%edx,%edx,1),%ebp
subl %edi,%esi
@@ -4532,13 +4543,13 @@
xorl %esi,%ebp
xorl %ecx,%ebx
xorl %ebp,%ecx
- roll $24,%ecx
xorl %edx,%ebx
xorl %ebp,%edx
+ roll $24,%ecx
+ xorl %ebp,%ebx
roll $16,%edx
- xorl %ebp,%ebx
+ xorl %ecx,%ebx
roll $8,%ebp
- xorl %ecx,%ebx
xorl %edx,%ebx
movl 12(%esp),%ecx
xorl %ebp,%ebx
@@ -4657,77 +4668,79 @@
.align 16
.L007loop:
pshufw $12,%mm0,%mm1
+ pshufw $9,%mm4,%mm5
movd %mm1,%eax
- pshufw $9,%mm4,%mm5
+ movd %mm5,%ebx
+ movl %edi,20(%esp)
movzbl %al,%esi
+ movzbl %ah,%edx
+ pshufw $6,%mm0,%mm2
movzbl -128(%ebp,%esi,1),%ecx
- movd %mm5,%ebx
- movzbl %ah,%edx
+ movzbl %bl,%edi
movzbl -128(%ebp,%edx,1),%edx
+ shrl $16,%eax
shll $8,%edx
- pshufw $6,%mm0,%mm2
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $16,%esi
+ pshufw $3,%mm4,%mm6
orl %esi,%ecx
- shrl $16,%eax
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %ah,%edi
shll $24,%esi
+ shrl $16,%ebx
orl %esi,%edx
- shrl $16,%ebx
- pshufw $3,%mm4,%mm6
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $24,%esi
orl %esi,%ecx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
shll $8,%esi
+ movd %mm2,%eax
orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
+ shll $16,%esi
+ movd %mm6,%ebx
movd %ecx,%mm0
- movzbl %al,%esi
- movd %mm2,%eax
- movzbl -128(%ebp,%esi,1),%ecx
- shll $16,%ecx
- movzbl %bl,%esi
- movd %mm6,%ebx
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%ecx
+ movzbl %al,%edi
orl %esi,%ecx
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
orl %esi,%edx
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %ah,%edi
shll $16,%esi
+ shrl $16,%eax
orl %esi,%edx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
+ shrl $16,%ebx
+ shll $8,%esi
movd %edx,%mm1
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%edx
- shll $8,%edx
- movzbl %bh,%esi
- shrl $16,%eax
- movzbl -128(%ebp,%esi,1),%esi
- shll $24,%esi
+ movzbl -128(%ebp,%edi,1),%edx
+ movzbl %bh,%edi
+ shll $24,%edx
+ andl $255,%ebx
orl %esi,%edx
- shrl $16,%ebx
punpckldq %mm1,%mm0
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
shll $8,%esi
+ movzbl %ah,%eax
+ movzbl -128(%ebp,%ebx,1),%ebx
orl %esi,%ecx
- andl $255,%ebx
- movzbl -128(%ebp,%ebx,1),%ebx
+ movzbl -128(%ebp,%edi,1),%esi
orl %ebx,%edx
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%esi
shll $16,%esi
+ movzbl -128(%ebp,%eax,1),%eax
orl %esi,%edx
- movd %edx,%mm4
- movzbl %ah,%eax
- movzbl -128(%ebp,%eax,1),%eax
shll $24,%eax
orl %eax,%ecx
+ movl 20(%esp),%edi
+ movd %edx,%mm4
movd %ecx,%mm5
punpckldq %mm5,%mm4
addl $16,%edi
@@ -6289,30 +6302,30 @@
.align 4
.L056permute:
addl $16,%edi
- movl %eax,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ movl $2155905152,%ebp
+ andl %eax,%ebp
+ leal (%eax,%eax,1),%ebx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%eax,%eax,1),%ebx
subl %ebp,%esi
andl $4278124286,%ebx
andl $454761243,%esi
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%ebx
+ movl $2155905152,%ebp
+ andl %ebx,%ebp
+ leal (%ebx,%ebx,1),%ecx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ebx,%ebx,1),%ecx
subl %ebp,%esi
andl $4278124286,%ecx
andl $454761243,%esi
xorl %eax,%ebx
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%ecx
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
+ leal (%ecx,%ecx,1),%edx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ecx,%ecx,1),%edx
xorl %eax,%ecx
subl %ebp,%esi
andl $4278124286,%edx
@@ -6333,30 +6346,30 @@
movl %ebp,%ebx
xorl %edx,%eax
movl %eax,(%edi)
- movl %ebx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ movl $2155905152,%ebp
+ andl %ebx,%ebp
+ leal (%ebx,%ebx,1),%ecx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ebx,%ebx,1),%ecx
subl %ebp,%esi
andl $4278124286,%ecx
andl $454761243,%esi
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%ecx
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
+ leal (%ecx,%ecx,1),%edx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ecx,%ecx,1),%edx
subl %ebp,%esi
andl $4278124286,%edx
andl $454761243,%esi
xorl %ebx,%ecx
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%edx
+ movl $2155905152,%ebp
+ andl %edx,%ebp
+ leal (%edx,%edx,1),%eax
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%edx,%edx,1),%eax
xorl %ebx,%edx
subl %ebp,%esi
andl $4278124286,%eax
@@ -6377,30 +6390,30 @@
movl %ebp,%ecx
xorl %eax,%ebx
movl %ebx,4(%edi)
- movl %ecx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
+ leal (%ecx,%ecx,1),%edx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ecx,%ecx,1),%edx
subl %ebp,%esi
andl $4278124286,%edx
andl $454761243,%esi
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%edx
+ movl $2155905152,%ebp
+ andl %edx,%ebp
+ leal (%edx,%edx,1),%eax
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%edx,%edx,1),%eax
subl %ebp,%esi
andl $4278124286,%eax
andl $454761243,%esi
xorl %ecx,%edx
- xorl %eax,%esi
- movl %esi,%eax
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%eax
+ movl $2155905152,%ebp
+ andl %eax,%ebp
+ leal (%eax,%eax,1),%ebx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%eax,%eax,1),%ebx
xorl %ecx,%eax
subl %ebp,%esi
andl $4278124286,%ebx
@@ -6421,30 +6434,30 @@
movl %ebp,%edx
xorl %ebx,%ecx
movl %ecx,8(%edi)
- movl %edx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
+ movl $2155905152,%ebp
+ andl %edx,%ebp
+ leal (%edx,%edx,1),%eax
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%edx,%edx,1),%eax
subl %ebp,%esi
andl $4278124286,%eax
andl $454761243,%esi
- xorl %eax,%esi
- movl %esi,%eax
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%eax
+ movl $2155905152,%ebp
+ andl %eax,%ebp
+ leal (%eax,%eax,1),%ebx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%eax,%eax,1),%ebx
subl %ebp,%esi
andl $4278124286,%ebx
andl $454761243,%esi
xorl %edx,%eax
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%ebp
+ xorl %esi,%ebx
+ movl $2155905152,%ebp
+ andl %ebx,%ebp
+ leal (%ebx,%ebx,1),%ecx
+ movl %ebp,%esi
shrl $7,%ebp
- leal (%ebx,%ebx,1),%ecx
xorl %edx,%ebx
subl %ebp,%esi
andl $4278124286,%ecx
@@ -6477,5 +6490,5 @@
.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#endif
Modified: trunk/secure/lib/libcrypto/i386/aesni-x86.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/aesni-x86.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/aesni-x86.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/aesni-x86.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from aesni-x86.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/aesni-x86.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from aesni-x86.pl. */
#ifdef PIC
.file "aesni-x86.S"
.text
@@ -25,7 +25,10 @@
leal 16(%edx),%edx
jnz .L000enc1_loop_1
.byte 102,15,56,221,209
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
movups %xmm2,(%eax)
+ pxor %xmm2,%xmm2
ret
.size aesni_encrypt,.-.L_aesni_encrypt_begin
.globl aesni_decrypt
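
The pxor instructions added before each ret above (and on the other exit paths further down) clear the xmm registers that held round keys and block state, so that material is not left in SIMD registers when aesni_encrypt/aesni_decrypt return. The C-level counterpart is scrubbing sensitive locals before they go out of scope; a sketch with made-up names, where wipe() stands in for OPENSSL_cleanse() or explicit_bzero(3):

    #include <stddef.h>

    /* Volatile walk so the stores cannot be optimized away the way a
     * plain memset() of a dying buffer can be. */
    static void wipe(void *p, size_t n)
    {
        volatile unsigned char *v = p;
        while (n--)
            *v++ = 0;
    }

    void use_key_once(void)
    {
        unsigned char round_keys[240];     /* large enough for AES-256 */
        /* ... expand the key and encrypt with it ... */
        wipe(round_keys, sizeof(round_keys));
    }
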
@@ -49,32 +52,90 @@
leal 16(%edx),%edx
jnz .L001dec1_loop_2
.byte 102,15,56,223,209
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
movups %xmm2,(%eax)
+ pxor %xmm2,%xmm2
ret
.size aesni_decrypt,.-.L_aesni_decrypt_begin
+.type _aesni_encrypt2,@function
+.align 16
+_aesni_encrypt2:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L002enc2_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L002enc2_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ ret
+.size _aesni_encrypt2,.-_aesni_encrypt2
+.type _aesni_decrypt2,@function
+.align 16
+_aesni_decrypt2:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L003dec2_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L003dec2_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+ ret
+.size _aesni_decrypt2,.-_aesni_decrypt2
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
- movups (%edx),%xmm0
-.L002enc3_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L004enc3_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- decl %ecx
.byte 102,15,56,220,225
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- leal 32(%edx),%edx
.byte 102,15,56,220,224
- movups (%edx),%xmm0
- jnz .L002enc3_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L004enc3_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
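
The new _aesni_encrypt2/_aesni_decrypt2 helpers (used by the two-block tail cases further down instead of zeroing %xmm4 and calling the 3-block variant) and the reworked 3/4/6-block bodies all switch to the same round-key walk: shll $4 scales the round count into a byte offset, leal 32(%edx,%ecx,1) parks %edx past the schedule, and negl %ecx lets the loop index forward with a negative offset, so the addl that advances the index also drives the jnz and the old decl %ecx counter disappears. The same pattern in C, with a byte xor standing in for aesenc (sketch only):

    #include <stddef.h>
    #include <stdint.h>

    /* Walk rounds*16 bytes of round keys by negative offset from a
     * pointer placed past the end; the offset update doubles as the
     * loop test, mirroring the negl/addl/jnz sequence above. */
    static void walk_schedule(uint8_t state[16], const uint8_t *rk, int rounds)
    {
        const uint8_t *end = rk + 16 * (size_t)rounds;
        ptrdiff_t off = -16 * (ptrdiff_t)rounds;        /* negl %ecx */

        while (off < 0) {
            for (int i = 0; i < 16; i++)
                state[i] ^= end[off + i];               /* stand-in for one AES round */
            off += 16;                                  /* addl; reaching 0 ends the loop */
        }
    }
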
@@ -87,25 +148,26 @@
.align 16
_aesni_decrypt3:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
- movups (%edx),%xmm0
-.L003dec3_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L005dec3_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
- decl %ecx
.byte 102,15,56,222,225
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
- leal 32(%edx),%edx
.byte 102,15,56,222,224
- movups (%edx),%xmm0
- jnz .L003dec3_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L005dec3_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
@@ -119,27 +181,29 @@
_aesni_encrypt4:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
- shrl $1,%ecx
- leal 32(%edx),%edx
+ shll $4,%ecx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
- movups (%edx),%xmm0
-.L004enc4_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 15,31,64,0
+ addl $16,%ecx
+.L006enc4_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- decl %ecx
.byte 102,15,56,220,225
.byte 102,15,56,220,233
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- leal 32(%edx),%edx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
- movups (%edx),%xmm0
- jnz .L004enc4_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L006enc4_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
@@ -155,27 +219,29 @@
_aesni_decrypt4:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
- shrl $1,%ecx
- leal 32(%edx),%edx
+ shll $4,%ecx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
- movups (%edx),%xmm0
-.L005dec4_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 15,31,64,0
+ addl $16,%ecx
+.L007dec4_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
- decl %ecx
.byte 102,15,56,222,225
.byte 102,15,56,222,233
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
- leal 32(%edx),%edx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
- movups (%edx),%xmm0
- jnz .L005dec4_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L007dec4_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
@@ -190,45 +256,42 @@
.align 16
_aesni_encrypt6:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
.byte 102,15,56,220,209
- pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
.byte 102,15,56,220,217
- pxor %xmm0,%xmm5
- decl %ecx
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
.byte 102,15,56,220,225
- pxor %xmm0,%xmm6
-.byte 102,15,56,220,233
pxor %xmm0,%xmm7
-.byte 102,15,56,220,241
- movups (%edx),%xmm0
-.byte 102,15,56,220,249
- jmp .L_aesni_encrypt6_enter
+ movups (%edx,%ecx,1),%xmm0
+ addl $16,%ecx
+ jmp .L008_aesni_encrypt6_inner
.align 16
-.L006enc6_loop:
+.L009enc6_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- decl %ecx
.byte 102,15,56,220,225
+.L008_aesni_encrypt6_inner:
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
-.align 16
.L_aesni_encrypt6_enter:
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- leal 32(%edx),%edx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
- movups (%edx),%xmm0
- jnz .L006enc6_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L009enc6_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
@@ -247,45 +310,42 @@
.align 16
_aesni_decrypt6:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
.byte 102,15,56,222,209
- pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
.byte 102,15,56,222,217
- pxor %xmm0,%xmm5
- decl %ecx
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
.byte 102,15,56,222,225
- pxor %xmm0,%xmm6
-.byte 102,15,56,222,233
pxor %xmm0,%xmm7
-.byte 102,15,56,222,241
- movups (%edx),%xmm0
-.byte 102,15,56,222,249
- jmp .L_aesni_decrypt6_enter
+ movups (%edx,%ecx,1),%xmm0
+ addl $16,%ecx
+ jmp .L010_aesni_decrypt6_inner
.align 16
-.L007dec6_loop:
+.L011dec6_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
- decl %ecx
.byte 102,15,56,222,225
+.L010_aesni_decrypt6_inner:
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
-.align 16
.L_aesni_decrypt6_enter:
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
- leal 32(%edx),%edx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
- movups (%edx),%xmm0
- jnz .L007dec6_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L011dec6_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
@@ -315,14 +375,14 @@
movl 32(%esp),%edx
movl 36(%esp),%ebx
andl $-16,%eax
- jz .L008ecb_ret
+ jz .L012ecb_ret
movl 240(%edx),%ecx
testl %ebx,%ebx
- jz .L009ecb_decrypt
+ jz .L013ecb_decrypt
movl %edx,%ebp
movl %ecx,%ebx
cmpl $96,%eax
- jb .L010ecb_enc_tail
+ jb .L014ecb_enc_tail
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
@@ -331,9 +391,9 @@
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
subl $96,%eax
- jmp .L011ecb_enc_loop6_enter
+ jmp .L015ecb_enc_loop6_enter
.align 16
-.L012ecb_enc_loop6:
+.L016ecb_enc_loop6:
movups %xmm2,(%edi)
movdqu (%esi),%xmm2
movups %xmm3,16(%edi)
@@ -348,12 +408,12 @@
leal 96(%edi),%edi
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
-.L011ecb_enc_loop6_enter:
+.L015ecb_enc_loop6_enter:
call _aesni_encrypt6
movl %ebp,%edx
movl %ebx,%ecx
subl $96,%eax
- jnc .L012ecb_enc_loop6
+ jnc .L016ecb_enc_loop6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
@@ -362,18 +422,18 @@
movups %xmm7,80(%edi)
leal 96(%edi),%edi
addl $96,%eax
- jz .L008ecb_ret
-.L010ecb_enc_tail:
+ jz .L012ecb_ret
+.L014ecb_enc_tail:
movups (%esi),%xmm2
cmpl $32,%eax
- jb .L013ecb_enc_one
+ jb .L017ecb_enc_one
movups 16(%esi),%xmm3
- je .L014ecb_enc_two
+ je .L018ecb_enc_two
movups 32(%esi),%xmm4
cmpl $64,%eax
- jb .L015ecb_enc_three
+ jb .L019ecb_enc_three
movups 48(%esi),%xmm5
- je .L016ecb_enc_four
+ je .L020ecb_enc_four
movups 64(%esi),%xmm6
xorps %xmm7,%xmm7
call _aesni_encrypt6
@@ -382,50 +442,49 @@
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L013ecb_enc_one:
+.L017ecb_enc_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L017enc1_loop_3:
+.L021enc1_loop_3:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L017enc1_loop_3
+ jnz .L021enc1_loop_3
.byte 102,15,56,221,209
movups %xmm2,(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L014ecb_enc_two:
- xorps %xmm4,%xmm4
- call _aesni_encrypt3
+.L018ecb_enc_two:
+ call _aesni_encrypt2
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L015ecb_enc_three:
+.L019ecb_enc_three:
call _aesni_encrypt3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L016ecb_enc_four:
+.L020ecb_enc_four:
call _aesni_encrypt4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L009ecb_decrypt:
+.L013ecb_decrypt:
movl %edx,%ebp
movl %ecx,%ebx
cmpl $96,%eax
- jb .L018ecb_dec_tail
+ jb .L022ecb_dec_tail
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
@@ -434,9 +493,9 @@
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
subl $96,%eax
- jmp .L019ecb_dec_loop6_enter
+ jmp .L023ecb_dec_loop6_enter
.align 16
-.L020ecb_dec_loop6:
+.L024ecb_dec_loop6:
movups %xmm2,(%edi)
movdqu (%esi),%xmm2
movups %xmm3,16(%edi)
@@ -451,12 +510,12 @@
leal 96(%edi),%edi
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
-.L019ecb_dec_loop6_enter:
+.L023ecb_dec_loop6_enter:
call _aesni_decrypt6
movl %ebp,%edx
movl %ebx,%ecx
subl $96,%eax
- jnc .L020ecb_dec_loop6
+ jnc .L024ecb_dec_loop6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
@@ -465,18 +524,18 @@
movups %xmm7,80(%edi)
leal 96(%edi),%edi
addl $96,%eax
- jz .L008ecb_ret
-.L018ecb_dec_tail:
+ jz .L012ecb_ret
+.L022ecb_dec_tail:
movups (%esi),%xmm2
cmpl $32,%eax
- jb .L021ecb_dec_one
+ jb .L025ecb_dec_one
movups 16(%esi),%xmm3
- je .L022ecb_dec_two
+ je .L026ecb_dec_two
movups 32(%esi),%xmm4
cmpl $64,%eax
- jb .L023ecb_dec_three
+ jb .L027ecb_dec_three
movups 48(%esi),%xmm5
- je .L024ecb_dec_four
+ je .L028ecb_dec_four
movups 64(%esi),%xmm6
xorps %xmm7,%xmm7
call _aesni_decrypt6
@@ -485,44 +544,51 @@
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L021ecb_dec_one:
+.L025ecb_dec_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L025dec1_loop_4:
+.L029dec1_loop_4:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L025dec1_loop_4
+ jnz .L029dec1_loop_4
.byte 102,15,56,223,209
movups %xmm2,(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L022ecb_dec_two:
- xorps %xmm4,%xmm4
- call _aesni_decrypt3
+.L026ecb_dec_two:
+ call _aesni_decrypt2
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L023ecb_dec_three:
+.L027ecb_dec_three:
call _aesni_decrypt3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L024ecb_dec_four:
+.L028ecb_dec_four:
call _aesni_decrypt4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
-.L008ecb_ret:
+.L012ecb_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
@@ -561,13 +627,15 @@
movl %ebp,20(%esp)
movl %ebp,24(%esp)
movl %ebp,28(%esp)
- shrl $1,%ecx
+ shll $4,%ecx
+ movl $16,%ebx
leal (%edx),%ebp
movdqa (%esp),%xmm5
movdqa %xmm7,%xmm2
- movl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ subl %ecx,%ebx
.byte 102,15,56,0,253
-.L026ccm64_enc_outer:
+.L030ccm64_enc_outer:
movups (%ebp),%xmm0
movl %ebx,%ecx
movups (%esi),%xmm6
@@ -574,35 +642,41 @@
xorps %xmm0,%xmm2
movups 16(%ebp),%xmm1
xorps %xmm6,%xmm0
- leal 32(%ebp),%edx
xorps %xmm0,%xmm3
- movups (%edx),%xmm0
-.L027ccm64_enc2_loop:
+ movups 32(%ebp),%xmm0
+.L031ccm64_enc2_loop:
.byte 102,15,56,220,209
- decl %ecx
.byte 102,15,56,220,217
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
- leal 32(%edx),%edx
.byte 102,15,56,220,216
- movups (%edx),%xmm0
- jnz .L027ccm64_enc2_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L031ccm64_enc2_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
paddq 16(%esp),%xmm7
+ decl %eax
.byte 102,15,56,221,208
.byte 102,15,56,221,216
- decl %eax
leal 16(%esi),%esi
xorps %xmm2,%xmm6
movdqa %xmm7,%xmm2
movups %xmm6,(%edi)
+.byte 102,15,56,0,213
leal 16(%edi),%edi
-.byte 102,15,56,0,213
- jnz .L026ccm64_enc_outer
+ jnz .L030ccm64_enc_outer
movl 48(%esp),%esp
movl 40(%esp),%edi
movups %xmm3,(%edi)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
@@ -650,55 +724,58 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L028enc1_loop_5:
+.L032enc1_loop_5:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L028enc1_loop_5
+ jnz .L032enc1_loop_5
.byte 102,15,56,221,209
+ shll $4,%ebx
+ movl $16,%ecx
movups (%esi),%xmm6
paddq 16(%esp),%xmm7
leal 16(%esi),%esi
- jmp .L029ccm64_dec_outer
+ subl %ebx,%ecx
+ leal 32(%ebp,%ebx,1),%edx
+ movl %ecx,%ebx
+ jmp .L033ccm64_dec_outer
.align 16
-.L029ccm64_dec_outer:
+.L033ccm64_dec_outer:
xorps %xmm2,%xmm6
movdqa %xmm7,%xmm2
- movl %ebx,%ecx
movups %xmm6,(%edi)
leal 16(%edi),%edi
.byte 102,15,56,0,213
subl $1,%eax
- jz .L030ccm64_dec_break
+ jz .L034ccm64_dec_break
movups (%ebp),%xmm0
- shrl $1,%ecx
+ movl %ebx,%ecx
movups 16(%ebp),%xmm1
xorps %xmm0,%xmm6
- leal 32(%ebp),%edx
xorps %xmm0,%xmm2
xorps %xmm6,%xmm3
- movups (%edx),%xmm0
-.L031ccm64_dec2_loop:
+ movups 32(%ebp),%xmm0
+.L035ccm64_dec2_loop:
.byte 102,15,56,220,209
- decl %ecx
.byte 102,15,56,220,217
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
- leal 32(%edx),%edx
.byte 102,15,56,220,216
- movups (%edx),%xmm0
- jnz .L031ccm64_dec2_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L035ccm64_dec2_loop
movups (%esi),%xmm6
paddq 16(%esp),%xmm7
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- leal 16(%esi),%esi
.byte 102,15,56,221,208
.byte 102,15,56,221,216
- jmp .L029ccm64_dec_outer
+ leal 16(%esi),%esi
+ jmp .L033ccm64_dec_outer
.align 16
-.L030ccm64_dec_break:
+.L034ccm64_dec_break:
+ movl 240(%ebp),%ecx
movl %ebp,%edx
movups (%edx),%xmm0
movups 16(%edx),%xmm1
@@ -705,16 +782,24 @@
xorps %xmm0,%xmm6
leal 32(%edx),%edx
xorps %xmm6,%xmm3
-.L032enc1_loop_6:
+.L036enc1_loop_6:
.byte 102,15,56,220,217
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L032enc1_loop_6
+ jnz .L036enc1_loop_6
.byte 102,15,56,221,217
movl 48(%esp),%esp
movl 40(%esp),%edi
movups %xmm3,(%edi)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
@@ -740,7 +825,7 @@
andl $-16,%esp
movl %ebp,80(%esp)
cmpl $1,%eax
- je .L033ctr32_one_shortcut
+ je .L037ctr32_one_shortcut
movdqu (%ebx),%xmm7
movl $202182159,(%esp)
movl $134810123,4(%esp)
@@ -756,63 +841,59 @@
.byte 102,15,58,34,253,3
movl 240(%edx),%ecx
bswap %ebx
+ pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
- pxor %xmm0,%xmm0
movdqa (%esp),%xmm2
-.byte 102,15,58,34,203,0
+.byte 102,15,58,34,195,0
leal 3(%ebx),%ebp
-.byte 102,15,58,34,197,0
+.byte 102,15,58,34,205,0
incl %ebx
-.byte 102,15,58,34,203,1
+.byte 102,15,58,34,195,1
incl %ebp
-.byte 102,15,58,34,197,1
+.byte 102,15,58,34,205,1
incl %ebx
-.byte 102,15,58,34,203,2
+.byte 102,15,58,34,195,2
incl %ebp
-.byte 102,15,58,34,197,2
- movdqa %xmm1,48(%esp)
+.byte 102,15,58,34,205,2
+ movdqa %xmm0,48(%esp)
+.byte 102,15,56,0,194
+ movdqu (%edx),%xmm6
+ movdqa %xmm1,64(%esp)
.byte 102,15,56,0,202
- movdqa %xmm0,64(%esp)
-.byte 102,15,56,0,194
- pshufd $192,%xmm1,%xmm2
- pshufd $128,%xmm1,%xmm3
+ pshufd $192,%xmm0,%xmm2
+ pshufd $128,%xmm0,%xmm3
cmpl $6,%eax
- jb .L034ctr32_tail
+ jb .L038ctr32_tail
+ pxor %xmm6,%xmm7
+ shll $4,%ecx
+ movl $16,%ebx
movdqa %xmm7,32(%esp)
- shrl $1,%ecx
movl %edx,%ebp
- movl %ecx,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
subl $6,%eax
- jmp .L035ctr32_loop6
+ jmp .L039ctr32_loop6
.align 16
-.L035ctr32_loop6:
- pshufd $64,%xmm1,%xmm4
- movdqa 32(%esp),%xmm1
- pshufd $192,%xmm0,%xmm5
- por %xmm1,%xmm2
- pshufd $128,%xmm0,%xmm6
- por %xmm1,%xmm3
- pshufd $64,%xmm0,%xmm7
- por %xmm1,%xmm4
- por %xmm1,%xmm5
- por %xmm1,%xmm6
- por %xmm1,%xmm7
- movups (%ebp),%xmm0
- movups 16(%ebp),%xmm1
- leal 32(%ebp),%edx
- decl %ecx
+.L039ctr32_loop6:
+ pshufd $64,%xmm0,%xmm4
+ movdqa 32(%esp),%xmm0
+ pshufd $192,%xmm1,%xmm5
pxor %xmm0,%xmm2
+ pshufd $128,%xmm1,%xmm6
pxor %xmm0,%xmm3
+ pshufd $64,%xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
.byte 102,15,56,220,209
- pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm6
+ pxor %xmm0,%xmm7
.byte 102,15,56,220,217
- pxor %xmm0,%xmm5
+ movups 32(%ebp),%xmm0
+ movl %ebx,%ecx
.byte 102,15,56,220,225
- pxor %xmm0,%xmm6
.byte 102,15,56,220,233
- pxor %xmm0,%xmm7
.byte 102,15,56,220,241
- movups (%edx),%xmm0
.byte 102,15,56,220,249
call .L_aesni_encrypt6_enter
movups (%esi),%xmm1
@@ -823,11 +904,11 @@
movups %xmm2,(%edi)
movdqa 16(%esp),%xmm0
xorps %xmm1,%xmm4
- movdqa 48(%esp),%xmm1
+ movdqa 64(%esp),%xmm1
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
paddd %xmm0,%xmm1
- paddd 64(%esp),%xmm0
+ paddd 48(%esp),%xmm0
movdqa (%esp),%xmm2
movups 48(%esi),%xmm3
movups 64(%esi),%xmm4
@@ -834,40 +915,40 @@
xorps %xmm3,%xmm5
movups 80(%esi),%xmm3
leal 96(%esi),%esi
- movdqa %xmm1,48(%esp)
-.byte 102,15,56,0,202
+ movdqa %xmm0,48(%esp)
+.byte 102,15,56,0,194
xorps %xmm4,%xmm6
movups %xmm5,48(%edi)
xorps %xmm3,%xmm7
- movdqa %xmm0,64(%esp)
-.byte 102,15,56,0,194
+ movdqa %xmm1,64(%esp)
+.byte 102,15,56,0,202
movups %xmm6,64(%edi)
- pshufd $192,%xmm1,%xmm2
+ pshufd $192,%xmm0,%xmm2
movups %xmm7,80(%edi)
leal 96(%edi),%edi
- movl %ebx,%ecx
- pshufd $128,%xmm1,%xmm3
+ pshufd $128,%xmm0,%xmm3
subl $6,%eax
- jnc .L035ctr32_loop6
+ jnc .L039ctr32_loop6
addl $6,%eax
- jz .L036ctr32_ret
+ jz .L040ctr32_ret
+ movdqu (%ebp),%xmm7
movl %ebp,%edx
- leal 1(,%ecx,2),%ecx
- movdqa 32(%esp),%xmm7
-.L034ctr32_tail:
+ pxor 32(%esp),%xmm7
+ movl 240(%ebp),%ecx
+.L038ctr32_tail:
por %xmm7,%xmm2
cmpl $2,%eax
- jb .L037ctr32_one
- pshufd $64,%xmm1,%xmm4
+ jb .L041ctr32_one
+ pshufd $64,%xmm0,%xmm4
por %xmm7,%xmm3
- je .L038ctr32_two
- pshufd $192,%xmm0,%xmm5
+ je .L042ctr32_two
+ pshufd $192,%xmm1,%xmm5
por %xmm7,%xmm4
cmpl $4,%eax
- jb .L039ctr32_three
- pshufd $128,%xmm0,%xmm6
+ jb .L043ctr32_three
+ pshufd $128,%xmm1,%xmm6
por %xmm7,%xmm5
- je .L040ctr32_four
+ je .L044ctr32_four
por %xmm7,%xmm6
call _aesni_encrypt6
movups (%esi),%xmm1
@@ -885,30 +966,30 @@
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L033ctr32_one_shortcut:
+.L037ctr32_one_shortcut:
movups (%ebx),%xmm2
movl 240(%edx),%ecx
-.L037ctr32_one:
+.L041ctr32_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L041enc1_loop_7:
+.L045enc1_loop_7:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L041enc1_loop_7
+ jnz .L045enc1_loop_7
.byte 102,15,56,221,209
movups (%esi),%xmm6
xorps %xmm2,%xmm6
movups %xmm6,(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L038ctr32_two:
- call _aesni_encrypt3
+.L042ctr32_two:
+ call _aesni_encrypt2
movups (%esi),%xmm5
movups 16(%esi),%xmm6
xorps %xmm5,%xmm2
@@ -915,9 +996,9 @@
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L039ctr32_three:
+.L043ctr32_three:
call _aesni_encrypt3
movups (%esi),%xmm5
movups 16(%esi),%xmm6
@@ -928,9 +1009,9 @@
xorps %xmm7,%xmm4
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L040ctr32_four:
+.L044ctr32_four:
call _aesni_encrypt4
movups (%esi),%xmm6
movups 16(%esi),%xmm7
@@ -944,7 +1025,18 @@
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
-.L036ctr32_ret:
+.L040ctr32_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
movl 80(%esp),%esp
popl %edi
popl %esi
@@ -969,12 +1061,12 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L042enc1_loop_8:
+.L046enc1_loop_8:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L042enc1_loop_8
+ jnz .L046enc1_loop_8
.byte 102,15,56,221,209
movl 20(%esp),%esi
movl 24(%esp),%edi
@@ -998,12 +1090,14 @@
movl %edx,%ebp
movl %ecx,%ebx
subl $96,%eax
- jc .L043xts_enc_short
- shrl $1,%ecx
- movl %ecx,%ebx
- jmp .L044xts_enc_loop6
+ jc .L047xts_enc_short
+ shll $4,%ecx
+ movl $16,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ jmp .L048xts_enc_loop6
.align 16
-.L044xts_enc_loop6:
+.L048xts_enc_loop6:
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,(%esp)
@@ -1039,6 +1133,7 @@
pand %xmm3,%xmm7
movups (%esi),%xmm2
pxor %xmm1,%xmm7
+ movl %ebx,%ecx
movdqu 16(%esi),%xmm3
xorps %xmm0,%xmm2
movdqu 32(%esi),%xmm4
@@ -1054,19 +1149,17 @@
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
movups 16(%ebp),%xmm1
- leal 32(%ebp),%edx
pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
.byte 102,15,56,220,209
- pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
.byte 102,15,56,220,217
- pxor 48(%esp),%xmm5
- decl %ecx
+ pxor %xmm0,%xmm7
+ movups 32(%ebp),%xmm0
.byte 102,15,56,220,225
- pxor 64(%esp),%xmm6
.byte 102,15,56,220,233
- pxor %xmm0,%xmm7
.byte 102,15,56,220,241
- movups (%edx),%xmm0
.byte 102,15,56,220,249
call .L_aesni_encrypt6_enter
movdqa 80(%esp),%xmm1
@@ -1091,19 +1184,18 @@
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
- movl %ebx,%ecx
pxor %xmm2,%xmm1
subl $96,%eax
- jnc .L044xts_enc_loop6
- leal 1(,%ecx,2),%ecx
+ jnc .L048xts_enc_loop6
+ movl 240(%ebp),%ecx
movl %ebp,%edx
movl %ecx,%ebx
-.L043xts_enc_short:
+.L047xts_enc_short:
addl $96,%eax
- jz .L045xts_enc_done6x
+ jz .L049xts_enc_done6x
movdqa %xmm1,%xmm5
cmpl $32,%eax
- jb .L046xts_enc_one
+ jb .L050xts_enc_one
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
@@ -1110,7 +1202,7 @@
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
- je .L047xts_enc_two
+ je .L051xts_enc_two
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm6
@@ -1119,7 +1211,7 @@
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
cmpl $64,%eax
- jb .L048xts_enc_three
+ jb .L052xts_enc_three
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm7
@@ -1129,7 +1221,7 @@
pxor %xmm2,%xmm1
movdqa %xmm5,(%esp)
movdqa %xmm6,16(%esp)
- je .L049xts_enc_four
+ je .L053xts_enc_four
movdqa %xmm7,32(%esp)
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,48(%esp)
@@ -1161,9 +1253,9 @@
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
leal 80(%edi),%edi
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L046xts_enc_one:
+.L050xts_enc_one:
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
@@ -1171,20 +1263,20 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L051enc1_loop_9:
+.L055enc1_loop_9:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L051enc1_loop_9
+ jnz .L055enc1_loop_9
.byte 102,15,56,221,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
leal 16(%edi),%edi
movdqa %xmm5,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L047xts_enc_two:
+.L051xts_enc_two:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -1191,8 +1283,7 @@
leal 32(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
- xorps %xmm4,%xmm4
- call _aesni_encrypt3
+ call _aesni_encrypt2
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
@@ -1199,9 +1290,9 @@
movups %xmm3,16(%edi)
leal 32(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L048xts_enc_three:
+.L052xts_enc_three:
movaps %xmm1,%xmm7
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -1219,9 +1310,9 @@
movups %xmm4,32(%edi)
leal 48(%edi),%edi
movdqa %xmm7,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L049xts_enc_four:
+.L053xts_enc_four:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -1243,21 +1334,21 @@
movups %xmm5,48(%edi)
leal 64(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L045xts_enc_done6x:
+.L049xts_enc_done6x:
movl 112(%esp),%eax
andl $15,%eax
- jz .L052xts_enc_ret
+ jz .L056xts_enc_ret
movdqa %xmm1,%xmm5
movl %eax,112(%esp)
- jmp .L053xts_enc_steal
+ jmp .L057xts_enc_steal
.align 16
-.L050xts_enc_done:
+.L054xts_enc_done:
movl 112(%esp),%eax
pxor %xmm0,%xmm0
andl $15,%eax
- jz .L052xts_enc_ret
+ jz .L056xts_enc_ret
pcmpgtd %xmm1,%xmm0
movl %eax,112(%esp)
pshufd $19,%xmm0,%xmm5
@@ -1264,7 +1355,7 @@
paddq %xmm1,%xmm1
pand 96(%esp),%xmm5
pxor %xmm1,%xmm5
-.L053xts_enc_steal:
+.L057xts_enc_steal:
movzbl (%esi),%ecx
movzbl -16(%edi),%edx
leal 1(%esi),%esi
@@ -1272,7 +1363,7 @@
movb %dl,(%edi)
leal 1(%edi),%edi
subl $1,%eax
- jnz .L053xts_enc_steal
+ jnz .L057xts_enc_steal
subl 112(%esp),%edi
movl %ebp,%edx
movl %ebx,%ecx
@@ -1282,16 +1373,30 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L054enc1_loop_10:
+.L058enc1_loop_10:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L054enc1_loop_10
+ jnz .L058enc1_loop_10
.byte 102,15,56,221,209
xorps %xmm5,%xmm2
movups %xmm2,-16(%edi)
-.L052xts_enc_ret:
+.L056xts_enc_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ movdqa %xmm0,(%esp)
+ pxor %xmm3,%xmm3
+ movdqa %xmm0,16(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm0,80(%esp)
movl 116(%esp),%esp
popl %edi
popl %esi
@@ -1316,12 +1421,12 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L055enc1_loop_11:
+.L059enc1_loop_11:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L055enc1_loop_11
+ jnz .L059enc1_loop_11
.byte 102,15,56,221,209
movl 20(%esp),%esi
movl 24(%esp),%edi
@@ -1350,12 +1455,14 @@
pcmpgtd %xmm1,%xmm0
andl $-16,%eax
subl $96,%eax
- jc .L056xts_dec_short
- shrl $1,%ecx
- movl %ecx,%ebx
- jmp .L057xts_dec_loop6
+ jc .L060xts_dec_short
+ shll $4,%ecx
+ movl $16,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ jmp .L061xts_dec_loop6
.align 16
-.L057xts_dec_loop6:
+.L061xts_dec_loop6:
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,(%esp)
@@ -1391,6 +1498,7 @@
pand %xmm3,%xmm7
movups (%esi),%xmm2
pxor %xmm1,%xmm7
+ movl %ebx,%ecx
movdqu 16(%esi),%xmm3
xorps %xmm0,%xmm2
movdqu 32(%esi),%xmm4
@@ -1406,19 +1514,17 @@
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
movups 16(%ebp),%xmm1
- leal 32(%ebp),%edx
pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
.byte 102,15,56,222,209
- pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
.byte 102,15,56,222,217
- pxor 48(%esp),%xmm5
- decl %ecx
+ pxor %xmm0,%xmm7
+ movups 32(%ebp),%xmm0
.byte 102,15,56,222,225
- pxor 64(%esp),%xmm6
.byte 102,15,56,222,233
- pxor %xmm0,%xmm7
.byte 102,15,56,222,241
- movups (%edx),%xmm0
.byte 102,15,56,222,249
call .L_aesni_decrypt6_enter
movdqa 80(%esp),%xmm1
@@ -1443,19 +1549,18 @@
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
- movl %ebx,%ecx
pxor %xmm2,%xmm1
subl $96,%eax
- jnc .L057xts_dec_loop6
- leal 1(,%ecx,2),%ecx
+ jnc .L061xts_dec_loop6
+ movl 240(%ebp),%ecx
movl %ebp,%edx
movl %ecx,%ebx
-.L056xts_dec_short:
+.L060xts_dec_short:
addl $96,%eax
- jz .L058xts_dec_done6x
+ jz .L062xts_dec_done6x
movdqa %xmm1,%xmm5
cmpl $32,%eax
- jb .L059xts_dec_one
+ jb .L063xts_dec_one
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
@@ -1462,7 +1567,7 @@
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
- je .L060xts_dec_two
+ je .L064xts_dec_two
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm6
@@ -1471,7 +1576,7 @@
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
cmpl $64,%eax
- jb .L061xts_dec_three
+ jb .L065xts_dec_three
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm7
@@ -1481,7 +1586,7 @@
pxor %xmm2,%xmm1
movdqa %xmm5,(%esp)
movdqa %xmm6,16(%esp)
- je .L062xts_dec_four
+ je .L066xts_dec_four
movdqa %xmm7,32(%esp)
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,48(%esp)
@@ -1513,9 +1618,9 @@
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
leal 80(%edi),%edi
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L059xts_dec_one:
+.L063xts_dec_one:
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
@@ -1523,20 +1628,20 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L064dec1_loop_12:
+.L068dec1_loop_12:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L064dec1_loop_12
+ jnz .L068dec1_loop_12
.byte 102,15,56,223,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
leal 16(%edi),%edi
movdqa %xmm5,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L060xts_dec_two:
+.L064xts_dec_two:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -1543,7 +1648,7 @@
leal 32(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
- call _aesni_decrypt3
+ call _aesni_decrypt2
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
@@ -1550,9 +1655,9 @@
movups %xmm3,16(%edi)
leal 32(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L061xts_dec_three:
+.L065xts_dec_three:
movaps %xmm1,%xmm7
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -1570,9 +1675,9 @@
movups %xmm4,32(%edi)
leal 48(%edi),%edi
movdqa %xmm7,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L062xts_dec_four:
+.L066xts_dec_four:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -1594,20 +1699,20 @@
movups %xmm5,48(%edi)
leal 64(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L058xts_dec_done6x:
+.L062xts_dec_done6x:
movl 112(%esp),%eax
andl $15,%eax
- jz .L065xts_dec_ret
+ jz .L069xts_dec_ret
movl %eax,112(%esp)
- jmp .L066xts_dec_only_one_more
+ jmp .L070xts_dec_only_one_more
.align 16
-.L063xts_dec_done:
+.L067xts_dec_done:
movl 112(%esp),%eax
pxor %xmm0,%xmm0
andl $15,%eax
- jz .L065xts_dec_ret
+ jz .L069xts_dec_ret
pcmpgtd %xmm1,%xmm0
movl %eax,112(%esp)
pshufd $19,%xmm0,%xmm2
@@ -1617,7 +1722,7 @@
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
-.L066xts_dec_only_one_more:
+.L070xts_dec_only_one_more:
pshufd $19,%xmm0,%xmm5
movdqa %xmm1,%xmm6
paddq %xmm1,%xmm1
@@ -1631,16 +1736,16 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L067dec1_loop_13:
+.L071dec1_loop_13:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L067dec1_loop_13
+ jnz .L071dec1_loop_13
.byte 102,15,56,223,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
-.L068xts_dec_steal:
+.L072xts_dec_steal:
movzbl 16(%esi),%ecx
movzbl (%edi),%edx
leal 1(%esi),%esi
@@ -1648,7 +1753,7 @@
movb %dl,16(%edi)
leal 1(%edi),%edi
subl $1,%eax
- jnz .L068xts_dec_steal
+ jnz .L072xts_dec_steal
subl 112(%esp),%edi
movl %ebp,%edx
movl %ebx,%ecx
@@ -1658,16 +1763,30 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L069dec1_loop_14:
+.L073dec1_loop_14:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L069dec1_loop_14
+ jnz .L073dec1_loop_14
.byte 102,15,56,223,209
xorps %xmm6,%xmm2
movups %xmm2,(%edi)
-.L065xts_dec_ret:
+.L069xts_dec_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ movdqa %xmm0,(%esp)
+ pxor %xmm3,%xmm3
+ movdqa %xmm0,16(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm0,80(%esp)
movl 116(%esp),%esp
popl %edi
popl %esi
@@ -1693,7 +1812,7 @@
movl 32(%esp),%edx
movl 36(%esp),%ebp
testl %eax,%eax
- jz .L070cbc_abort
+ jz .L074cbc_abort
cmpl $0,40(%esp)
xchgl %esp,%ebx
movups (%ebp),%xmm7
@@ -1701,14 +1820,14 @@
movl %edx,%ebp
movl %ebx,16(%esp)
movl %ecx,%ebx
- je .L071cbc_decrypt
+ je .L075cbc_decrypt
movaps %xmm7,%xmm2
cmpl $16,%eax
- jb .L072cbc_enc_tail
+ jb .L076cbc_enc_tail
subl $16,%eax
- jmp .L073cbc_enc_loop
+ jmp .L077cbc_enc_loop
.align 16
-.L073cbc_enc_loop:
+.L077cbc_enc_loop:
movups (%esi),%xmm7
leal 16(%esi),%esi
movups (%edx),%xmm0
@@ -1716,12 +1835,12 @@
xorps %xmm0,%xmm7
leal 32(%edx),%edx
xorps %xmm7,%xmm2
-.L074enc1_loop_15:
+.L078enc1_loop_15:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L074enc1_loop_15
+ jnz .L078enc1_loop_15
.byte 102,15,56,221,209
movl %ebx,%ecx
movl %ebp,%edx
@@ -1728,12 +1847,13 @@
movups %xmm2,(%edi)
leal 16(%edi),%edi
subl $16,%eax
- jnc .L073cbc_enc_loop
+ jnc .L077cbc_enc_loop
addl $16,%eax
- jnz .L072cbc_enc_tail
+ jnz .L076cbc_enc_tail
movaps %xmm2,%xmm7
- jmp .L075cbc_ret
-.L072cbc_enc_tail:
+ pxor %xmm2,%xmm2
+ jmp .L079cbc_ret
+.L076cbc_enc_tail:
movl %eax,%ecx
.long 2767451785
movl $16,%ecx
@@ -1744,20 +1864,20 @@
movl %ebx,%ecx
movl %edi,%esi
movl %ebp,%edx
- jmp .L073cbc_enc_loop
+ jmp .L077cbc_enc_loop
.align 16
-.L071cbc_decrypt:
+.L075cbc_decrypt:
cmpl $80,%eax
- jbe .L076cbc_dec_tail
+ jbe .L080cbc_dec_tail
movaps %xmm7,(%esp)
subl $80,%eax
- jmp .L077cbc_dec_loop6_enter
+ jmp .L081cbc_dec_loop6_enter
.align 16
-.L078cbc_dec_loop6:
+.L082cbc_dec_loop6:
movaps %xmm0,(%esp)
movups %xmm7,(%edi)
leal 16(%edi),%edi
-.L077cbc_dec_loop6_enter:
+.L081cbc_dec_loop6_enter:
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
@@ -1787,28 +1907,28 @@
movups %xmm6,64(%edi)
leal 80(%edi),%edi
subl $96,%eax
- ja .L078cbc_dec_loop6
+ ja .L082cbc_dec_loop6
movaps %xmm7,%xmm2
movaps %xmm0,%xmm7
addl $80,%eax
- jle .L079cbc_dec_tail_collected
+ jle .L083cbc_dec_clear_tail_collected
movups %xmm2,(%edi)
leal 16(%edi),%edi
-.L076cbc_dec_tail:
+.L080cbc_dec_tail:
movups (%esi),%xmm2
movaps %xmm2,%xmm6
cmpl $16,%eax
- jbe .L080cbc_dec_one
+ jbe .L084cbc_dec_one
movups 16(%esi),%xmm3
movaps %xmm3,%xmm5
cmpl $32,%eax
- jbe .L081cbc_dec_two
+ jbe .L085cbc_dec_two
movups 32(%esi),%xmm4
cmpl $48,%eax
- jbe .L082cbc_dec_three
+ jbe .L086cbc_dec_three
movups 48(%esi),%xmm5
cmpl $64,%eax
- jbe .L083cbc_dec_four
+ jbe .L087cbc_dec_four
movups 64(%esi),%xmm6
movaps %xmm7,(%esp)
movups (%esi),%xmm2
@@ -1826,43 +1946,47 @@
xorps %xmm0,%xmm6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
movups %xmm4,32(%edi)
+ pxor %xmm4,%xmm4
movups %xmm5,48(%edi)
+ pxor %xmm5,%xmm5
leal 64(%edi),%edi
movaps %xmm6,%xmm2
+ pxor %xmm6,%xmm6
subl $80,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L080cbc_dec_one:
+.L084cbc_dec_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L084dec1_loop_16:
+.L089dec1_loop_16:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L084dec1_loop_16
+ jnz .L089dec1_loop_16
.byte 102,15,56,223,209
xorps %xmm7,%xmm2
movaps %xmm6,%xmm7
subl $16,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L081cbc_dec_two:
- xorps %xmm4,%xmm4
- call _aesni_decrypt3
+.L085cbc_dec_two:
+ call _aesni_decrypt2
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movaps %xmm3,%xmm2
+ pxor %xmm3,%xmm3
leal 16(%edi),%edi
movaps %xmm5,%xmm7
subl $32,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L082cbc_dec_three:
+.L086cbc_dec_three:
call _aesni_decrypt3
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
@@ -1869,13 +1993,15 @@
xorps %xmm5,%xmm4
movups %xmm2,(%edi)
movaps %xmm4,%xmm2
+ pxor %xmm4,%xmm4
movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
leal 32(%edi),%edi
movups 32(%esi),%xmm7
subl $48,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L083cbc_dec_four:
+.L087cbc_dec_four:
call _aesni_decrypt4
movups 16(%esi),%xmm1
movups 32(%esi),%xmm0
@@ -1885,28 +2011,44 @@
movups %xmm2,(%edi)
xorps %xmm1,%xmm4
movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
+ pxor %xmm4,%xmm4
leal 48(%edi),%edi
movaps %xmm5,%xmm2
+ pxor %xmm5,%xmm5
subl $64,%eax
-.L079cbc_dec_tail_collected:
+ jmp .L088cbc_dec_tail_collected
+.align 16
+.L083cbc_dec_clear_tail_collected:
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+.L088cbc_dec_tail_collected:
andl $15,%eax
- jnz .L085cbc_dec_tail_partial
+ jnz .L090cbc_dec_tail_partial
movups %xmm2,(%edi)
- jmp .L075cbc_ret
+ pxor %xmm0,%xmm0
+ jmp .L079cbc_ret
.align 16
-.L085cbc_dec_tail_partial:
+.L090cbc_dec_tail_partial:
movaps %xmm2,(%esp)
+ pxor %xmm0,%xmm0
movl $16,%ecx
movl %esp,%esi
subl %eax,%ecx
.long 2767451785
-.L075cbc_ret:
+ movdqa %xmm2,(%esp)
+.L079cbc_ret:
movl 16(%esp),%esp
movl 36(%esp),%ebp
+ pxor %xmm2,%xmm2
+ pxor %xmm1,%xmm1
movups %xmm7,(%ebp)
-.L070cbc_abort:
+ pxor %xmm7,%xmm7
+.L074cbc_abort:
popl %edi
popl %esi
popl %ebx
@@ -1916,52 +2058,62 @@
.type _aesni_set_encrypt_key, at function
.align 16
_aesni_set_encrypt_key:
+ pushl %ebp
+ pushl %ebx
testl %eax,%eax
- jz .L086bad_pointer
+ jz .L091bad_pointer
testl %edx,%edx
- jz .L086bad_pointer
+ jz .L091bad_pointer
+ call .L092pic
+.L092pic:
+ popl %ebx
+ leal .Lkey_const-.L092pic(%ebx),%ebx
+ leal OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp
movups (%eax),%xmm0
xorps %xmm4,%xmm4
+ movl 4(%ebp),%ebp
leal 16(%edx),%edx
+ andl $268437504,%ebp
cmpl $256,%ecx
- je .L08714rounds
+ je .L09314rounds
cmpl $192,%ecx
- je .L08812rounds
+ je .L09412rounds
cmpl $128,%ecx
- jne .L089bad_keybits
+ jne .L095bad_keybits
.align 16
-.L09010rounds:
+.L09610rounds:
+ cmpl $268435456,%ebp
+ je .L09710rounds_alt
movl $9,%ecx
movups %xmm0,-16(%edx)
.byte 102,15,58,223,200,1
- call .L091key_128_cold
+ call .L098key_128_cold
.byte 102,15,58,223,200,2
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,4
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,8
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,16
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,32
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,64
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,128
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,27
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,54
- call .L092key_128
+ call .L099key_128
movups %xmm0,(%edx)
movl %ecx,80(%edx)
- xorl %eax,%eax
- ret
+ jmp .L100good_key
.align 16
-.L092key_128:
+.L099key_128:
movups %xmm0,(%edx)
leal 16(%edx),%edx
-.L091key_128_cold:
+.L098key_128_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
@@ -1970,38 +2122,91 @@
xorps %xmm1,%xmm0
ret
.align 16
-.L08812rounds:
+.L09710rounds_alt:
+ movdqa (%ebx),%xmm5
+ movl $8,%ecx
+ movdqa 32(%ebx),%xmm4
+ movdqa %xmm0,%xmm2
+ movdqu %xmm0,-16(%edx)
+.L101loop_key128:
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ pslld $1,%xmm4
+ leal 16(%edx),%edx
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,-16(%edx)
+ movdqa %xmm0,%xmm2
+ decl %ecx
+ jnz .L101loop_key128
+ movdqa 48(%ebx),%xmm4
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ pslld $1,%xmm4
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,(%edx)
+ movdqa %xmm0,%xmm2
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,16(%edx)
+ movl $9,%ecx
+ movl %ecx,96(%edx)
+ jmp .L100good_key
+.align 16
+.L09412rounds:
movq 16(%eax),%xmm2
+ cmpl $268435456,%ebp
+ je .L10212rounds_alt
movl $11,%ecx
movups %xmm0,-16(%edx)
.byte 102,15,58,223,202,1
- call .L093key_192a_cold
+ call .L103key_192a_cold
.byte 102,15,58,223,202,2
- call .L094key_192b
+ call .L104key_192b
.byte 102,15,58,223,202,4
- call .L095key_192a
+ call .L105key_192a
.byte 102,15,58,223,202,8
- call .L094key_192b
+ call .L104key_192b
.byte 102,15,58,223,202,16
- call .L095key_192a
+ call .L105key_192a
.byte 102,15,58,223,202,32
- call .L094key_192b
+ call .L104key_192b
.byte 102,15,58,223,202,64
- call .L095key_192a
+ call .L105key_192a
.byte 102,15,58,223,202,128
- call .L094key_192b
+ call .L104key_192b
movups %xmm0,(%edx)
movl %ecx,48(%edx)
- xorl %eax,%eax
- ret
+ jmp .L100good_key
.align 16
-.L095key_192a:
+.L105key_192a:
movups %xmm0,(%edx)
leal 16(%edx),%edx
.align 16
-.L093key_192a_cold:
+.L103key_192a_cold:
movaps %xmm2,%xmm5
-.L096key_192b_warm:
+.L106key_192b_warm:
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
@@ -2015,7 +2220,7 @@
pxor %xmm3,%xmm2
ret
.align 16
-.L094key_192b:
+.L104key_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
movups %xmm5,(%edx)
@@ -2022,49 +2227,83 @@
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%edx)
leal 32(%edx),%edx
- jmp .L096key_192b_warm
+ jmp .L106key_192b_warm
.align 16
-.L08714rounds:
+.L10212rounds_alt:
+ movdqa 16(%ebx),%xmm5
+ movdqa 32(%ebx),%xmm4
+ movl $8,%ecx
+ movdqu %xmm0,-16(%edx)
+.L107loop_key192:
+ movq %xmm2,(%edx)
+ movdqa %xmm2,%xmm1
+.byte 102,15,56,0,213
+.byte 102,15,56,221,212
+ pslld $1,%xmm4
+ leal 24(%edx),%edx
+ movdqa %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm3,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pxor %xmm2,%xmm0
+ pxor %xmm3,%xmm2
+ movdqu %xmm0,-16(%edx)
+ decl %ecx
+ jnz .L107loop_key192
+ movl $11,%ecx
+ movl %ecx,32(%edx)
+ jmp .L100good_key
+.align 16
+.L09314rounds:
movups 16(%eax),%xmm2
+ leal 16(%edx),%edx
+ cmpl $268435456,%ebp
+ je .L10814rounds_alt
movl $13,%ecx
- leal 16(%edx),%edx
movups %xmm0,-32(%edx)
movups %xmm2,-16(%edx)
.byte 102,15,58,223,202,1
- call .L097key_256a_cold
+ call .L109key_256a_cold
.byte 102,15,58,223,200,1
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,2
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,2
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,4
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,4
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,8
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,8
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,16
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,16
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,32
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,32
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,64
- call .L099key_256a
+ call .L111key_256a
movups %xmm0,(%edx)
movl %ecx,16(%edx)
xorl %eax,%eax
- ret
+ jmp .L100good_key
.align 16
-.L099key_256a:
+.L111key_256a:
movups %xmm2,(%edx)
leal 16(%edx),%edx
-.L097key_256a_cold:
+.L109key_256a_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
@@ -2073,7 +2312,7 @@
xorps %xmm1,%xmm0
ret
.align 16
-.L098key_256b:
+.L110key_256b:
movups %xmm0,(%edx)
leal 16(%edx),%edx
shufps $16,%xmm2,%xmm4
@@ -2083,13 +2322,70 @@
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
ret
+.align 16
+.L10814rounds_alt:
+ movdqa (%ebx),%xmm5
+ movdqa 32(%ebx),%xmm4
+ movl $7,%ecx
+ movdqu %xmm0,-32(%edx)
+ movdqa %xmm2,%xmm1
+ movdqu %xmm2,-16(%edx)
+.L112loop_key256:
+.byte 102,15,56,0,213
+.byte 102,15,56,221,212
+ movdqa %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm3,%xmm0
+ pslld $1,%xmm4
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,(%edx)
+ decl %ecx
+ jz .L113done_key256
+ pshufd $255,%xmm0,%xmm2
+ pxor %xmm3,%xmm3
+.byte 102,15,56,221,211
+ movdqa %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm3,%xmm1
+ pxor %xmm1,%xmm2
+ movdqu %xmm2,16(%edx)
+ leal 32(%edx),%edx
+ movdqa %xmm2,%xmm1
+ jmp .L112loop_key256
+.L113done_key256:
+ movl $13,%ecx
+ movl %ecx,16(%edx)
+.L100good_key:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ xorl %eax,%eax
+ popl %ebx
+ popl %ebp
+ ret
.align 4
-.L086bad_pointer:
+.L091bad_pointer:
movl $-1,%eax
+ popl %ebx
+ popl %ebp
ret
.align 4
-.L089bad_keybits:
+.L095bad_keybits:
+ pxor %xmm0,%xmm0
movl $-2,%eax
+ popl %ebx
+ popl %ebp
ret
.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key
.globl aesni_set_encrypt_key
@@ -2115,7 +2411,7 @@
movl 12(%esp),%edx
shll $4,%ecx
testl %eax,%eax
- jnz .L100dec_key_ret
+ jnz .L114dec_key_ret
leal 16(%edx,%ecx,1),%eax
movups (%edx),%xmm0
movups (%eax),%xmm1
@@ -2123,7 +2419,7 @@
movups %xmm1,(%edx)
leal 16(%edx),%edx
leal -16(%eax),%eax
-.L101dec_key_inverse:
+.L115dec_key_inverse:
movups (%edx),%xmm0
movups (%eax),%xmm1
.byte 102,15,56,219,192
@@ -2133,18 +2429,27 @@
movups %xmm0,16(%eax)
movups %xmm1,-16(%edx)
cmpl %edx,%eax
- ja .L101dec_key_inverse
+ ja .L115dec_key_inverse
movups (%edx),%xmm0
.byte 102,15,56,219,192
movups %xmm0,(%edx)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
xorl %eax,%eax
-.L100dec_key_ret:
+.L114dec_key_ret:
ret
.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
+.align 64
+.Lkey_const:
+.long 202313229,202313229,202313229,202313229
+.long 67569157,67569157,67569157,67569157
+.long 1,1,1,1
+.long 27,27,27,27
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
+.comm OPENSSL_ia32cap_P,16,4
#else
.file "aesni-x86.S"
.text
@@ -2169,7 +2474,10 @@
leal 16(%edx),%edx
jnz .L000enc1_loop_1
.byte 102,15,56,221,209
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
movups %xmm2,(%eax)
+ pxor %xmm2,%xmm2
ret
.size aesni_encrypt,.-.L_aesni_encrypt_begin
.globl aesni_decrypt
@@ -2193,32 +2501,90 @@
leal 16(%edx),%edx
jnz .L001dec1_loop_2
.byte 102,15,56,223,209
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
movups %xmm2,(%eax)
+ pxor %xmm2,%xmm2
ret
.size aesni_decrypt,.-.L_aesni_decrypt_begin
+.type _aesni_encrypt2, at function
+.align 16
+_aesni_encrypt2:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L002enc2_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L002enc2_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ ret
+.size _aesni_encrypt2,.-_aesni_encrypt2
+.type _aesni_decrypt2, at function
+.align 16
+_aesni_decrypt2:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L003dec2_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L003dec2_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+ ret
+.size _aesni_decrypt2,.-_aesni_decrypt2
.type _aesni_encrypt3, at function
.align 16
_aesni_encrypt3:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
- movups (%edx),%xmm0
-.L002enc3_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L004enc3_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- decl %ecx
.byte 102,15,56,220,225
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- leal 32(%edx),%edx
.byte 102,15,56,220,224
- movups (%edx),%xmm0
- jnz .L002enc3_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L004enc3_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
@@ -2231,25 +2597,26 @@
.align 16
_aesni_decrypt3:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
- movups (%edx),%xmm0
-.L003dec3_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L005dec3_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
- decl %ecx
.byte 102,15,56,222,225
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
- leal 32(%edx),%edx
.byte 102,15,56,222,224
- movups (%edx),%xmm0
- jnz .L003dec3_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L005dec3_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
@@ -2263,27 +2630,29 @@
_aesni_encrypt4:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
- shrl $1,%ecx
- leal 32(%edx),%edx
+ shll $4,%ecx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
- movups (%edx),%xmm0
-.L004enc4_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 15,31,64,0
+ addl $16,%ecx
+.L006enc4_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- decl %ecx
.byte 102,15,56,220,225
.byte 102,15,56,220,233
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- leal 32(%edx),%edx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
- movups (%edx),%xmm0
- jnz .L004enc4_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L006enc4_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
@@ -2299,27 +2668,29 @@
_aesni_decrypt4:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
- shrl $1,%ecx
- leal 32(%edx),%edx
+ shll $4,%ecx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
- movups (%edx),%xmm0
-.L005dec4_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 15,31,64,0
+ addl $16,%ecx
+.L007dec4_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
- decl %ecx
.byte 102,15,56,222,225
.byte 102,15,56,222,233
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
- leal 32(%edx),%edx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
- movups (%edx),%xmm0
- jnz .L005dec4_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L007dec4_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
@@ -2334,45 +2705,42 @@
.align 16
_aesni_encrypt6:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
.byte 102,15,56,220,209
- pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
.byte 102,15,56,220,217
- pxor %xmm0,%xmm5
- decl %ecx
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
.byte 102,15,56,220,225
- pxor %xmm0,%xmm6
-.byte 102,15,56,220,233
pxor %xmm0,%xmm7
-.byte 102,15,56,220,241
- movups (%edx),%xmm0
-.byte 102,15,56,220,249
- jmp .L_aesni_encrypt6_enter
+ movups (%edx,%ecx,1),%xmm0
+ addl $16,%ecx
+ jmp .L008_aesni_encrypt6_inner
.align 16
-.L006enc6_loop:
+.L009enc6_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- decl %ecx
.byte 102,15,56,220,225
+.L008_aesni_encrypt6_inner:
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
-.align 16
.L_aesni_encrypt6_enter:
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- leal 32(%edx),%edx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
- movups (%edx),%xmm0
- jnz .L006enc6_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L009enc6_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
@@ -2391,45 +2759,42 @@
.align 16
_aesni_decrypt6:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
.byte 102,15,56,222,209
- pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
.byte 102,15,56,222,217
- pxor %xmm0,%xmm5
- decl %ecx
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
.byte 102,15,56,222,225
- pxor %xmm0,%xmm6
-.byte 102,15,56,222,233
pxor %xmm0,%xmm7
-.byte 102,15,56,222,241
- movups (%edx),%xmm0
-.byte 102,15,56,222,249
- jmp .L_aesni_decrypt6_enter
+ movups (%edx,%ecx,1),%xmm0
+ addl $16,%ecx
+ jmp .L010_aesni_decrypt6_inner
.align 16
-.L007dec6_loop:
+.L011dec6_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
- decl %ecx
.byte 102,15,56,222,225
+.L010_aesni_decrypt6_inner:
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
-.align 16
.L_aesni_decrypt6_enter:
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
- leal 32(%edx),%edx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
- movups (%edx),%xmm0
- jnz .L007dec6_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L011dec6_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
@@ -2459,14 +2824,14 @@
movl 32(%esp),%edx
movl 36(%esp),%ebx
andl $-16,%eax
- jz .L008ecb_ret
+ jz .L012ecb_ret
movl 240(%edx),%ecx
testl %ebx,%ebx
- jz .L009ecb_decrypt
+ jz .L013ecb_decrypt
movl %edx,%ebp
movl %ecx,%ebx
cmpl $96,%eax
- jb .L010ecb_enc_tail
+ jb .L014ecb_enc_tail
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
@@ -2475,9 +2840,9 @@
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
subl $96,%eax
- jmp .L011ecb_enc_loop6_enter
+ jmp .L015ecb_enc_loop6_enter
.align 16
-.L012ecb_enc_loop6:
+.L016ecb_enc_loop6:
movups %xmm2,(%edi)
movdqu (%esi),%xmm2
movups %xmm3,16(%edi)
@@ -2492,12 +2857,12 @@
leal 96(%edi),%edi
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
-.L011ecb_enc_loop6_enter:
+.L015ecb_enc_loop6_enter:
call _aesni_encrypt6
movl %ebp,%edx
movl %ebx,%ecx
subl $96,%eax
- jnc .L012ecb_enc_loop6
+ jnc .L016ecb_enc_loop6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
@@ -2506,18 +2871,18 @@
movups %xmm7,80(%edi)
leal 96(%edi),%edi
addl $96,%eax
- jz .L008ecb_ret
-.L010ecb_enc_tail:
+ jz .L012ecb_ret
+.L014ecb_enc_tail:
movups (%esi),%xmm2
cmpl $32,%eax
- jb .L013ecb_enc_one
+ jb .L017ecb_enc_one
movups 16(%esi),%xmm3
- je .L014ecb_enc_two
+ je .L018ecb_enc_two
movups 32(%esi),%xmm4
cmpl $64,%eax
- jb .L015ecb_enc_three
+ jb .L019ecb_enc_three
movups 48(%esi),%xmm5
- je .L016ecb_enc_four
+ je .L020ecb_enc_four
movups 64(%esi),%xmm6
xorps %xmm7,%xmm7
call _aesni_encrypt6
@@ -2526,50 +2891,49 @@
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L013ecb_enc_one:
+.L017ecb_enc_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L017enc1_loop_3:
+.L021enc1_loop_3:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L017enc1_loop_3
+ jnz .L021enc1_loop_3
.byte 102,15,56,221,209
movups %xmm2,(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L014ecb_enc_two:
- xorps %xmm4,%xmm4
- call _aesni_encrypt3
+.L018ecb_enc_two:
+ call _aesni_encrypt2
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L015ecb_enc_three:
+.L019ecb_enc_three:
call _aesni_encrypt3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L016ecb_enc_four:
+.L020ecb_enc_four:
call _aesni_encrypt4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L009ecb_decrypt:
+.L013ecb_decrypt:
movl %edx,%ebp
movl %ecx,%ebx
cmpl $96,%eax
- jb .L018ecb_dec_tail
+ jb .L022ecb_dec_tail
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
@@ -2578,9 +2942,9 @@
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
subl $96,%eax
- jmp .L019ecb_dec_loop6_enter
+ jmp .L023ecb_dec_loop6_enter
.align 16
-.L020ecb_dec_loop6:
+.L024ecb_dec_loop6:
movups %xmm2,(%edi)
movdqu (%esi),%xmm2
movups %xmm3,16(%edi)
@@ -2595,12 +2959,12 @@
leal 96(%edi),%edi
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
-.L019ecb_dec_loop6_enter:
+.L023ecb_dec_loop6_enter:
call _aesni_decrypt6
movl %ebp,%edx
movl %ebx,%ecx
subl $96,%eax
- jnc .L020ecb_dec_loop6
+ jnc .L024ecb_dec_loop6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
@@ -2609,18 +2973,18 @@
movups %xmm7,80(%edi)
leal 96(%edi),%edi
addl $96,%eax
- jz .L008ecb_ret
-.L018ecb_dec_tail:
+ jz .L012ecb_ret
+.L022ecb_dec_tail:
movups (%esi),%xmm2
cmpl $32,%eax
- jb .L021ecb_dec_one
+ jb .L025ecb_dec_one
movups 16(%esi),%xmm3
- je .L022ecb_dec_two
+ je .L026ecb_dec_two
movups 32(%esi),%xmm4
cmpl $64,%eax
- jb .L023ecb_dec_three
+ jb .L027ecb_dec_three
movups 48(%esi),%xmm5
- je .L024ecb_dec_four
+ je .L028ecb_dec_four
movups 64(%esi),%xmm6
xorps %xmm7,%xmm7
call _aesni_decrypt6
@@ -2629,44 +2993,51 @@
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L021ecb_dec_one:
+.L025ecb_dec_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L025dec1_loop_4:
+.L029dec1_loop_4:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L025dec1_loop_4
+ jnz .L029dec1_loop_4
.byte 102,15,56,223,209
movups %xmm2,(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L022ecb_dec_two:
- xorps %xmm4,%xmm4
- call _aesni_decrypt3
+.L026ecb_dec_two:
+ call _aesni_decrypt2
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L023ecb_dec_three:
+.L027ecb_dec_three:
call _aesni_decrypt3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L024ecb_dec_four:
+.L028ecb_dec_four:
call _aesni_decrypt4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
-.L008ecb_ret:
+.L012ecb_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
@@ -2705,13 +3076,15 @@
movl %ebp,20(%esp)
movl %ebp,24(%esp)
movl %ebp,28(%esp)
- shrl $1,%ecx
+ shll $4,%ecx
+ movl $16,%ebx
leal (%edx),%ebp
movdqa (%esp),%xmm5
movdqa %xmm7,%xmm2
- movl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ subl %ecx,%ebx
.byte 102,15,56,0,253
-.L026ccm64_enc_outer:
+.L030ccm64_enc_outer:
movups (%ebp),%xmm0
movl %ebx,%ecx
movups (%esi),%xmm6
@@ -2718,35 +3091,41 @@
xorps %xmm0,%xmm2
movups 16(%ebp),%xmm1
xorps %xmm6,%xmm0
- leal 32(%ebp),%edx
xorps %xmm0,%xmm3
- movups (%edx),%xmm0
-.L027ccm64_enc2_loop:
+ movups 32(%ebp),%xmm0
+.L031ccm64_enc2_loop:
.byte 102,15,56,220,209
- decl %ecx
.byte 102,15,56,220,217
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
- leal 32(%edx),%edx
.byte 102,15,56,220,216
- movups (%edx),%xmm0
- jnz .L027ccm64_enc2_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L031ccm64_enc2_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
paddq 16(%esp),%xmm7
+ decl %eax
.byte 102,15,56,221,208
.byte 102,15,56,221,216
- decl %eax
leal 16(%esi),%esi
xorps %xmm2,%xmm6
movdqa %xmm7,%xmm2
movups %xmm6,(%edi)
+.byte 102,15,56,0,213
leal 16(%edi),%edi
-.byte 102,15,56,0,213
- jnz .L026ccm64_enc_outer
+ jnz .L030ccm64_enc_outer
movl 48(%esp),%esp
movl 40(%esp),%edi
movups %xmm3,(%edi)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
@@ -2794,55 +3173,58 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L028enc1_loop_5:
+.L032enc1_loop_5:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L028enc1_loop_5
+ jnz .L032enc1_loop_5
.byte 102,15,56,221,209
+ shll $4,%ebx
+ movl $16,%ecx
movups (%esi),%xmm6
paddq 16(%esp),%xmm7
leal 16(%esi),%esi
- jmp .L029ccm64_dec_outer
+ subl %ebx,%ecx
+ leal 32(%ebp,%ebx,1),%edx
+ movl %ecx,%ebx
+ jmp .L033ccm64_dec_outer
.align 16
-.L029ccm64_dec_outer:
+.L033ccm64_dec_outer:
xorps %xmm2,%xmm6
movdqa %xmm7,%xmm2
- movl %ebx,%ecx
movups %xmm6,(%edi)
leal 16(%edi),%edi
.byte 102,15,56,0,213
subl $1,%eax
- jz .L030ccm64_dec_break
+ jz .L034ccm64_dec_break
movups (%ebp),%xmm0
- shrl $1,%ecx
+ movl %ebx,%ecx
movups 16(%ebp),%xmm1
xorps %xmm0,%xmm6
- leal 32(%ebp),%edx
xorps %xmm0,%xmm2
xorps %xmm6,%xmm3
- movups (%edx),%xmm0
-.L031ccm64_dec2_loop:
+ movups 32(%ebp),%xmm0
+.L035ccm64_dec2_loop:
.byte 102,15,56,220,209
- decl %ecx
.byte 102,15,56,220,217
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
- leal 32(%edx),%edx
.byte 102,15,56,220,216
- movups (%edx),%xmm0
- jnz .L031ccm64_dec2_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L035ccm64_dec2_loop
movups (%esi),%xmm6
paddq 16(%esp),%xmm7
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- leal 16(%esi),%esi
.byte 102,15,56,221,208
.byte 102,15,56,221,216
- jmp .L029ccm64_dec_outer
+ leal 16(%esi),%esi
+ jmp .L033ccm64_dec_outer
.align 16
-.L030ccm64_dec_break:
+.L034ccm64_dec_break:
+ movl 240(%ebp),%ecx
movl %ebp,%edx
movups (%edx),%xmm0
movups 16(%edx),%xmm1
@@ -2849,16 +3231,24 @@
xorps %xmm0,%xmm6
leal 32(%edx),%edx
xorps %xmm6,%xmm3
-.L032enc1_loop_6:
+.L036enc1_loop_6:
.byte 102,15,56,220,217
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L032enc1_loop_6
+ jnz .L036enc1_loop_6
.byte 102,15,56,221,217
movl 48(%esp),%esp
movl 40(%esp),%edi
movups %xmm3,(%edi)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
@@ -2884,7 +3274,7 @@
andl $-16,%esp
movl %ebp,80(%esp)
cmpl $1,%eax
- je .L033ctr32_one_shortcut
+ je .L037ctr32_one_shortcut
movdqu (%ebx),%xmm7
movl $202182159,(%esp)
movl $134810123,4(%esp)
@@ -2900,63 +3290,59 @@
.byte 102,15,58,34,253,3
movl 240(%edx),%ecx
bswap %ebx
+ pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
- pxor %xmm0,%xmm0
movdqa (%esp),%xmm2
-.byte 102,15,58,34,203,0
+.byte 102,15,58,34,195,0
leal 3(%ebx),%ebp
-.byte 102,15,58,34,197,0
+.byte 102,15,58,34,205,0
incl %ebx
-.byte 102,15,58,34,203,1
+.byte 102,15,58,34,195,1
incl %ebp
-.byte 102,15,58,34,197,1
+.byte 102,15,58,34,205,1
incl %ebx
-.byte 102,15,58,34,203,2
+.byte 102,15,58,34,195,2
incl %ebp
-.byte 102,15,58,34,197,2
- movdqa %xmm1,48(%esp)
+.byte 102,15,58,34,205,2
+ movdqa %xmm0,48(%esp)
+.byte 102,15,56,0,194
+ movdqu (%edx),%xmm6
+ movdqa %xmm1,64(%esp)
.byte 102,15,56,0,202
- movdqa %xmm0,64(%esp)
-.byte 102,15,56,0,194
- pshufd $192,%xmm1,%xmm2
- pshufd $128,%xmm1,%xmm3
+ pshufd $192,%xmm0,%xmm2
+ pshufd $128,%xmm0,%xmm3
cmpl $6,%eax
- jb .L034ctr32_tail
+ jb .L038ctr32_tail
+ pxor %xmm6,%xmm7
+ shll $4,%ecx
+ movl $16,%ebx
movdqa %xmm7,32(%esp)
- shrl $1,%ecx
movl %edx,%ebp
- movl %ecx,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
subl $6,%eax
- jmp .L035ctr32_loop6
+ jmp .L039ctr32_loop6
.align 16
-.L035ctr32_loop6:
- pshufd $64,%xmm1,%xmm4
- movdqa 32(%esp),%xmm1
- pshufd $192,%xmm0,%xmm5
- por %xmm1,%xmm2
- pshufd $128,%xmm0,%xmm6
- por %xmm1,%xmm3
- pshufd $64,%xmm0,%xmm7
- por %xmm1,%xmm4
- por %xmm1,%xmm5
- por %xmm1,%xmm6
- por %xmm1,%xmm7
- movups (%ebp),%xmm0
- movups 16(%ebp),%xmm1
- leal 32(%ebp),%edx
- decl %ecx
+.L039ctr32_loop6:
+ pshufd $64,%xmm0,%xmm4
+ movdqa 32(%esp),%xmm0
+ pshufd $192,%xmm1,%xmm5
pxor %xmm0,%xmm2
+ pshufd $128,%xmm1,%xmm6
pxor %xmm0,%xmm3
+ pshufd $64,%xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
.byte 102,15,56,220,209
- pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm6
+ pxor %xmm0,%xmm7
.byte 102,15,56,220,217
- pxor %xmm0,%xmm5
+ movups 32(%ebp),%xmm0
+ movl %ebx,%ecx
.byte 102,15,56,220,225
- pxor %xmm0,%xmm6
.byte 102,15,56,220,233
- pxor %xmm0,%xmm7
.byte 102,15,56,220,241
- movups (%edx),%xmm0
.byte 102,15,56,220,249
call .L_aesni_encrypt6_enter
movups (%esi),%xmm1
@@ -2967,11 +3353,11 @@
movups %xmm2,(%edi)
movdqa 16(%esp),%xmm0
xorps %xmm1,%xmm4
- movdqa 48(%esp),%xmm1
+ movdqa 64(%esp),%xmm1
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
paddd %xmm0,%xmm1
- paddd 64(%esp),%xmm0
+ paddd 48(%esp),%xmm0
movdqa (%esp),%xmm2
movups 48(%esi),%xmm3
movups 64(%esi),%xmm4
@@ -2978,40 +3364,40 @@
xorps %xmm3,%xmm5
movups 80(%esi),%xmm3
leal 96(%esi),%esi
- movdqa %xmm1,48(%esp)
-.byte 102,15,56,0,202
+ movdqa %xmm0,48(%esp)
+.byte 102,15,56,0,194
xorps %xmm4,%xmm6
movups %xmm5,48(%edi)
xorps %xmm3,%xmm7
- movdqa %xmm0,64(%esp)
-.byte 102,15,56,0,194
+ movdqa %xmm1,64(%esp)
+.byte 102,15,56,0,202
movups %xmm6,64(%edi)
- pshufd $192,%xmm1,%xmm2
+ pshufd $192,%xmm0,%xmm2
movups %xmm7,80(%edi)
leal 96(%edi),%edi
- movl %ebx,%ecx
- pshufd $128,%xmm1,%xmm3
+ pshufd $128,%xmm0,%xmm3
subl $6,%eax
- jnc .L035ctr32_loop6
+ jnc .L039ctr32_loop6
addl $6,%eax
- jz .L036ctr32_ret
+ jz .L040ctr32_ret
+ movdqu (%ebp),%xmm7
movl %ebp,%edx
- leal 1(,%ecx,2),%ecx
- movdqa 32(%esp),%xmm7
-.L034ctr32_tail:
+ pxor 32(%esp),%xmm7
+ movl 240(%ebp),%ecx
+.L038ctr32_tail:
por %xmm7,%xmm2
cmpl $2,%eax
- jb .L037ctr32_one
- pshufd $64,%xmm1,%xmm4
+ jb .L041ctr32_one
+ pshufd $64,%xmm0,%xmm4
por %xmm7,%xmm3
- je .L038ctr32_two
- pshufd $192,%xmm0,%xmm5
+ je .L042ctr32_two
+ pshufd $192,%xmm1,%xmm5
por %xmm7,%xmm4
cmpl $4,%eax
- jb .L039ctr32_three
- pshufd $128,%xmm0,%xmm6
+ jb .L043ctr32_three
+ pshufd $128,%xmm1,%xmm6
por %xmm7,%xmm5
- je .L040ctr32_four
+ je .L044ctr32_four
por %xmm7,%xmm6
call _aesni_encrypt6
movups (%esi),%xmm1
@@ -3029,30 +3415,30 @@
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L033ctr32_one_shortcut:
+.L037ctr32_one_shortcut:
movups (%ebx),%xmm2
movl 240(%edx),%ecx
-.L037ctr32_one:
+.L041ctr32_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L041enc1_loop_7:
+.L045enc1_loop_7:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L041enc1_loop_7
+ jnz .L045enc1_loop_7
.byte 102,15,56,221,209
movups (%esi),%xmm6
xorps %xmm2,%xmm6
movups %xmm6,(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L038ctr32_two:
- call _aesni_encrypt3
+.L042ctr32_two:
+ call _aesni_encrypt2
movups (%esi),%xmm5
movups 16(%esi),%xmm6
xorps %xmm5,%xmm2
@@ -3059,9 +3445,9 @@
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L039ctr32_three:
+.L043ctr32_three:
call _aesni_encrypt3
movups (%esi),%xmm5
movups 16(%esi),%xmm6
@@ -3072,9 +3458,9 @@
xorps %xmm7,%xmm4
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L040ctr32_four:
+.L044ctr32_four:
call _aesni_encrypt4
movups (%esi),%xmm6
movups 16(%esi),%xmm7
@@ -3088,7 +3474,18 @@
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
-.L036ctr32_ret:
+.L040ctr32_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
movl 80(%esp),%esp
popl %edi
popl %esi
@@ -3113,12 +3510,12 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L042enc1_loop_8:
+.L046enc1_loop_8:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L042enc1_loop_8
+ jnz .L046enc1_loop_8
.byte 102,15,56,221,209
movl 20(%esp),%esi
movl 24(%esp),%edi
@@ -3142,12 +3539,14 @@
movl %edx,%ebp
movl %ecx,%ebx
subl $96,%eax
- jc .L043xts_enc_short
- shrl $1,%ecx
- movl %ecx,%ebx
- jmp .L044xts_enc_loop6
+ jc .L047xts_enc_short
+ shll $4,%ecx
+ movl $16,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ jmp .L048xts_enc_loop6
.align 16
-.L044xts_enc_loop6:
+.L048xts_enc_loop6:
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,(%esp)
@@ -3183,6 +3582,7 @@
pand %xmm3,%xmm7
movups (%esi),%xmm2
pxor %xmm1,%xmm7
+ movl %ebx,%ecx
movdqu 16(%esi),%xmm3
xorps %xmm0,%xmm2
movdqu 32(%esi),%xmm4
@@ -3198,19 +3598,17 @@
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
movups 16(%ebp),%xmm1
- leal 32(%ebp),%edx
pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
.byte 102,15,56,220,209
- pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
.byte 102,15,56,220,217
- pxor 48(%esp),%xmm5
- decl %ecx
+ pxor %xmm0,%xmm7
+ movups 32(%ebp),%xmm0
.byte 102,15,56,220,225
- pxor 64(%esp),%xmm6
.byte 102,15,56,220,233
- pxor %xmm0,%xmm7
.byte 102,15,56,220,241
- movups (%edx),%xmm0
.byte 102,15,56,220,249
call .L_aesni_encrypt6_enter
movdqa 80(%esp),%xmm1
@@ -3235,19 +3633,18 @@
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
- movl %ebx,%ecx
pxor %xmm2,%xmm1
subl $96,%eax
- jnc .L044xts_enc_loop6
- leal 1(,%ecx,2),%ecx
+ jnc .L048xts_enc_loop6
+ movl 240(%ebp),%ecx
movl %ebp,%edx
movl %ecx,%ebx
-.L043xts_enc_short:
+.L047xts_enc_short:
addl $96,%eax
- jz .L045xts_enc_done6x
+ jz .L049xts_enc_done6x
movdqa %xmm1,%xmm5
cmpl $32,%eax
- jb .L046xts_enc_one
+ jb .L050xts_enc_one
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
@@ -3254,7 +3651,7 @@
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
- je .L047xts_enc_two
+ je .L051xts_enc_two
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm6
@@ -3263,7 +3660,7 @@
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
cmpl $64,%eax
- jb .L048xts_enc_three
+ jb .L052xts_enc_three
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm7
@@ -3273,7 +3670,7 @@
pxor %xmm2,%xmm1
movdqa %xmm5,(%esp)
movdqa %xmm6,16(%esp)
- je .L049xts_enc_four
+ je .L053xts_enc_four
movdqa %xmm7,32(%esp)
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,48(%esp)
@@ -3305,9 +3702,9 @@
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
leal 80(%edi),%edi
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L046xts_enc_one:
+.L050xts_enc_one:
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
@@ -3315,20 +3712,20 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L051enc1_loop_9:
+.L055enc1_loop_9:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L051enc1_loop_9
+ jnz .L055enc1_loop_9
.byte 102,15,56,221,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
leal 16(%edi),%edi
movdqa %xmm5,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L047xts_enc_two:
+.L051xts_enc_two:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -3335,8 +3732,7 @@
leal 32(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
- xorps %xmm4,%xmm4
- call _aesni_encrypt3
+ call _aesni_encrypt2
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
@@ -3343,9 +3739,9 @@
movups %xmm3,16(%edi)
leal 32(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L048xts_enc_three:
+.L052xts_enc_three:
movaps %xmm1,%xmm7
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -3363,9 +3759,9 @@
movups %xmm4,32(%edi)
leal 48(%edi),%edi
movdqa %xmm7,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L049xts_enc_four:
+.L053xts_enc_four:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -3387,21 +3783,21 @@
movups %xmm5,48(%edi)
leal 64(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L045xts_enc_done6x:
+.L049xts_enc_done6x:
movl 112(%esp),%eax
andl $15,%eax
- jz .L052xts_enc_ret
+ jz .L056xts_enc_ret
movdqa %xmm1,%xmm5
movl %eax,112(%esp)
- jmp .L053xts_enc_steal
+ jmp .L057xts_enc_steal
.align 16
-.L050xts_enc_done:
+.L054xts_enc_done:
movl 112(%esp),%eax
pxor %xmm0,%xmm0
andl $15,%eax
- jz .L052xts_enc_ret
+ jz .L056xts_enc_ret
pcmpgtd %xmm1,%xmm0
movl %eax,112(%esp)
pshufd $19,%xmm0,%xmm5
@@ -3408,7 +3804,7 @@
paddq %xmm1,%xmm1
pand 96(%esp),%xmm5
pxor %xmm1,%xmm5
-.L053xts_enc_steal:
+.L057xts_enc_steal:
movzbl (%esi),%ecx
movzbl -16(%edi),%edx
leal 1(%esi),%esi
@@ -3416,7 +3812,7 @@
movb %dl,(%edi)
leal 1(%edi),%edi
subl $1,%eax
- jnz .L053xts_enc_steal
+ jnz .L057xts_enc_steal
subl 112(%esp),%edi
movl %ebp,%edx
movl %ebx,%ecx
@@ -3426,16 +3822,30 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L054enc1_loop_10:
+.L058enc1_loop_10:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L054enc1_loop_10
+ jnz .L058enc1_loop_10
.byte 102,15,56,221,209
xorps %xmm5,%xmm2
movups %xmm2,-16(%edi)
-.L052xts_enc_ret:
+.L056xts_enc_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ movdqa %xmm0,(%esp)
+ pxor %xmm3,%xmm3
+ movdqa %xmm0,16(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm0,80(%esp)
movl 116(%esp),%esp
popl %edi
popl %esi
@@ -3460,12 +3870,12 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L055enc1_loop_11:
+.L059enc1_loop_11:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L055enc1_loop_11
+ jnz .L059enc1_loop_11
.byte 102,15,56,221,209
movl 20(%esp),%esi
movl 24(%esp),%edi
@@ -3494,12 +3904,14 @@
pcmpgtd %xmm1,%xmm0
andl $-16,%eax
subl $96,%eax
- jc .L056xts_dec_short
- shrl $1,%ecx
- movl %ecx,%ebx
- jmp .L057xts_dec_loop6
+ jc .L060xts_dec_short
+ shll $4,%ecx
+ movl $16,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ jmp .L061xts_dec_loop6
.align 16
-.L057xts_dec_loop6:
+.L061xts_dec_loop6:
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,(%esp)
@@ -3535,6 +3947,7 @@
pand %xmm3,%xmm7
movups (%esi),%xmm2
pxor %xmm1,%xmm7
+ movl %ebx,%ecx
movdqu 16(%esi),%xmm3
xorps %xmm0,%xmm2
movdqu 32(%esi),%xmm4
@@ -3550,19 +3963,17 @@
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
movups 16(%ebp),%xmm1
- leal 32(%ebp),%edx
pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
.byte 102,15,56,222,209
- pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
.byte 102,15,56,222,217
- pxor 48(%esp),%xmm5
- decl %ecx
+ pxor %xmm0,%xmm7
+ movups 32(%ebp),%xmm0
.byte 102,15,56,222,225
- pxor 64(%esp),%xmm6
.byte 102,15,56,222,233
- pxor %xmm0,%xmm7
.byte 102,15,56,222,241
- movups (%edx),%xmm0
.byte 102,15,56,222,249
call .L_aesni_decrypt6_enter
movdqa 80(%esp),%xmm1
@@ -3587,19 +3998,18 @@
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
- movl %ebx,%ecx
pxor %xmm2,%xmm1
subl $96,%eax
- jnc .L057xts_dec_loop6
- leal 1(,%ecx,2),%ecx
+ jnc .L061xts_dec_loop6
+ movl 240(%ebp),%ecx
movl %ebp,%edx
movl %ecx,%ebx
-.L056xts_dec_short:
+.L060xts_dec_short:
addl $96,%eax
- jz .L058xts_dec_done6x
+ jz .L062xts_dec_done6x
movdqa %xmm1,%xmm5
cmpl $32,%eax
- jb .L059xts_dec_one
+ jb .L063xts_dec_one
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
@@ -3606,7 +4016,7 @@
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
- je .L060xts_dec_two
+ je .L064xts_dec_two
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm6
@@ -3615,7 +4025,7 @@
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
cmpl $64,%eax
- jb .L061xts_dec_three
+ jb .L065xts_dec_three
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm7
@@ -3625,7 +4035,7 @@
pxor %xmm2,%xmm1
movdqa %xmm5,(%esp)
movdqa %xmm6,16(%esp)
- je .L062xts_dec_four
+ je .L066xts_dec_four
movdqa %xmm7,32(%esp)
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,48(%esp)
@@ -3657,9 +4067,9 @@
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
leal 80(%edi),%edi
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L059xts_dec_one:
+.L063xts_dec_one:
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
@@ -3667,20 +4077,20 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L064dec1_loop_12:
+.L068dec1_loop_12:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L064dec1_loop_12
+ jnz .L068dec1_loop_12
.byte 102,15,56,223,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
leal 16(%edi),%edi
movdqa %xmm5,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L060xts_dec_two:
+.L064xts_dec_two:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -3687,7 +4097,7 @@
leal 32(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
- call _aesni_decrypt3
+ call _aesni_decrypt2
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
@@ -3694,9 +4104,9 @@
movups %xmm3,16(%edi)
leal 32(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L061xts_dec_three:
+.L065xts_dec_three:
movaps %xmm1,%xmm7
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -3714,9 +4124,9 @@
movups %xmm4,32(%edi)
leal 48(%edi),%edi
movdqa %xmm7,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L062xts_dec_four:
+.L066xts_dec_four:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -3738,20 +4148,20 @@
movups %xmm5,48(%edi)
leal 64(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L058xts_dec_done6x:
+.L062xts_dec_done6x:
movl 112(%esp),%eax
andl $15,%eax
- jz .L065xts_dec_ret
+ jz .L069xts_dec_ret
movl %eax,112(%esp)
- jmp .L066xts_dec_only_one_more
+ jmp .L070xts_dec_only_one_more
.align 16
-.L063xts_dec_done:
+.L067xts_dec_done:
movl 112(%esp),%eax
pxor %xmm0,%xmm0
andl $15,%eax
- jz .L065xts_dec_ret
+ jz .L069xts_dec_ret
pcmpgtd %xmm1,%xmm0
movl %eax,112(%esp)
pshufd $19,%xmm0,%xmm2
@@ -3761,7 +4171,7 @@
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
-.L066xts_dec_only_one_more:
+.L070xts_dec_only_one_more:
pshufd $19,%xmm0,%xmm5
movdqa %xmm1,%xmm6
paddq %xmm1,%xmm1
@@ -3775,16 +4185,16 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L067dec1_loop_13:
+.L071dec1_loop_13:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L067dec1_loop_13
+ jnz .L071dec1_loop_13
.byte 102,15,56,223,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
-.L068xts_dec_steal:
+.L072xts_dec_steal:
movzbl 16(%esi),%ecx
movzbl (%edi),%edx
leal 1(%esi),%esi
@@ -3792,7 +4202,7 @@
movb %dl,16(%edi)
leal 1(%edi),%edi
subl $1,%eax
- jnz .L068xts_dec_steal
+ jnz .L072xts_dec_steal
subl 112(%esp),%edi
movl %ebp,%edx
movl %ebx,%ecx
@@ -3802,16 +4212,30 @@
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L069dec1_loop_14:
+.L073dec1_loop_14:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L069dec1_loop_14
+ jnz .L073dec1_loop_14
.byte 102,15,56,223,209
xorps %xmm6,%xmm2
movups %xmm2,(%edi)
-.L065xts_dec_ret:
+.L069xts_dec_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ movdqa %xmm0,(%esp)
+ pxor %xmm3,%xmm3
+ movdqa %xmm0,16(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm0,80(%esp)
movl 116(%esp),%esp
popl %edi
popl %esi
@@ -3837,7 +4261,7 @@
movl 32(%esp),%edx
movl 36(%esp),%ebp
testl %eax,%eax
- jz .L070cbc_abort
+ jz .L074cbc_abort
cmpl $0,40(%esp)
xchgl %esp,%ebx
movups (%ebp),%xmm7
@@ -3845,14 +4269,14 @@
movl %edx,%ebp
movl %ebx,16(%esp)
movl %ecx,%ebx
- je .L071cbc_decrypt
+ je .L075cbc_decrypt
movaps %xmm7,%xmm2
cmpl $16,%eax
- jb .L072cbc_enc_tail
+ jb .L076cbc_enc_tail
subl $16,%eax
- jmp .L073cbc_enc_loop
+ jmp .L077cbc_enc_loop
.align 16
-.L073cbc_enc_loop:
+.L077cbc_enc_loop:
movups (%esi),%xmm7
leal 16(%esi),%esi
movups (%edx),%xmm0
@@ -3860,12 +4284,12 @@
xorps %xmm0,%xmm7
leal 32(%edx),%edx
xorps %xmm7,%xmm2
-.L074enc1_loop_15:
+.L078enc1_loop_15:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L074enc1_loop_15
+ jnz .L078enc1_loop_15
.byte 102,15,56,221,209
movl %ebx,%ecx
movl %ebp,%edx
@@ -3872,12 +4296,13 @@
movups %xmm2,(%edi)
leal 16(%edi),%edi
subl $16,%eax
- jnc .L073cbc_enc_loop
+ jnc .L077cbc_enc_loop
addl $16,%eax
- jnz .L072cbc_enc_tail
+ jnz .L076cbc_enc_tail
movaps %xmm2,%xmm7
- jmp .L075cbc_ret
-.L072cbc_enc_tail:
+ pxor %xmm2,%xmm2
+ jmp .L079cbc_ret
+.L076cbc_enc_tail:
movl %eax,%ecx
.long 2767451785
movl $16,%ecx
@@ -3888,20 +4313,20 @@
movl %ebx,%ecx
movl %edi,%esi
movl %ebp,%edx
- jmp .L073cbc_enc_loop
+ jmp .L077cbc_enc_loop
.align 16
-.L071cbc_decrypt:
+.L075cbc_decrypt:
cmpl $80,%eax
- jbe .L076cbc_dec_tail
+ jbe .L080cbc_dec_tail
movaps %xmm7,(%esp)
subl $80,%eax
- jmp .L077cbc_dec_loop6_enter
+ jmp .L081cbc_dec_loop6_enter
.align 16
-.L078cbc_dec_loop6:
+.L082cbc_dec_loop6:
movaps %xmm0,(%esp)
movups %xmm7,(%edi)
leal 16(%edi),%edi
-.L077cbc_dec_loop6_enter:
+.L081cbc_dec_loop6_enter:
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
@@ -3931,28 +4356,28 @@
movups %xmm6,64(%edi)
leal 80(%edi),%edi
subl $96,%eax
- ja .L078cbc_dec_loop6
+ ja .L082cbc_dec_loop6
movaps %xmm7,%xmm2
movaps %xmm0,%xmm7
addl $80,%eax
- jle .L079cbc_dec_tail_collected
+ jle .L083cbc_dec_clear_tail_collected
movups %xmm2,(%edi)
leal 16(%edi),%edi
-.L076cbc_dec_tail:
+.L080cbc_dec_tail:
movups (%esi),%xmm2
movaps %xmm2,%xmm6
cmpl $16,%eax
- jbe .L080cbc_dec_one
+ jbe .L084cbc_dec_one
movups 16(%esi),%xmm3
movaps %xmm3,%xmm5
cmpl $32,%eax
- jbe .L081cbc_dec_two
+ jbe .L085cbc_dec_two
movups 32(%esi),%xmm4
cmpl $48,%eax
- jbe .L082cbc_dec_three
+ jbe .L086cbc_dec_three
movups 48(%esi),%xmm5
cmpl $64,%eax
- jbe .L083cbc_dec_four
+ jbe .L087cbc_dec_four
movups 64(%esi),%xmm6
movaps %xmm7,(%esp)
movups (%esi),%xmm2
@@ -3970,43 +4395,47 @@
xorps %xmm0,%xmm6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
movups %xmm4,32(%edi)
+ pxor %xmm4,%xmm4
movups %xmm5,48(%edi)
+ pxor %xmm5,%xmm5
leal 64(%edi),%edi
movaps %xmm6,%xmm2
+ pxor %xmm6,%xmm6
subl $80,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L080cbc_dec_one:
+.L084cbc_dec_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L084dec1_loop_16:
+.L089dec1_loop_16:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L084dec1_loop_16
+ jnz .L089dec1_loop_16
.byte 102,15,56,223,209
xorps %xmm7,%xmm2
movaps %xmm6,%xmm7
subl $16,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L081cbc_dec_two:
- xorps %xmm4,%xmm4
- call _aesni_decrypt3
+.L085cbc_dec_two:
+ call _aesni_decrypt2
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movaps %xmm3,%xmm2
+ pxor %xmm3,%xmm3
leal 16(%edi),%edi
movaps %xmm5,%xmm7
subl $32,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L082cbc_dec_three:
+.L086cbc_dec_three:
call _aesni_decrypt3
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
@@ -4013,13 +4442,15 @@
xorps %xmm5,%xmm4
movups %xmm2,(%edi)
movaps %xmm4,%xmm2
+ pxor %xmm4,%xmm4
movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
leal 32(%edi),%edi
movups 32(%esi),%xmm7
subl $48,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L083cbc_dec_four:
+.L087cbc_dec_four:
call _aesni_decrypt4
movups 16(%esi),%xmm1
movups 32(%esi),%xmm0
@@ -4029,28 +4460,44 @@
movups %xmm2,(%edi)
xorps %xmm1,%xmm4
movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
+ pxor %xmm4,%xmm4
leal 48(%edi),%edi
movaps %xmm5,%xmm2
+ pxor %xmm5,%xmm5
subl $64,%eax
-.L079cbc_dec_tail_collected:
+ jmp .L088cbc_dec_tail_collected
+.align 16
+.L083cbc_dec_clear_tail_collected:
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+.L088cbc_dec_tail_collected:
andl $15,%eax
- jnz .L085cbc_dec_tail_partial
+ jnz .L090cbc_dec_tail_partial
movups %xmm2,(%edi)
- jmp .L075cbc_ret
+ pxor %xmm0,%xmm0
+ jmp .L079cbc_ret
.align 16
-.L085cbc_dec_tail_partial:
+.L090cbc_dec_tail_partial:
movaps %xmm2,(%esp)
+ pxor %xmm0,%xmm0
movl $16,%ecx
movl %esp,%esi
subl %eax,%ecx
.long 2767451785
-.L075cbc_ret:
+ movdqa %xmm2,(%esp)
+.L079cbc_ret:
movl 16(%esp),%esp
movl 36(%esp),%ebp
+ pxor %xmm2,%xmm2
+ pxor %xmm1,%xmm1
movups %xmm7,(%ebp)
-.L070cbc_abort:
+ pxor %xmm7,%xmm7
+.L074cbc_abort:
popl %edi
popl %esi
popl %ebx
@@ -4060,52 +4507,62 @@
.type _aesni_set_encrypt_key,@function
.align 16
_aesni_set_encrypt_key:
+ pushl %ebp
+ pushl %ebx
testl %eax,%eax
- jz .L086bad_pointer
+ jz .L091bad_pointer
testl %edx,%edx
- jz .L086bad_pointer
+ jz .L091bad_pointer
+ call .L092pic
+.L092pic:
+ popl %ebx
+ leal .Lkey_const-.L092pic(%ebx),%ebx
+ leal OPENSSL_ia32cap_P,%ebp
movups (%eax),%xmm0
xorps %xmm4,%xmm4
+ movl 4(%ebp),%ebp
leal 16(%edx),%edx
+ andl $268437504,%ebp
cmpl $256,%ecx
- je .L08714rounds
+ je .L09314rounds
cmpl $192,%ecx
- je .L08812rounds
+ je .L09412rounds
cmpl $128,%ecx
- jne .L089bad_keybits
+ jne .L095bad_keybits
.align 16
-.L09010rounds:
+.L09610rounds:
+ cmpl $268435456,%ebp
+ je .L09710rounds_alt
movl $9,%ecx
movups %xmm0,-16(%edx)
.byte 102,15,58,223,200,1
- call .L091key_128_cold
+ call .L098key_128_cold
.byte 102,15,58,223,200,2
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,4
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,8
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,16
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,32
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,64
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,128
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,27
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,54
- call .L092key_128
+ call .L099key_128
movups %xmm0,(%edx)
movl %ecx,80(%edx)
- xorl %eax,%eax
- ret
+ jmp .L100good_key
.align 16
-.L092key_128:
+.L099key_128:
movups %xmm0,(%edx)
leal 16(%edx),%edx
-.L091key_128_cold:
+.L098key_128_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
@@ -4114,38 +4571,91 @@
xorps %xmm1,%xmm0
ret
.align 16
-.L08812rounds:
+.L09710rounds_alt:
+ movdqa (%ebx),%xmm5
+ movl $8,%ecx
+ movdqa 32(%ebx),%xmm4
+ movdqa %xmm0,%xmm2
+ movdqu %xmm0,-16(%edx)
+.L101loop_key128:
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ pslld $1,%xmm4
+ leal 16(%edx),%edx
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,-16(%edx)
+ movdqa %xmm0,%xmm2
+ decl %ecx
+ jnz .L101loop_key128
+ movdqa 48(%ebx),%xmm4
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ pslld $1,%xmm4
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,(%edx)
+ movdqa %xmm0,%xmm2
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,16(%edx)
+ movl $9,%ecx
+ movl %ecx,96(%edx)
+ jmp .L100good_key
+.align 16
+.L09412rounds:
movq 16(%eax),%xmm2
+ cmpl $268435456,%ebp
+ je .L10212rounds_alt
movl $11,%ecx
movups %xmm0,-16(%edx)
.byte 102,15,58,223,202,1
- call .L093key_192a_cold
+ call .L103key_192a_cold
.byte 102,15,58,223,202,2
- call .L094key_192b
+ call .L104key_192b
.byte 102,15,58,223,202,4
- call .L095key_192a
+ call .L105key_192a
.byte 102,15,58,223,202,8
- call .L094key_192b
+ call .L104key_192b
.byte 102,15,58,223,202,16
- call .L095key_192a
+ call .L105key_192a
.byte 102,15,58,223,202,32
- call .L094key_192b
+ call .L104key_192b
.byte 102,15,58,223,202,64
- call .L095key_192a
+ call .L105key_192a
.byte 102,15,58,223,202,128
- call .L094key_192b
+ call .L104key_192b
movups %xmm0,(%edx)
movl %ecx,48(%edx)
- xorl %eax,%eax
- ret
+ jmp .L100good_key
.align 16
-.L095key_192a:
+.L105key_192a:
movups %xmm0,(%edx)
leal 16(%edx),%edx
.align 16
-.L093key_192a_cold:
+.L103key_192a_cold:
movaps %xmm2,%xmm5
-.L096key_192b_warm:
+.L106key_192b_warm:
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
@@ -4159,7 +4669,7 @@
pxor %xmm3,%xmm2
ret
.align 16
-.L094key_192b:
+.L104key_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
movups %xmm5,(%edx)
@@ -4166,49 +4676,83 @@
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%edx)
leal 32(%edx),%edx
- jmp .L096key_192b_warm
+ jmp .L106key_192b_warm
.align 16
-.L08714rounds:
+.L10212rounds_alt:
+ movdqa 16(%ebx),%xmm5
+ movdqa 32(%ebx),%xmm4
+ movl $8,%ecx
+ movdqu %xmm0,-16(%edx)
+.L107loop_key192:
+ movq %xmm2,(%edx)
+ movdqa %xmm2,%xmm1
+.byte 102,15,56,0,213
+.byte 102,15,56,221,212
+ pslld $1,%xmm4
+ leal 24(%edx),%edx
+ movdqa %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm3,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pxor %xmm2,%xmm0
+ pxor %xmm3,%xmm2
+ movdqu %xmm0,-16(%edx)
+ decl %ecx
+ jnz .L107loop_key192
+ movl $11,%ecx
+ movl %ecx,32(%edx)
+ jmp .L100good_key
+.align 16
+.L09314rounds:
movups 16(%eax),%xmm2
+ leal 16(%edx),%edx
+ cmpl $268435456,%ebp
+ je .L10814rounds_alt
movl $13,%ecx
- leal 16(%edx),%edx
movups %xmm0,-32(%edx)
movups %xmm2,-16(%edx)
.byte 102,15,58,223,202,1
- call .L097key_256a_cold
+ call .L109key_256a_cold
.byte 102,15,58,223,200,1
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,2
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,2
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,4
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,4
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,8
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,8
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,16
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,16
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,32
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,32
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,64
- call .L099key_256a
+ call .L111key_256a
movups %xmm0,(%edx)
movl %ecx,16(%edx)
xorl %eax,%eax
- ret
+ jmp .L100good_key
.align 16
-.L099key_256a:
+.L111key_256a:
movups %xmm2,(%edx)
leal 16(%edx),%edx
-.L097key_256a_cold:
+.L109key_256a_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
@@ -4217,7 +4761,7 @@
xorps %xmm1,%xmm0
ret
.align 16
-.L098key_256b:
+.L110key_256b:
movups %xmm0,(%edx)
leal 16(%edx),%edx
shufps $16,%xmm2,%xmm4
@@ -4227,13 +4771,70 @@
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
ret
+.align 16
+.L10814rounds_alt:
+ movdqa (%ebx),%xmm5
+ movdqa 32(%ebx),%xmm4
+ movl $7,%ecx
+ movdqu %xmm0,-32(%edx)
+ movdqa %xmm2,%xmm1
+ movdqu %xmm2,-16(%edx)
+.L112loop_key256:
+.byte 102,15,56,0,213
+.byte 102,15,56,221,212
+ movdqa %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm3,%xmm0
+ pslld $1,%xmm4
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,(%edx)
+ decl %ecx
+ jz .L113done_key256
+ pshufd $255,%xmm0,%xmm2
+ pxor %xmm3,%xmm3
+.byte 102,15,56,221,211
+ movdqa %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm3,%xmm1
+ pxor %xmm1,%xmm2
+ movdqu %xmm2,16(%edx)
+ leal 32(%edx),%edx
+ movdqa %xmm2,%xmm1
+ jmp .L112loop_key256
+.L113done_key256:
+ movl $13,%ecx
+ movl %ecx,16(%edx)
+.L100good_key:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ xorl %eax,%eax
+ popl %ebx
+ popl %ebp
+ ret
.align 4
-.L086bad_pointer:
+.L091bad_pointer:
movl $-1,%eax
+ popl %ebx
+ popl %ebp
ret
.align 4
-.L089bad_keybits:
+.L095bad_keybits:
+ pxor %xmm0,%xmm0
movl $-2,%eax
+ popl %ebx
+ popl %ebp
ret
.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key
.globl aesni_set_encrypt_key
@@ -4259,7 +4860,7 @@
movl 12(%esp),%edx
shll $4,%ecx
testl %eax,%eax
- jnz .L100dec_key_ret
+ jnz .L114dec_key_ret
leal 16(%edx,%ecx,1),%eax
movups (%edx),%xmm0
movups (%eax),%xmm1
@@ -4267,7 +4868,7 @@
movups %xmm1,(%edx)
leal 16(%edx),%edx
leal -16(%eax),%eax
-.L101dec_key_inverse:
+.L115dec_key_inverse:
movups (%edx),%xmm0
movups (%eax),%xmm1
.byte 102,15,56,219,192
@@ -4277,16 +4878,25 @@
movups %xmm0,16(%eax)
movups %xmm1,-16(%edx)
cmpl %edx,%eax
- ja .L101dec_key_inverse
+ ja .L115dec_key_inverse
movups (%edx),%xmm0
.byte 102,15,56,219,192
movups %xmm0,(%edx)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
xorl %eax,%eax
-.L100dec_key_ret:
+.L114dec_key_ret:
ret
.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
+.align 64
+.Lkey_const:
+.long 202313229,202313229,202313229,202313229
+.long 67569157,67569157,67569157,67569157
+.long 1,1,1,1
+.long 27,27,27,27
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
+.comm OPENSSL_ia32cap_P,16,4
#endif
Modified: trunk/secure/lib/libcrypto/i386/bf-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/bf-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/bf-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/bf-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from bf-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/bf-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from bf-586.pl. */
#ifdef PIC
.file "bf-586.S"
.text
Modified: trunk/secure/lib/libcrypto/i386/bf-686.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/bf-686.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/bf-686.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/bf-686.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from bf-686.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/bf-686.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from bf-686.pl. */
#ifdef PIC
.file "bf-686.S"
.text
Modified: trunk/secure/lib/libcrypto/i386/bn-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/bn-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/bn-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/bn-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from bn-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/bn-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from bn-586.pl. */
#ifdef PIC
.file "bn-586.S"
.text
@@ -12,8 +12,7 @@
call .L000PIC_me_up
.L000PIC_me_up:
popl %eax
- leal _GLOBAL_OFFSET_TABLE_+[.-.L000PIC_me_up](%eax),%eax
- movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ leal OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L001maw_non_sse2
movl 4(%esp),%eax
@@ -296,8 +295,7 @@
call .L010PIC_me_up
.L010PIC_me_up:
popl %eax
- leal _GLOBAL_OFFSET_TABLE_+[.-.L010PIC_me_up](%eax),%eax
- movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ leal OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L011mw_non_sse2
movl 4(%esp),%eax
@@ -479,8 +477,7 @@
call .L017PIC_me_up
.L017PIC_me_up:
popl %eax
- leal _GLOBAL_OFFSET_TABLE_+[.-.L017PIC_me_up](%eax),%eax
- movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ leal OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L018sqr_non_sse2
movl 4(%esp),%eax
@@ -1534,7 +1531,7 @@
popl %ebp
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#else
.file "bn-586.S"
.text
@@ -3056,5 +3053,5 @@
popl %ebp
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#endif
Modified: trunk/secure/lib/libcrypto/i386/cmll-x86.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/cmll-x86.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/cmll-x86.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/cmll-x86.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from cmll-x86.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/cmll-x86.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from cmll-x86.pl. */
#ifdef PIC
.file "cmll-x86.S"
.text
Modified: trunk/secure/lib/libcrypto/i386/co-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/co-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/co-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/co-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from co-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/co-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from co-586.pl. */
#ifdef PIC
.file "co-586.S"
.text
Modified: trunk/secure/lib/libcrypto/i386/crypt586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/crypt586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/crypt586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/crypt586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from crypt586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/crypt586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from crypt586.pl. */
#ifdef PIC
.file "crypt586.S"
.text
Modified: trunk/secure/lib/libcrypto/i386/des-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/des-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/des-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/des-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from des-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/des-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from des-586.pl. */
#ifdef PIC
.file "des-586.S"
.text
@@ -1004,7 +1004,7 @@
call .L000pic_point
.L000pic_point:
popl %ebp
- leal DES_SPtrans-.L000pic_point(%ebp),%ebp
+ leal .Ldes_sptrans-.L000pic_point(%ebp),%ebp
movl 24(%esp),%ecx
cmpl $0,%ebx
je .L001decrypt
@@ -1081,7 +1081,7 @@
call .L003pic_point
.L003pic_point:
popl %ebp
- leal DES_SPtrans-.L003pic_point(%ebp),%ebp
+ leal .Ldes_sptrans-.L003pic_point(%ebp),%ebp
movl 24(%esp),%ecx
cmpl $0,%ebx
je .L004decrypt
@@ -1711,6 +1711,7 @@
.size DES_ede3_cbc_encrypt,.-.L_DES_ede3_cbc_encrypt_begin
.align 64
DES_SPtrans:
+.Ldes_sptrans:
.long 34080768,524288,33554434,34080770
.long 33554432,526338,524290,33554434
.long 526338,34080768,34078720,2050
@@ -2842,7 +2843,7 @@
call .L000pic_point
.L000pic_point:
popl %ebp
- leal DES_SPtrans-.L000pic_point(%ebp),%ebp
+ leal .Ldes_sptrans-.L000pic_point(%ebp),%ebp
movl 24(%esp),%ecx
cmpl $0,%ebx
je .L001decrypt
@@ -2919,7 +2920,7 @@
call .L003pic_point
.L003pic_point:
popl %ebp
- leal DES_SPtrans-.L003pic_point(%ebp),%ebp
+ leal .Ldes_sptrans-.L003pic_point(%ebp),%ebp
movl 24(%esp),%ecx
cmpl $0,%ebx
je .L004decrypt
@@ -3549,6 +3550,7 @@
.size DES_ede3_cbc_encrypt,.-.L_DES_ede3_cbc_encrypt_begin
.align 64
DES_SPtrans:
+.Ldes_sptrans:
.long 34080768,524288,33554434,34080770
.long 33554432,526338,524290,33554434
.long 526338,34080768,34078720,2050
Modified: trunk/secure/lib/libcrypto/i386/ghash-x86.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/ghash-x86.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/ghash-x86.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/ghash-x86.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from ghash-x86.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/ghash-x86.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from ghash-x86.pl. */
#ifdef PIC
.file "ghash-x86.S"
.text
@@ -949,27 +949,34 @@
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
+ psllq $5,%xmm0
+ pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
- psllq $5,%xmm0
- pxor %xmm3,%xmm0
psllq $57,%xmm0
+ movdqa %xmm0,%xmm3
+ pslldq $8,%xmm0
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
- pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
+ psrlq $1,%xmm0
pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ pshufd $78,%xmm2,%xmm3
+ pshufd $78,%xmm0,%xmm4
+ pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
+ pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
+.byte 102,15,58,15,227,8
+ movdqu %xmm4,32(%edx)
ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
.globl gcm_gmult_clmul
@@ -987,11 +994,10 @@
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
+ movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
- pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
- pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
@@ -1002,25 +1008,26 @@
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
+ psllq $5,%xmm0
+ pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
- psllq $5,%xmm0
- pxor %xmm3,%xmm0
psllq $57,%xmm0
+ movdqa %xmm0,%xmm3
+ pslldq $8,%xmm0
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
- pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
+ psrlq $1,%xmm0
pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
@@ -1052,127 +1059,115 @@
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
+ movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
+ pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
- pshufd $78,%xmm6,%xmm3
- pshufd $78,%xmm2,%xmm4
pxor %xmm6,%xmm3
- pxor %xmm2,%xmm4
+ leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
-.byte 102,15,58,68,220,0
- xorps %xmm6,%xmm3
- xorps %xmm7,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm7
- pxor %xmm4,%xmm6
+.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
- leal 32(%esi),%esi
+ nop
subl $32,%ebx
jbe .L014even_tail
+ jmp .L015mod_loop
+.align 32
.L015mod_loop:
+ pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pshufd $78,%xmm2,%xmm4
- pxor %xmm0,%xmm3
- pxor %xmm2,%xmm4
+ pxor %xmm0,%xmm4
+ nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
-.byte 102,15,58,68,220,0
- xorps %xmm0,%xmm3
- xorps %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- movdqu (%esi),%xmm3
+.byte 102,15,58,68,229,16
movups (%edx),%xmm2
- pxor %xmm6,%xmm0
- pxor %xmm7,%xmm1
+ xorps %xmm6,%xmm0
+ movdqa (%ecx),%xmm5
+ xorps %xmm7,%xmm1
+ movdqu (%esi),%xmm7
+ pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
-.byte 102,15,56,0,221
+ pxor %xmm1,%xmm3
+.byte 102,15,56,0,253
+ pxor %xmm3,%xmm4
+ movdqa %xmm4,%xmm3
+ psrldq $8,%xmm4
+ pslldq $8,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm3,%xmm0
.byte 102,15,56,0,245
- movdqa %xmm6,%xmm5
+ pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
- pxor %xmm3,%xmm1
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
+ psllq $5,%xmm0
+ pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
- psllq $5,%xmm0
- pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
+ movups 32(%edx),%xmm5
psllq $57,%xmm0
+ movdqa %xmm0,%xmm3
+ pslldq $8,%xmm0
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
+ pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
- pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
- pshufd $78,%xmm5,%xmm3
+ psrlq $1,%xmm0
+ pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
- pxor %xmm5,%xmm3
- pshufd $78,%xmm2,%xmm5
- pxor %xmm2,%xmm5
.byte 102,15,58,68,250,17
- movdqa %xmm0,%xmm4
+ movups 16(%edx),%xmm2
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
- movups 16(%edx),%xmm2
- xorps %xmm6,%xmm3
- xorps %xmm7,%xmm3
- movdqa %xmm3,%xmm5
- psrldq $8,%xmm3
- pslldq $8,%xmm5
- pxor %xmm3,%xmm7
- pxor %xmm5,%xmm6
- movdqa (%ecx),%xmm5
leal 32(%esi),%esi
subl $32,%ebx
ja .L015mod_loop
.L014even_tail:
+ pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pshufd $78,%xmm2,%xmm4
- pxor %xmm0,%xmm3
- pxor %xmm2,%xmm4
+ pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
-.byte 102,15,58,68,220,0
- xorps %xmm0,%xmm3
- xorps %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- pxor %xmm6,%xmm0
- pxor %xmm7,%xmm1
+.byte 102,15,58,68,229,16
+ movdqa (%ecx),%xmm5
+ xorps %xmm6,%xmm0
+ xorps %xmm7,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ pxor %xmm3,%xmm4
+ movdqa %xmm4,%xmm3
+ psrldq $8,%xmm4
+ pslldq $8,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm3,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
+ psllq $5,%xmm0
+ pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
- psllq $5,%xmm0
- pxor %xmm3,%xmm0
psllq $57,%xmm0
+ movdqa %xmm0,%xmm3
+ pslldq $8,%xmm0
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
- pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
+ psrlq $1,%xmm0
pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L016done
movups (%edx),%xmm2
@@ -1195,25 +1190,26 @@
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
+ psllq $5,%xmm0
+ pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
- psllq $5,%xmm0
- pxor %xmm3,%xmm0
psllq $57,%xmm0
+ movdqa %xmm0,%xmm3
+ pslldq $8,%xmm0
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
- pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
+ psrlq $1,%xmm0
pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
.L016done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
@@ -1228,12 +1224,6 @@
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 64
-.Lrem_4bit:
-.long 0,0,0,471859200,0,943718400,0,610271232
-.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
-.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
-.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
-.align 64
.Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
@@ -1267,6 +1257,12 @@
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
+.align 64
+.Lrem_4bit:
+.long 0,0,0,471859200,0,943718400,0,610271232
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
@@ -2219,27 +2215,34 @@
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
+ psllq $5,%xmm0
+ pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
- psllq $5,%xmm0
- pxor %xmm3,%xmm0
psllq $57,%xmm0
+ movdqa %xmm0,%xmm3
+ pslldq $8,%xmm0
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
- pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
+ psrlq $1,%xmm0
pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ pshufd $78,%xmm2,%xmm3
+ pshufd $78,%xmm0,%xmm4
+ pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
+ pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
+.byte 102,15,58,15,227,8
+ movdqu %xmm4,32(%edx)
ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
.globl gcm_gmult_clmul
@@ -2257,11 +2260,10 @@
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
+ movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
- pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
- pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
@@ -2272,25 +2274,26 @@
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
+ psllq $5,%xmm0
+ pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
- psllq $5,%xmm0
- pxor %xmm3,%xmm0
psllq $57,%xmm0
+ movdqa %xmm0,%xmm3
+ pslldq $8,%xmm0
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
- pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
+ psrlq $1,%xmm0
pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
@@ -2322,127 +2325,115 @@
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
+ movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
+ pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
- pshufd $78,%xmm6,%xmm3
- pshufd $78,%xmm2,%xmm4
pxor %xmm6,%xmm3
- pxor %xmm2,%xmm4
+ leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
-.byte 102,15,58,68,220,0
- xorps %xmm6,%xmm3
- xorps %xmm7,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm7
- pxor %xmm4,%xmm6
+.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
- leal 32(%esi),%esi
+ nop
subl $32,%ebx
jbe .L014even_tail
+ jmp .L015mod_loop
+.align 32
.L015mod_loop:
+ pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pshufd $78,%xmm2,%xmm4
- pxor %xmm0,%xmm3
- pxor %xmm2,%xmm4
+ pxor %xmm0,%xmm4
+ nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
-.byte 102,15,58,68,220,0
- xorps %xmm0,%xmm3
- xorps %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- movdqu (%esi),%xmm3
+.byte 102,15,58,68,229,16
movups (%edx),%xmm2
- pxor %xmm6,%xmm0
- pxor %xmm7,%xmm1
+ xorps %xmm6,%xmm0
+ movdqa (%ecx),%xmm5
+ xorps %xmm7,%xmm1
+ movdqu (%esi),%xmm7
+ pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
-.byte 102,15,56,0,221
+ pxor %xmm1,%xmm3
+.byte 102,15,56,0,253
+ pxor %xmm3,%xmm4
+ movdqa %xmm4,%xmm3
+ psrldq $8,%xmm4
+ pslldq $8,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm3,%xmm0
.byte 102,15,56,0,245
- movdqa %xmm6,%xmm5
+ pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
- pxor %xmm3,%xmm1
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
+ psllq $5,%xmm0
+ pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
- psllq $5,%xmm0
- pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
+ movups 32(%edx),%xmm5
psllq $57,%xmm0
+ movdqa %xmm0,%xmm3
+ pslldq $8,%xmm0
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
+ pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
- pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
- pshufd $78,%xmm5,%xmm3
+ psrlq $1,%xmm0
+ pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
- pxor %xmm5,%xmm3
- pshufd $78,%xmm2,%xmm5
- pxor %xmm2,%xmm5
.byte 102,15,58,68,250,17
- movdqa %xmm0,%xmm4
+ movups 16(%edx),%xmm2
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
- movups 16(%edx),%xmm2
- xorps %xmm6,%xmm3
- xorps %xmm7,%xmm3
- movdqa %xmm3,%xmm5
- psrldq $8,%xmm3
- pslldq $8,%xmm5
- pxor %xmm3,%xmm7
- pxor %xmm5,%xmm6
- movdqa (%ecx),%xmm5
leal 32(%esi),%esi
subl $32,%ebx
ja .L015mod_loop
.L014even_tail:
+ pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pshufd $78,%xmm2,%xmm4
- pxor %xmm0,%xmm3
- pxor %xmm2,%xmm4
+ pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
-.byte 102,15,58,68,220,0
- xorps %xmm0,%xmm3
- xorps %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- pxor %xmm6,%xmm0
- pxor %xmm7,%xmm1
+.byte 102,15,58,68,229,16
+ movdqa (%ecx),%xmm5
+ xorps %xmm6,%xmm0
+ xorps %xmm7,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ pxor %xmm3,%xmm4
+ movdqa %xmm4,%xmm3
+ psrldq $8,%xmm4
+ pslldq $8,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm3,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
+ psllq $5,%xmm0
+ pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
- psllq $5,%xmm0
- pxor %xmm3,%xmm0
psllq $57,%xmm0
+ movdqa %xmm0,%xmm3
+ pslldq $8,%xmm0
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
- pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
+ psrlq $1,%xmm0
pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L016done
movups (%edx),%xmm2
@@ -2465,25 +2456,26 @@
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
+ psllq $5,%xmm0
+ pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
- psllq $5,%xmm0
- pxor %xmm3,%xmm0
psllq $57,%xmm0
+ movdqa %xmm0,%xmm3
+ pslldq $8,%xmm0
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
- pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
+ psrlq $1,%xmm0
pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
.L016done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
@@ -2498,12 +2490,6 @@
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 64
-.Lrem_4bit:
-.long 0,0,0,471859200,0,943718400,0,610271232
-.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
-.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
-.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
-.align 64
.Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
@@ -2537,6 +2523,12 @@
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
+.align 64
+.Lrem_4bit:
+.long 0,0,0,471859200,0,943718400,0,610271232
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
Modified: trunk/secure/lib/libcrypto/i386/md5-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/md5-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/md5-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/md5-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from md5-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/md5-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from md5-586.pl. */
#ifdef PIC
.file "md5-586.S"
.text
Modified: trunk/secure/lib/libcrypto/i386/rc4-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/rc4-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/rc4-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/rc4-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from rc4-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/rc4-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from rc4-586.pl. */
#ifdef PIC
.file "rc4-586.S"
.text
@@ -33,14 +33,13 @@
movl (%edi,%eax,4),%ecx
andl $-4,%edx
jz .L002loop1
+ movl %ebp,32(%esp)
testl $-8,%edx
- movl %ebp,32(%esp)
jz .L003go4loop4
call .L004PIC_me_up
.L004PIC_me_up:
popl %ebp
- leal _GLOBAL_OFFSET_TABLE_+[.-.L004PIC_me_up](%ebp),%ebp
- movl OPENSSL_ia32cap_P@GOT(%ebp),%ebp
+ leal OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp
btl $26,(%ebp)
jnc .L003go4loop4
movl 32(%esp),%ebp
@@ -286,8 +285,7 @@
call .L010PIC_me_up
.L010PIC_me_up:
popl %edx
- leal _GLOBAL_OFFSET_TABLE_+[.-.L010PIC_me_up](%edx),%edx
- movl OPENSSL_ia32cap_P@GOT(%edx),%edx
+ leal OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx
leal 8(%edi),%edi
leal (%esi,%ebp,1),%esi
negl %ebp
@@ -362,8 +360,7 @@
call .L020PIC_me_up
.L020PIC_me_up:
popl %edx
- leal _GLOBAL_OFFSET_TABLE_+[.-.L020PIC_me_up](%edx),%edx
- movl OPENSSL_ia32cap_P@GOT(%edx),%edx
+ leal OPENSSL_ia32cap_P-.L020PIC_me_up(%edx),%edx
movl (%edx),%edx
btl $20,%edx
jc .L0211xchar
@@ -385,7 +382,7 @@
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.size RC4_options,.-.L_RC4_options_begin
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#else
.file "rc4-586.S"
.text
@@ -418,8 +415,8 @@
movl (%edi,%eax,4),%ecx
andl $-4,%edx
jz .L002loop1
+ movl %ebp,32(%esp)
testl $-8,%edx
- movl %ebp,32(%esp)
jz .L003go4loop4
leal OPENSSL_ia32cap_P,%ebp
btl $26,(%ebp)
@@ -758,5 +755,5 @@
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.size RC4_options,.-.L_RC4_options_begin
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#endif
Modified: trunk/secure/lib/libcrypto/i386/rc5-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/rc5-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/rc5-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/rc5-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from rc5-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/rc5-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from rc5-586.pl. */
#ifdef PIC
.file "rc5-586.S"
.text
Modified: trunk/secure/lib/libcrypto/i386/rmd-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/rmd-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/rmd-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/rmd-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from rmd-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/rmd-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from rmd-586.pl. */
#ifdef PIC
.file "rmd-586.S"
.text
Modified: trunk/secure/lib/libcrypto/i386/sha1-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/sha1-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/sha1-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/sha1-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from sha1-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/sha1-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from sha1-586.pl. */
#ifdef PIC
.file "sha1-586.S"
.text
@@ -16,15 +16,17 @@
call .L000pic_point
.L000pic_point:
popl %ebp
- leal _GLOBAL_OFFSET_TABLE_+[.-.L000pic_point](%ebp),%esi
- movl OPENSSL_ia32cap_P@GOT(%esi),%esi
+ leal OPENSSL_ia32cap_P-.L000pic_point(%ebp),%esi
leal .LK_XX_XX-.L000pic_point(%ebp),%ebp
movl (%esi),%eax
movl 4(%esi),%edx
testl $512,%edx
jz .L001x86
+ movl 8(%esi),%ecx
testl $16777216,%eax
jz .L001x86
+ testl $536870912,%ecx
+ jnz .Lshaext_shortcut
andl $268435456,%edx
andl $1073741824,%eax
orl %edx,%eax
@@ -1398,9 +1400,9 @@
popl %ebp
ret
.size sha1_block_data_order,.-.L_sha1_block_data_order_begin
-.type _sha1_block_data_order_ssse3,@function
+.type _sha1_block_data_order_shaext,@function
.align 16
-_sha1_block_data_order_ssse3:
+_sha1_block_data_order_shaext:
pushl %ebp
pushl %ebx
pushl %esi
@@ -1409,6 +1411,176 @@
.L003pic_point:
popl %ebp
leal .LK_XX_XX-.L003pic_point(%ebp),%ebp
+.Lshaext_shortcut:
+ movl 20(%esp),%edi
+ movl %esp,%ebx
+ movl 24(%esp),%esi
+ movl 28(%esp),%ecx
+ subl $32,%esp
+ movdqu (%edi),%xmm0
+ movd 16(%edi),%xmm1
+ andl $-32,%esp
+ movdqa 80(%ebp),%xmm3
+ movdqu (%esi),%xmm4
+ pshufd $27,%xmm0,%xmm0
+ movdqu 16(%esi),%xmm5
+ pshufd $27,%xmm1,%xmm1
+ movdqu 32(%esi),%xmm6
+.byte 102,15,56,0,227
+ movdqu 48(%esi),%xmm7
+.byte 102,15,56,0,235
+.byte 102,15,56,0,243
+.byte 102,15,56,0,251
+ jmp .L004loop_shaext
+.align 16
+.L004loop_shaext:
+ decl %ecx
+ leal 64(%esi),%eax
+ movdqa %xmm1,(%esp)
+ paddd %xmm4,%xmm1
+ cmovnel %eax,%esi
+ movdqa %xmm0,16(%esp)
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,0
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,0
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,1
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,1
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,2
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,2
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+ movdqu (%esi),%xmm4
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,3
+.byte 15,56,200,213
+ movdqu 16(%esi),%xmm5
+.byte 102,15,56,0,227
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+.byte 15,56,200,206
+ movdqu 32(%esi),%xmm6
+.byte 102,15,56,0,235
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,3
+.byte 15,56,200,215
+ movdqu 48(%esi),%xmm7
+.byte 102,15,56,0,243
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+ movdqa (%esp),%xmm2
+.byte 102,15,56,0,251
+.byte 15,56,200,202
+ paddd 16(%esp),%xmm0
+ jnz .L004loop_shaext
+ pshufd $27,%xmm0,%xmm0
+ pshufd $27,%xmm1,%xmm1
+ movdqu %xmm0,(%edi)
+ movd %xmm1,16(%edi)
+ movl %ebx,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext
+.type _sha1_block_data_order_ssse3,@function
+.align 16
+_sha1_block_data_order_ssse3:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ call .L005pic_point
+.L005pic_point:
+ popl %ebp
+ leal .LK_XX_XX-.L005pic_point(%ebp),%ebp
.Lssse3_shortcut:
movdqa (%ebp),%xmm7
movdqa 16(%ebp),%xmm0
@@ -1456,936 +1628,917 @@
movdqa %xmm1,16(%esp)
psubd %xmm7,%xmm1
movdqa %xmm2,32(%esp)
+ movl %ecx,%ebp
psubd %xmm7,%xmm2
- movdqa %xmm1,%xmm4
- jmp .L004loop
+ xorl %edx,%ebp
+ pshufd $238,%xmm0,%xmm4
+ andl %ebp,%esi
+ jmp .L006loop
.align 16
-.L004loop:
+.L006loop:
+ rorl $2,%ebx
+ xorl %edx,%esi
+ movl %eax,%ebp
+ punpcklqdq %xmm1,%xmm4
+ movdqa %xmm3,%xmm6
addl (%esp),%edi
- xorl %edx,%ecx
-.byte 102,15,58,15,224,8
- movdqa %xmm3,%xmm6
- movl %eax,%ebp
- roll $5,%eax
+ xorl %ecx,%ebx
paddd %xmm3,%xmm7
movdqa %xmm0,64(%esp)
- andl %ecx,%esi
- xorl %edx,%ecx
+ roll $5,%eax
+ addl %esi,%edi
psrldq $4,%xmm6
- xorl %edx,%esi
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ pxor %xmm0,%xmm4
addl %eax,%edi
- pxor %xmm0,%xmm4
- rorl $2,%ebx
- addl %esi,%edi
+ rorl $7,%eax
pxor %xmm2,%xmm6
+ xorl %ecx,%ebp
+ movl %edi,%esi
addl 4(%esp),%edx
- xorl %ecx,%ebx
- movl %edi,%esi
+ pxor %xmm6,%xmm4
+ xorl %ebx,%eax
roll $5,%edi
- pxor %xmm6,%xmm4
- andl %ebx,%ebp
- xorl %ecx,%ebx
movdqa %xmm7,48(%esp)
- xorl %ecx,%ebp
+ addl %ebp,%edx
+ andl %eax,%esi
+ movdqa %xmm4,%xmm0
+ xorl %ebx,%eax
addl %edi,%edx
- movdqa %xmm4,%xmm0
+ rorl $7,%edi
movdqa %xmm4,%xmm6
- rorl $7,%eax
- addl %ebp,%edx
- addl 8(%esp),%ecx
- xorl %ebx,%eax
+ xorl %ebx,%esi
pslldq $12,%xmm0
paddd %xmm4,%xmm4
movl %edx,%ebp
+ addl 8(%esp),%ecx
+ psrld $31,%xmm6
+ xorl %eax,%edi
roll $5,%edx
- andl %eax,%esi
- xorl %ebx,%eax
- psrld $31,%xmm6
- xorl %ebx,%esi
- addl %edx,%ecx
movdqa %xmm0,%xmm7
- rorl $7,%edi
addl %esi,%ecx
+ andl %edi,%ebp
+ xorl %eax,%edi
psrld $30,%xmm0
+ addl %edx,%ecx
+ rorl $7,%edx
por %xmm6,%xmm4
+ xorl %eax,%ebp
+ movl %ecx,%esi
addl 12(%esp),%ebx
- xorl %eax,%edi
- movl %ecx,%esi
+ pslld $2,%xmm7
+ xorl %edi,%edx
roll $5,%ecx
- pslld $2,%xmm7
pxor %xmm0,%xmm4
- andl %edi,%ebp
- xorl %eax,%edi
movdqa 96(%esp),%xmm0
- xorl %eax,%ebp
- addl %ecx,%ebx
+ addl %ebp,%ebx
+ andl %edx,%esi
pxor %xmm7,%xmm4
- movdqa %xmm2,%xmm5
- rorl $7,%edx
- addl %ebp,%ebx
- addl 16(%esp),%eax
+ pshufd $238,%xmm1,%xmm5
xorl %edi,%edx
-.byte 102,15,58,15,233,8
+ addl %ecx,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ punpcklqdq %xmm2,%xmm5
movdqa %xmm4,%xmm7
- movl %ebx,%ebp
- roll $5,%ebx
+ addl 16(%esp),%eax
+ xorl %edx,%ecx
paddd %xmm4,%xmm0
movdqa %xmm1,80(%esp)
- andl %edx,%esi
- xorl %edi,%edx
+ roll $5,%ebx
+ addl %esi,%eax
psrldq $4,%xmm7
- xorl %edi,%esi
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ pxor %xmm1,%xmm5
addl %ebx,%eax
- pxor %xmm1,%xmm5
- rorl $7,%ecx
- addl %esi,%eax
+ rorl $7,%ebx
pxor %xmm3,%xmm7
+ xorl %edx,%ebp
+ movl %eax,%esi
addl 20(%esp),%edi
- xorl %edx,%ecx
- movl %eax,%esi
+ pxor %xmm7,%xmm5
+ xorl %ecx,%ebx
roll $5,%eax
- pxor %xmm7,%xmm5
- andl %ecx,%ebp
- xorl %edx,%ecx
movdqa %xmm0,(%esp)
- xorl %edx,%ebp
+ addl %ebp,%edi
+ andl %ebx,%esi
+ movdqa %xmm5,%xmm1
+ xorl %ecx,%ebx
addl %eax,%edi
- movdqa %xmm5,%xmm1
+ rorl $7,%eax
movdqa %xmm5,%xmm7
- rorl $7,%ebx
- addl %ebp,%edi
- addl 24(%esp),%edx
- xorl %ecx,%ebx
+ xorl %ecx,%esi
pslldq $12,%xmm1
paddd %xmm5,%xmm5
movl %edi,%ebp
+ addl 24(%esp),%edx
+ psrld $31,%xmm7
+ xorl %ebx,%eax
roll $5,%edi
- andl %ebx,%esi
- xorl %ecx,%ebx
- psrld $31,%xmm7
- xorl %ecx,%esi
- addl %edi,%edx
movdqa %xmm1,%xmm0
- rorl $7,%eax
addl %esi,%edx
+ andl %eax,%ebp
+ xorl %ebx,%eax
psrld $30,%xmm1
+ addl %edi,%edx
+ rorl $7,%edi
por %xmm7,%xmm5
+ xorl %ebx,%ebp
+ movl %edx,%esi
addl 28(%esp),%ecx
- xorl %ebx,%eax
- movl %edx,%esi
+ pslld $2,%xmm0
+ xorl %eax,%edi
roll $5,%edx
- pslld $2,%xmm0
pxor %xmm1,%xmm5
- andl %eax,%ebp
- xorl %ebx,%eax
movdqa 112(%esp),%xmm1
- xorl %ebx,%ebp
- addl %edx,%ecx
+ addl %ebp,%ecx
+ andl %edi,%esi
pxor %xmm0,%xmm5
- movdqa %xmm3,%xmm6
- rorl $7,%edi
- addl %ebp,%ecx
- addl 32(%esp),%ebx
+ pshufd $238,%xmm2,%xmm6
xorl %eax,%edi
-.byte 102,15,58,15,242,8
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ punpcklqdq %xmm3,%xmm6
movdqa %xmm5,%xmm0
- movl %ecx,%ebp
- roll $5,%ecx
+ addl 32(%esp),%ebx
+ xorl %edi,%edx
paddd %xmm5,%xmm1
movdqa %xmm2,96(%esp)
- andl %edi,%esi
- xorl %eax,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
psrldq $4,%xmm0
- xorl %eax,%esi
+ andl %edx,%ebp
+ xorl %edi,%edx
+ pxor %xmm2,%xmm6
addl %ecx,%ebx
- pxor %xmm2,%xmm6
- rorl $7,%edx
- addl %esi,%ebx
+ rorl $7,%ecx
pxor %xmm4,%xmm0
+ xorl %edi,%ebp
+ movl %ebx,%esi
addl 36(%esp),%eax
- xorl %edi,%edx
- movl %ebx,%esi
+ pxor %xmm0,%xmm6
+ xorl %edx,%ecx
roll $5,%ebx
- pxor %xmm0,%xmm6
- andl %edx,%ebp
- xorl %edi,%edx
movdqa %xmm1,16(%esp)
- xorl %edi,%ebp
+ addl %ebp,%eax
+ andl %ecx,%esi
+ movdqa %xmm6,%xmm2
+ xorl %edx,%ecx
addl %ebx,%eax
- movdqa %xmm6,%xmm2
+ rorl $7,%ebx
movdqa %xmm6,%xmm0
- rorl $7,%ecx
- addl %ebp,%eax
- addl 40(%esp),%edi
- xorl %edx,%ecx
+ xorl %edx,%esi
pslldq $12,%xmm2
paddd %xmm6,%xmm6
movl %eax,%ebp
+ addl 40(%esp),%edi
+ psrld $31,%xmm0
+ xorl %ecx,%ebx
roll $5,%eax
- andl %ecx,%esi
- xorl %edx,%ecx
- psrld $31,%xmm0
- xorl %edx,%esi
- addl %eax,%edi
movdqa %xmm2,%xmm1
- rorl $7,%ebx
addl %esi,%edi
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
psrld $30,%xmm2
+ addl %eax,%edi
+ rorl $7,%eax
por %xmm0,%xmm6
- addl 44(%esp),%edx
- xorl %ecx,%ebx
+ xorl %ecx,%ebp
movdqa 64(%esp),%xmm0
movl %edi,%esi
+ addl 44(%esp),%edx
+ pslld $2,%xmm1
+ xorl %ebx,%eax
roll $5,%edi
- pslld $2,%xmm1
pxor %xmm2,%xmm6
- andl %ebx,%ebp
- xorl %ecx,%ebx
movdqa 112(%esp),%xmm2
- xorl %ecx,%ebp
- addl %edi,%edx
+ addl %ebp,%edx
+ andl %eax,%esi
pxor %xmm1,%xmm6
- movdqa %xmm4,%xmm7
- rorl $7,%eax
- addl %ebp,%edx
- addl 48(%esp),%ecx
+ pshufd $238,%xmm3,%xmm7
xorl %ebx,%eax
-.byte 102,15,58,15,251,8
+ addl %edi,%edx
+ rorl $7,%edi
+ xorl %ebx,%esi
+ movl %edx,%ebp
+ punpcklqdq %xmm4,%xmm7
movdqa %xmm6,%xmm1
- movl %edx,%ebp
- roll $5,%edx
+ addl 48(%esp),%ecx
+ xorl %eax,%edi
paddd %xmm6,%xmm2
movdqa %xmm3,64(%esp)
- andl %eax,%esi
- xorl %ebx,%eax
+ roll $5,%edx
+ addl %esi,%ecx
psrldq $4,%xmm1
- xorl %ebx,%esi
+ andl %edi,%ebp
+ xorl %eax,%edi
+ pxor %xmm3,%xmm7
addl %edx,%ecx
- pxor %xmm3,%xmm7
- rorl $7,%edi
- addl %esi,%ecx
+ rorl $7,%edx
pxor %xmm5,%xmm1
+ xorl %eax,%ebp
+ movl %ecx,%esi
addl 52(%esp),%ebx
- xorl %eax,%edi
- movl %ecx,%esi
+ pxor %xmm1,%xmm7
+ xorl %edi,%edx
roll $5,%ecx
- pxor %xmm1,%xmm7
- andl %edi,%ebp
- xorl %eax,%edi
movdqa %xmm2,32(%esp)
- xorl %eax,%ebp
+ addl %ebp,%ebx
+ andl %edx,%esi
+ movdqa %xmm7,%xmm3
+ xorl %edi,%edx
addl %ecx,%ebx
- movdqa %xmm7,%xmm3
+ rorl $7,%ecx
movdqa %xmm7,%xmm1
- rorl $7,%edx
- addl %ebp,%ebx
- addl 56(%esp),%eax
- xorl %edi,%edx
+ xorl %edi,%esi
pslldq $12,%xmm3
paddd %xmm7,%xmm7
movl %ebx,%ebp
+ addl 56(%esp),%eax
+ psrld $31,%xmm1
+ xorl %edx,%ecx
roll $5,%ebx
- andl %edx,%esi
- xorl %edi,%edx
- psrld $31,%xmm1
- xorl %edi,%esi
- addl %ebx,%eax
movdqa %xmm3,%xmm2
- rorl $7,%ecx
addl %esi,%eax
+ andl %ecx,%ebp
+ xorl %edx,%ecx
psrld $30,%xmm3
+ addl %ebx,%eax
+ rorl $7,%ebx
por %xmm1,%xmm7
- addl 60(%esp),%edi
- xorl %edx,%ecx
+ xorl %edx,%ebp
movdqa 80(%esp),%xmm1
movl %eax,%esi
+ addl 60(%esp),%edi
+ pslld $2,%xmm2
+ xorl %ecx,%ebx
roll $5,%eax
- pslld $2,%xmm2
pxor %xmm3,%xmm7
- andl %ecx,%ebp
- xorl %edx,%ecx
movdqa 112(%esp),%xmm3
- xorl %edx,%ebp
+ addl %ebp,%edi
+ andl %ebx,%esi
+ pxor %xmm2,%xmm7
+ pshufd $238,%xmm6,%xmm2
+ xorl %ecx,%ebx
addl %eax,%edi
- pxor %xmm2,%xmm7
- rorl $7,%ebx
- addl %ebp,%edi
- movdqa %xmm7,%xmm2
- addl (%esp),%edx
+ rorl $7,%eax
pxor %xmm4,%xmm0
-.byte 102,15,58,15,214,8
- xorl %ecx,%ebx
+ punpcklqdq %xmm7,%xmm2
+ xorl %ecx,%esi
movl %edi,%ebp
- roll $5,%edi
+ addl (%esp),%edx
pxor %xmm1,%xmm0
movdqa %xmm4,80(%esp)
- andl %ebx,%esi
- xorl %ecx,%ebx
+ xorl %ebx,%eax
+ roll $5,%edi
movdqa %xmm3,%xmm4
+ addl %esi,%edx
paddd %xmm7,%xmm3
- xorl %ecx,%esi
- addl %edi,%edx
+ andl %eax,%ebp
pxor %xmm2,%xmm0
- rorl $7,%eax
- addl %esi,%edx
- addl 4(%esp),%ecx
xorl %ebx,%eax
+ addl %edi,%edx
+ rorl $7,%edi
+ xorl %ebx,%ebp
movdqa %xmm0,%xmm2
movdqa %xmm3,48(%esp)
movl %edx,%esi
+ addl 4(%esp),%ecx
+ xorl %eax,%edi
roll $5,%edx
- andl %eax,%ebp
- xorl %ebx,%eax
pslld $2,%xmm0
- xorl %ebx,%ebp
- addl %edx,%ecx
+ addl %ebp,%ecx
+ andl %edi,%esi
psrld $30,%xmm2
- rorl $7,%edi
- addl %ebp,%ecx
- addl 8(%esp),%ebx
xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
movl %ecx,%ebp
+ addl 8(%esp),%ebx
+ xorl %edi,%edx
roll $5,%ecx
por %xmm2,%xmm0
- andl %edi,%esi
- xorl %eax,%edi
+ addl %esi,%ebx
+ andl %edx,%ebp
movdqa 96(%esp),%xmm2
- xorl %eax,%esi
+ xorl %edi,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
addl 12(%esp),%eax
- movdqa %xmm0,%xmm3
- xorl %edi,%edx
+ xorl %edi,%ebp
movl %ebx,%esi
+ pshufd $238,%xmm7,%xmm3
roll $5,%ebx
- andl %edx,%ebp
- xorl %edi,%edx
- xorl %edi,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
addl 16(%esp),%edi
pxor %xmm5,%xmm1
-.byte 102,15,58,15,223,8
- xorl %edx,%esi
+ punpcklqdq %xmm0,%xmm3
+ xorl %ecx,%esi
movl %eax,%ebp
roll $5,%eax
pxor %xmm2,%xmm1
movdqa %xmm5,96(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
+ addl %esi,%edi
+ xorl %ecx,%ebp
movdqa %xmm4,%xmm5
+ rorl $7,%ebx
paddd %xmm0,%xmm4
- rorl $7,%ebx
- addl %esi,%edi
+ addl %eax,%edi
pxor %xmm3,%xmm1
addl 20(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
movdqa %xmm1,%xmm3
movdqa %xmm4,(%esp)
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
pslld $2,%xmm1
addl 24(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
psrld $30,%xmm3
movl %edx,%ebp
roll $5,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
por %xmm3,%xmm1
addl 28(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movdqa 64(%esp),%xmm3
movl %ecx,%esi
roll $5,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ pshufd $238,%xmm0,%xmm4
addl %ecx,%ebx
- rorl $7,%edx
- movdqa %xmm1,%xmm4
- addl %ebp,%ebx
addl 32(%esp),%eax
pxor %xmm6,%xmm2
-.byte 102,15,58,15,224,8
- xorl %edi,%esi
+ punpcklqdq %xmm1,%xmm4
+ xorl %edx,%esi
movl %ebx,%ebp
roll $5,%ebx
pxor %xmm3,%xmm2
movdqa %xmm6,64(%esp)
- xorl %edx,%esi
- addl %ebx,%eax
+ addl %esi,%eax
+ xorl %edx,%ebp
movdqa 128(%esp),%xmm6
+ rorl $7,%ecx
paddd %xmm1,%xmm5
- rorl $7,%ecx
- addl %esi,%eax
+ addl %ebx,%eax
pxor %xmm4,%xmm2
addl 36(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
roll $5,%eax
movdqa %xmm2,%xmm4
movdqa %xmm5,16(%esp)
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
pslld $2,%xmm2
addl 40(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
psrld $30,%xmm4
movl %edi,%ebp
roll $5,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
por %xmm4,%xmm2
addl 44(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movdqa 80(%esp),%xmm4
movl %edx,%esi
roll $5,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ pshufd $238,%xmm1,%xmm5
addl %edx,%ecx
- rorl $7,%edi
- movdqa %xmm2,%xmm5
- addl %ebp,%ecx
addl 48(%esp),%ebx
pxor %xmm7,%xmm3
-.byte 102,15,58,15,233,8
- xorl %eax,%esi
+ punpcklqdq %xmm2,%xmm5
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
pxor %xmm4,%xmm3
movdqa %xmm7,80(%esp)
- xorl %edi,%esi
- addl %ecx,%ebx
+ addl %esi,%ebx
+ xorl %edi,%ebp
movdqa %xmm6,%xmm7
+ rorl $7,%edx
paddd %xmm2,%xmm6
- rorl $7,%edx
- addl %esi,%ebx
+ addl %ecx,%ebx
pxor %xmm5,%xmm3
addl 52(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
movdqa %xmm3,%xmm5
movdqa %xmm6,32(%esp)
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
pslld $2,%xmm3
addl 56(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
psrld $30,%xmm5
movl %eax,%ebp
roll $5,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %esi,%edi
por %xmm5,%xmm3
addl 60(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movdqa 96(%esp),%xmm5
movl %edi,%esi
roll $5,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ pshufd $238,%xmm2,%xmm6
addl %edi,%edx
- rorl $7,%eax
- movdqa %xmm3,%xmm6
- addl %ebp,%edx
addl (%esp),%ecx
pxor %xmm0,%xmm4
-.byte 102,15,58,15,242,8
- xorl %ebx,%esi
+ punpcklqdq %xmm3,%xmm6
+ xorl %eax,%esi
movl %edx,%ebp
roll $5,%edx
pxor %xmm5,%xmm4
movdqa %xmm0,96(%esp)
- xorl %eax,%esi
- addl %edx,%ecx
+ addl %esi,%ecx
+ xorl %eax,%ebp
movdqa %xmm7,%xmm0
+ rorl $7,%edi
paddd %xmm3,%xmm7
- rorl $7,%edi
- addl %esi,%ecx
+ addl %edx,%ecx
pxor %xmm6,%xmm4
addl 4(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
roll $5,%ecx
movdqa %xmm4,%xmm6
movdqa %xmm7,48(%esp)
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
pslld $2,%xmm4
addl 8(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
psrld $30,%xmm6
movl %ebx,%ebp
roll $5,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
por %xmm6,%xmm4
addl 12(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movdqa 64(%esp),%xmm6
movl %eax,%esi
roll $5,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ pshufd $238,%xmm3,%xmm7
addl %eax,%edi
- rorl $7,%ebx
- movdqa %xmm4,%xmm7
- addl %ebp,%edi
addl 16(%esp),%edx
pxor %xmm1,%xmm5
-.byte 102,15,58,15,251,8
- xorl %ecx,%esi
+ punpcklqdq %xmm4,%xmm7
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
pxor %xmm6,%xmm5
movdqa %xmm1,64(%esp)
- xorl %ebx,%esi
- addl %edi,%edx
+ addl %esi,%edx
+ xorl %ebx,%ebp
movdqa %xmm0,%xmm1
+ rorl $7,%eax
paddd %xmm4,%xmm0
- rorl $7,%eax
- addl %esi,%edx
+ addl %edi,%edx
pxor %xmm7,%xmm5
addl 20(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
roll $5,%edx
movdqa %xmm5,%xmm7
movdqa %xmm0,(%esp)
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
pslld $2,%xmm5
addl 24(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
psrld $30,%xmm7
movl %ecx,%ebp
roll $5,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
por %xmm7,%xmm5
addl 28(%esp),%eax
- xorl %edi,%ebp
movdqa 80(%esp),%xmm7
+ rorl $7,%ecx
movl %ebx,%esi
+ xorl %edx,%ebp
roll $5,%ebx
- xorl %edx,%ebp
+ pshufd $238,%xmm4,%xmm0
+ addl %ebp,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- movdqa %xmm5,%xmm0
- addl %ebp,%eax
- movl %ecx,%ebp
+ addl 32(%esp),%edi
pxor %xmm2,%xmm6
-.byte 102,15,58,15,196,8
+ punpcklqdq %xmm5,%xmm0
+ andl %ecx,%esi
xorl %edx,%ecx
- addl 32(%esp),%edi
- andl %edx,%ebp
+ rorl $7,%ebx
pxor %xmm7,%xmm6
movdqa %xmm2,80(%esp)
- andl %ecx,%esi
- rorl $7,%ebx
+ movl %eax,%ebp
+ xorl %ecx,%esi
+ roll $5,%eax
movdqa %xmm1,%xmm2
+ addl %esi,%edi
paddd %xmm5,%xmm1
- addl %ebp,%edi
- movl %eax,%ebp
+ xorl %ebx,%ebp
pxor %xmm0,%xmm6
- roll $5,%eax
- addl %esi,%edi
- xorl %edx,%ecx
+ xorl %ecx,%ebx
addl %eax,%edi
+ addl 36(%esp),%edx
+ andl %ebx,%ebp
movdqa %xmm6,%xmm0
movdqa %xmm1,16(%esp)
- movl %ebx,%esi
xorl %ecx,%ebx
- addl 36(%esp),%edx
- andl %ecx,%esi
- pslld $2,%xmm6
- andl %ebx,%ebp
rorl $7,%eax
- psrld $30,%xmm0
- addl %esi,%edx
movl %edi,%esi
+ xorl %ebx,%ebp
roll $5,%edi
+ pslld $2,%xmm6
addl %ebp,%edx
- xorl %ecx,%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm0
+ xorl %ebx,%eax
addl %edi,%edx
- por %xmm0,%xmm6
- movl %eax,%ebp
- xorl %ebx,%eax
- movdqa 96(%esp),%xmm0
addl 40(%esp),%ecx
- andl %ebx,%ebp
andl %eax,%esi
+ xorl %ebx,%eax
rorl $7,%edi
- addl %ebp,%ecx
- movdqa %xmm6,%xmm1
+ por %xmm0,%xmm6
movl %edx,%ebp
+ xorl %eax,%esi
+ movdqa 96(%esp),%xmm0
roll $5,%edx
addl %esi,%ecx
- xorl %ebx,%eax
+ xorl %edi,%ebp
+ xorl %eax,%edi
addl %edx,%ecx
- movl %edi,%esi
- xorl %eax,%edi
+ pshufd $238,%xmm5,%xmm1
addl 44(%esp),%ebx
- andl %eax,%esi
andl %edi,%ebp
+ xorl %eax,%edi
rorl $7,%edx
- addl %esi,%ebx
movl %ecx,%esi
+ xorl %edi,%ebp
roll $5,%ecx
addl %ebp,%ebx
- xorl %eax,%edi
+ xorl %edx,%esi
+ xorl %edi,%edx
addl %ecx,%ebx
- movl %edx,%ebp
+ addl 48(%esp),%eax
pxor %xmm3,%xmm7
-.byte 102,15,58,15,205,8
+ punpcklqdq %xmm6,%xmm1
+ andl %edx,%esi
xorl %edi,%edx
- addl 48(%esp),%eax
- andl %edi,%ebp
+ rorl $7,%ecx
pxor %xmm0,%xmm7
movdqa %xmm3,96(%esp)
- andl %edx,%esi
- rorl $7,%ecx
+ movl %ebx,%ebp
+ xorl %edx,%esi
+ roll $5,%ebx
movdqa 144(%esp),%xmm3
+ addl %esi,%eax
paddd %xmm6,%xmm2
- addl %ebp,%eax
- movl %ebx,%ebp
+ xorl %ecx,%ebp
pxor %xmm1,%xmm7
- roll $5,%ebx
- addl %esi,%eax
- xorl %edi,%edx
+ xorl %edx,%ecx
addl %ebx,%eax
+ addl 52(%esp),%edi
+ andl %ecx,%ebp
movdqa %xmm7,%xmm1
movdqa %xmm2,32(%esp)
- movl %ecx,%esi
xorl %edx,%ecx
- addl 52(%esp),%edi
- andl %edx,%esi
- pslld $2,%xmm7
- andl %ecx,%ebp
rorl $7,%ebx
- psrld $30,%xmm1
- addl %esi,%edi
movl %eax,%esi
+ xorl %ecx,%ebp
roll $5,%eax
+ pslld $2,%xmm7
addl %ebp,%edi
- xorl %edx,%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm1
+ xorl %ecx,%ebx
addl %eax,%edi
- por %xmm1,%xmm7
- movl %ebx,%ebp
- xorl %ecx,%ebx
- movdqa 64(%esp),%xmm1
addl 56(%esp),%edx
- andl %ecx,%ebp
andl %ebx,%esi
+ xorl %ecx,%ebx
rorl $7,%eax
- addl %ebp,%edx
- movdqa %xmm7,%xmm2
+ por %xmm1,%xmm7
movl %edi,%ebp
+ xorl %ebx,%esi
+ movdqa 64(%esp),%xmm1
roll $5,%edi
addl %esi,%edx
- xorl %ecx,%ebx
+ xorl %eax,%ebp
+ xorl %ebx,%eax
addl %edi,%edx
- movl %eax,%esi
- xorl %ebx,%eax
+ pshufd $238,%xmm6,%xmm2
addl 60(%esp),%ecx
- andl %ebx,%esi
andl %eax,%ebp
+ xorl %ebx,%eax
rorl $7,%edi
- addl %esi,%ecx
movl %edx,%esi
+ xorl %eax,%ebp
roll $5,%edx
addl %ebp,%ecx
- xorl %ebx,%eax
+ xorl %edi,%esi
+ xorl %eax,%edi
addl %edx,%ecx
- movl %edi,%ebp
+ addl (%esp),%ebx
pxor %xmm4,%xmm0
-.byte 102,15,58,15,214,8
+ punpcklqdq %xmm7,%xmm2
+ andl %edi,%esi
xorl %eax,%edi
- addl (%esp),%ebx
- andl %eax,%ebp
+ rorl $7,%edx
pxor %xmm1,%xmm0
movdqa %xmm4,64(%esp)
- andl %edi,%esi
- rorl $7,%edx
+ movl %ecx,%ebp
+ xorl %edi,%esi
+ roll $5,%ecx
movdqa %xmm3,%xmm4
+ addl %esi,%ebx
paddd %xmm7,%xmm3
- addl %ebp,%ebx
- movl %ecx,%ebp
+ xorl %edx,%ebp
pxor %xmm2,%xmm0
- roll $5,%ecx
- addl %esi,%ebx
- xorl %eax,%edi
+ xorl %edi,%edx
addl %ecx,%ebx
+ addl 4(%esp),%eax
+ andl %edx,%ebp
movdqa %xmm0,%xmm2
movdqa %xmm3,48(%esp)
- movl %edx,%esi
xorl %edi,%edx
- addl 4(%esp),%eax
- andl %edi,%esi
- pslld $2,%xmm0
- andl %edx,%ebp
rorl $7,%ecx
- psrld $30,%xmm2
- addl %esi,%eax
movl %ebx,%esi
+ xorl %edx,%ebp
roll $5,%ebx
+ pslld $2,%xmm0
addl %ebp,%eax
- xorl %edi,%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm2
+ xorl %edx,%ecx
addl %ebx,%eax
- por %xmm2,%xmm0
- movl %ecx,%ebp
- xorl %edx,%ecx
- movdqa 80(%esp),%xmm2
addl 8(%esp),%edi
- andl %edx,%ebp
andl %ecx,%esi
+ xorl %edx,%ecx
rorl $7,%ebx
- addl %ebp,%edi
- movdqa %xmm0,%xmm3
+ por %xmm2,%xmm0
movl %eax,%ebp
+ xorl %ecx,%esi
+ movdqa 80(%esp),%xmm2
roll $5,%eax
addl %esi,%edi
- xorl %edx,%ecx
+ xorl %ebx,%ebp
+ xorl %ecx,%ebx
addl %eax,%edi
- movl %ebx,%esi
- xorl %ecx,%ebx
+ pshufd $238,%xmm7,%xmm3
addl 12(%esp),%edx
- andl %ecx,%esi
andl %ebx,%ebp
+ xorl %ecx,%ebx
rorl $7,%eax
- addl %esi,%edx
movl %edi,%esi
+ xorl %ebx,%ebp
roll $5,%edi
addl %ebp,%edx
- xorl %ecx,%ebx
+ xorl %eax,%esi
+ xorl %ebx,%eax
addl %edi,%edx
- movl %eax,%ebp
+ addl 16(%esp),%ecx
pxor %xmm5,%xmm1
-.byte 102,15,58,15,223,8
+ punpcklqdq %xmm0,%xmm3
+ andl %eax,%esi
xorl %ebx,%eax
- addl 16(%esp),%ecx
- andl %ebx,%ebp
+ rorl $7,%edi
pxor %xmm2,%xmm1
movdqa %xmm5,80(%esp)
- andl %eax,%esi
- rorl $7,%edi
+ movl %edx,%ebp
+ xorl %eax,%esi
+ roll $5,%edx
movdqa %xmm4,%xmm5
+ addl %esi,%ecx
paddd %xmm0,%xmm4
- addl %ebp,%ecx
- movl %edx,%ebp
+ xorl %edi,%ebp
pxor %xmm3,%xmm1
- roll $5,%edx
- addl %esi,%ecx
- xorl %ebx,%eax
+ xorl %eax,%edi
addl %edx,%ecx
+ addl 20(%esp),%ebx
+ andl %edi,%ebp
movdqa %xmm1,%xmm3
movdqa %xmm4,(%esp)
- movl %edi,%esi
xorl %eax,%edi
- addl 20(%esp),%ebx
- andl %eax,%esi
- pslld $2,%xmm1
- andl %edi,%ebp
rorl $7,%edx
- psrld $30,%xmm3
- addl %esi,%ebx
movl %ecx,%esi
+ xorl %edi,%ebp
roll $5,%ecx
+ pslld $2,%xmm1
addl %ebp,%ebx
- xorl %eax,%edi
+ xorl %edx,%esi
+ psrld $30,%xmm3
+ xorl %edi,%edx
addl %ecx,%ebx
- por %xmm3,%xmm1
- movl %edx,%ebp
- xorl %edi,%edx
- movdqa 96(%esp),%xmm3
addl 24(%esp),%eax
- andl %edi,%ebp
andl %edx,%esi
+ xorl %edi,%edx
rorl $7,%ecx
- addl %ebp,%eax
- movdqa %xmm1,%xmm4
+ por %xmm3,%xmm1
movl %ebx,%ebp
+ xorl %edx,%esi
+ movdqa 96(%esp),%xmm3
roll $5,%ebx
addl %esi,%eax
- xorl %edi,%edx
+ xorl %ecx,%ebp
+ xorl %edx,%ecx
addl %ebx,%eax
- movl %ecx,%esi
- xorl %edx,%ecx
+ pshufd $238,%xmm0,%xmm4
addl 28(%esp),%edi
- andl %edx,%esi
andl %ecx,%ebp
+ xorl %edx,%ecx
rorl $7,%ebx
- addl %esi,%edi
movl %eax,%esi
+ xorl %ecx,%ebp
roll $5,%eax
addl %ebp,%edi
- xorl %edx,%ecx
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
addl %eax,%edi
- movl %ebx,%ebp
+ addl 32(%esp),%edx
pxor %xmm6,%xmm2
-.byte 102,15,58,15,224,8
+ punpcklqdq %xmm1,%xmm4
+ andl %ebx,%esi
xorl %ecx,%ebx
- addl 32(%esp),%edx
- andl %ecx,%ebp
+ rorl $7,%eax
pxor %xmm3,%xmm2
movdqa %xmm6,96(%esp)
- andl %ebx,%esi
- rorl $7,%eax
+ movl %edi,%ebp
+ xorl %ebx,%esi
+ roll $5,%edi
movdqa %xmm5,%xmm6
+ addl %esi,%edx
paddd %xmm1,%xmm5
- addl %ebp,%edx
- movl %edi,%ebp
+ xorl %eax,%ebp
pxor %xmm4,%xmm2
- roll $5,%edi
- addl %esi,%edx
- xorl %ecx,%ebx
+ xorl %ebx,%eax
addl %edi,%edx
+ addl 36(%esp),%ecx
+ andl %eax,%ebp
movdqa %xmm2,%xmm4
movdqa %xmm5,16(%esp)
- movl %eax,%esi
xorl %ebx,%eax
- addl 36(%esp),%ecx
- andl %ebx,%esi
- pslld $2,%xmm2
- andl %eax,%ebp
rorl $7,%edi
- psrld $30,%xmm4
- addl %esi,%ecx
movl %edx,%esi
+ xorl %eax,%ebp
roll $5,%edx
+ pslld $2,%xmm2
addl %ebp,%ecx
- xorl %ebx,%eax
+ xorl %edi,%esi
+ psrld $30,%xmm4
+ xorl %eax,%edi
addl %edx,%ecx
- por %xmm4,%xmm2
- movl %edi,%ebp
- xorl %eax,%edi
- movdqa 64(%esp),%xmm4
addl 40(%esp),%ebx
- andl %eax,%ebp
andl %edi,%esi
+ xorl %eax,%edi
rorl $7,%edx
- addl %ebp,%ebx
- movdqa %xmm2,%xmm5
+ por %xmm4,%xmm2
movl %ecx,%ebp
+ xorl %edi,%esi
+ movdqa 64(%esp),%xmm4
roll $5,%ecx
addl %esi,%ebx
- xorl %eax,%edi
+ xorl %edx,%ebp
+ xorl %edi,%edx
addl %ecx,%ebx
- movl %edx,%esi
- xorl %edi,%edx
+ pshufd $238,%xmm1,%xmm5
addl 44(%esp),%eax
- andl %edi,%esi
andl %edx,%ebp
+ xorl %edi,%edx
rorl $7,%ecx
- addl %esi,%eax
movl %ebx,%esi
+ xorl %edx,%ebp
roll $5,%ebx
addl %ebp,%eax
- xorl %edi,%edx
+ xorl %edx,%esi
addl %ebx,%eax
addl 48(%esp),%edi
pxor %xmm7,%xmm3
-.byte 102,15,58,15,233,8
- xorl %edx,%esi
+ punpcklqdq %xmm2,%xmm5
+ xorl %ecx,%esi
movl %eax,%ebp
roll $5,%eax
pxor %xmm4,%xmm3
movdqa %xmm7,64(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
+ addl %esi,%edi
+ xorl %ecx,%ebp
movdqa %xmm6,%xmm7
+ rorl $7,%ebx
paddd %xmm2,%xmm6
- rorl $7,%ebx
- addl %esi,%edi
+ addl %eax,%edi
pxor %xmm5,%xmm3
addl 52(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
movdqa %xmm3,%xmm5
movdqa %xmm6,32(%esp)
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
pslld $2,%xmm3
addl 56(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
psrld $30,%xmm5
movl %edx,%ebp
roll $5,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
por %xmm5,%xmm3
addl 60(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
roll $5,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
addl (%esp),%eax
- paddd %xmm3,%xmm7
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
roll $5,%ebx
- xorl %edx,%esi
- movdqa %xmm7,48(%esp)
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
+ paddd %xmm3,%xmm7
addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
addl 4(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
+ movdqa %xmm7,48(%esp)
roll $5,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
addl 8(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
addl 12(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
roll $5,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
movl 196(%esp),%ebp
cmpl 200(%esp),%ebp
- je .L005done
+ je .L007done
movdqa 160(%esp),%xmm7
movdqa 176(%esp),%xmm6
movdqu (%ebp),%xmm0
@@ -2397,113 +2550,112 @@
movl %ebp,196(%esp)
movdqa %xmm7,96(%esp)
addl 16(%esp),%ebx
- xorl %eax,%esi
-.byte 102,15,56,0,206
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
- paddd %xmm7,%xmm0
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
+.byte 102,15,56,0,206
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- movdqa %xmm0,(%esp)
addl 20(%esp),%eax
- xorl %edi,%ebp
- psubd %xmm7,%xmm0
+ xorl %edx,%ebp
movl %ebx,%esi
+ paddd %xmm7,%xmm0
roll $5,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ movdqa %xmm0,(%esp)
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
addl 24(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
+ psubd %xmm7,%xmm0
roll $5,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %esi,%edi
addl 28(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
addl 32(%esp),%ecx
- xorl %ebx,%esi
-.byte 102,15,56,0,214
+ xorl %eax,%esi
movl %edx,%ebp
roll $5,%edx
- paddd %xmm7,%xmm1
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
+.byte 102,15,56,0,214
addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
- movdqa %xmm1,16(%esp)
addl 36(%esp),%ebx
- xorl %eax,%ebp
- psubd %xmm7,%xmm1
+ xorl %edi,%ebp
movl %ecx,%esi
+ paddd %xmm7,%xmm1
roll $5,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ movdqa %xmm1,16(%esp)
addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
addl 40(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
+ psubd %xmm7,%xmm1
roll $5,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
addl 44(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
roll $5,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
addl 48(%esp),%edx
- xorl %ecx,%esi
-.byte 102,15,56,0,222
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
- paddd %xmm7,%xmm2
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
+.byte 102,15,56,0,222
addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
- movdqa %xmm2,32(%esp)
addl 52(%esp),%ecx
- xorl %ebx,%ebp
- psubd %xmm7,%xmm2
+ xorl %eax,%ebp
movl %edx,%esi
+ paddd %xmm7,%xmm2
roll $5,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ movdqa %xmm2,32(%esp)
addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
addl 56(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
+ psubd %xmm7,%xmm2
roll $5,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
addl 60(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
movl 192(%esp),%ebp
addl (%ebp),%eax
addl 4(%ebp),%esi
@@ -2513,109 +2665,112 @@
movl %esi,4(%ebp)
addl 16(%ebp),%edi
movl %ecx,8(%ebp)
- movl %esi,%ebx
+ movl %ecx,%ebx
movl %edx,12(%ebp)
+ xorl %edx,%ebx
movl %edi,16(%ebp)
- movdqa %xmm1,%xmm4
- jmp .L004loop
+ movl %esi,%ebp
+ pshufd $238,%xmm0,%xmm4
+ andl %ebx,%esi
+ movl %ebp,%ebx
+ jmp .L006loop
.align 16
-.L005done:
+.L007done:
addl 16(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
addl 20(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
addl 24(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
roll $5,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %esi,%edi
addl 28(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
addl 32(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
movl %edx,%ebp
roll $5,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
addl 36(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
roll $5,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
addl 40(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
roll $5,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
addl 44(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
roll $5,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
addl 48(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
addl 52(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
roll $5,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
addl 56(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
addl 60(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
movl 192(%esp),%ebp
addl (%ebp),%eax
movl 204(%esp),%esp
@@ -2641,10 +2796,10 @@
pushl %ebx
pushl %esi
pushl %edi
- call .L006pic_point
-.L006pic_point:
+ call .L008pic_point
+.L008pic_point:
popl %ebp
- leal .LK_XX_XX-.L006pic_point(%ebp),%ebp
+ leal .LK_XX_XX-.L008pic_point(%ebp),%ebp
.Lavx_shortcut:
vzeroall
vmovdqa (%ebp),%xmm7
@@ -2689,893 +2844,874 @@
vpaddd %xmm7,%xmm1,%xmm5
vpaddd %xmm7,%xmm2,%xmm6
vmovdqa %xmm4,(%esp)
+ movl %ecx,%ebp
vmovdqa %xmm5,16(%esp)
+ xorl %edx,%ebp
vmovdqa %xmm6,32(%esp)
- jmp .L007loop
+ andl %ebp,%esi
+ jmp .L009loop
.align 16
-.L007loop:
- addl (%esp),%edi
- xorl %edx,%ecx
+.L009loop:
+ shrdl $2,%ebx,%ebx
+ xorl %edx,%esi
vpalignr $8,%xmm0,%xmm1,%xmm4
movl %eax,%ebp
- shldl $5,%eax,%eax
+ addl (%esp),%edi
vpaddd %xmm3,%xmm7,%xmm7
vmovdqa %xmm0,64(%esp)
- andl %ecx,%esi
- xorl %edx,%ecx
+ xorl %ecx,%ebx
+ shldl $5,%eax,%eax
vpsrldq $4,%xmm3,%xmm6
- xorl %edx,%esi
+ addl %esi,%edi
+ andl %ebx,%ebp
+ vpxor %xmm0,%xmm4,%xmm4
+ xorl %ecx,%ebx
addl %eax,%edi
- vpxor %xmm0,%xmm4,%xmm4
- shrdl $2,%ebx,%ebx
- addl %esi,%edi
vpxor %xmm2,%xmm6,%xmm6
- addl 4(%esp),%edx
- xorl %ecx,%ebx
+ shrdl $7,%eax,%eax
+ xorl %ecx,%ebp
vmovdqa %xmm7,48(%esp)
movl %edi,%esi
+ addl 4(%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ebx,%eax
shldl $5,%edi,%edi
- vpxor %xmm6,%xmm4,%xmm4
- andl %ebx,%ebp
- xorl %ecx,%ebx
- xorl %ecx,%ebp
- addl %edi,%edx
+ addl %ebp,%edx
+ andl %eax,%esi
vpsrld $31,%xmm4,%xmm6
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- addl 8(%esp),%ecx
xorl %ebx,%eax
+ addl %edi,%edx
+ shrdl $7,%edi,%edi
+ xorl %ebx,%esi
vpslldq $12,%xmm4,%xmm0
vpaddd %xmm4,%xmm4,%xmm4
movl %edx,%ebp
+ addl 8(%esp),%ecx
+ xorl %eax,%edi
shldl $5,%edx,%edx
- andl %eax,%esi
- xorl %ebx,%eax
vpsrld $30,%xmm0,%xmm7
vpor %xmm6,%xmm4,%xmm4
- xorl %ebx,%esi
+ addl %esi,%ecx
+ andl %edi,%ebp
+ xorl %eax,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %esi,%ecx
vpslld $2,%xmm0,%xmm0
- addl 12(%esp),%ebx
- xorl %eax,%edi
+ shrdl $7,%edx,%edx
+ xorl %eax,%ebp
vpxor %xmm7,%xmm4,%xmm4
movl %ecx,%esi
+ addl 12(%esp),%ebx
+ xorl %edi,%edx
shldl $5,%ecx,%ecx
- andl %edi,%ebp
- xorl %eax,%edi
vpxor %xmm0,%xmm4,%xmm4
- xorl %eax,%ebp
- addl %ecx,%ebx
+ addl %ebp,%ebx
+ andl %edx,%esi
vmovdqa 96(%esp),%xmm0
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
- addl 16(%esp),%eax
xorl %edi,%edx
+ addl %ecx,%ebx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%esi
vpalignr $8,%xmm1,%xmm2,%xmm5
movl %ebx,%ebp
- shldl $5,%ebx,%ebx
+ addl 16(%esp),%eax
vpaddd %xmm4,%xmm0,%xmm0
vmovdqa %xmm1,80(%esp)
- andl %edx,%esi
- xorl %edi,%edx
+ xorl %edx,%ecx
+ shldl $5,%ebx,%ebx
vpsrldq $4,%xmm4,%xmm7
- xorl %edi,%esi
+ addl %esi,%eax
+ andl %ecx,%ebp
+ vpxor %xmm1,%xmm5,%xmm5
+ xorl %edx,%ecx
addl %ebx,%eax
- vpxor %xmm1,%xmm5,%xmm5
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
vpxor %xmm3,%xmm7,%xmm7
- addl 20(%esp),%edi
- xorl %edx,%ecx
+ shrdl $7,%ebx,%ebx
+ xorl %edx,%ebp
vmovdqa %xmm0,(%esp)
movl %eax,%esi
+ addl 20(%esp),%edi
+ vpxor %xmm7,%xmm5,%xmm5
+ xorl %ecx,%ebx
shldl $5,%eax,%eax
- vpxor %xmm7,%xmm5,%xmm5
- andl %ecx,%ebp
- xorl %edx,%ecx
- xorl %edx,%ebp
- addl %eax,%edi
+ addl %ebp,%edi
+ andl %ebx,%esi
vpsrld $31,%xmm5,%xmm7
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
- addl 24(%esp),%edx
xorl %ecx,%ebx
+ addl %eax,%edi
+ shrdl $7,%eax,%eax
+ xorl %ecx,%esi
vpslldq $12,%xmm5,%xmm1
vpaddd %xmm5,%xmm5,%xmm5
movl %edi,%ebp
+ addl 24(%esp),%edx
+ xorl %ebx,%eax
shldl $5,%edi,%edi
- andl %ebx,%esi
- xorl %ecx,%ebx
vpsrld $30,%xmm1,%xmm0
vpor %xmm7,%xmm5,%xmm5
- xorl %ecx,%esi
+ addl %esi,%edx
+ andl %eax,%ebp
+ xorl %ebx,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %esi,%edx
vpslld $2,%xmm1,%xmm1
- addl 28(%esp),%ecx
- xorl %ebx,%eax
+ shrdl $7,%edi,%edi
+ xorl %ebx,%ebp
vpxor %xmm0,%xmm5,%xmm5
movl %edx,%esi
+ addl 28(%esp),%ecx
+ xorl %eax,%edi
shldl $5,%edx,%edx
- andl %eax,%ebp
- xorl %ebx,%eax
vpxor %xmm1,%xmm5,%xmm5
- xorl %ebx,%ebp
- addl %edx,%ecx
+ addl %ebp,%ecx
+ andl %edi,%esi
vmovdqa 112(%esp),%xmm1
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
- addl 32(%esp),%ebx
xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%edx,%edx
+ xorl %eax,%esi
vpalignr $8,%xmm2,%xmm3,%xmm6
movl %ecx,%ebp
- shldl $5,%ecx,%ecx
+ addl 32(%esp),%ebx
vpaddd %xmm5,%xmm1,%xmm1
vmovdqa %xmm2,96(%esp)
- andl %edi,%esi
- xorl %eax,%edi
+ xorl %edi,%edx
+ shldl $5,%ecx,%ecx
vpsrldq $4,%xmm5,%xmm0
- xorl %eax,%esi
+ addl %esi,%ebx
+ andl %edx,%ebp
+ vpxor %xmm2,%xmm6,%xmm6
+ xorl %edi,%edx
addl %ecx,%ebx
- vpxor %xmm2,%xmm6,%xmm6
- shrdl $7,%edx,%edx
- addl %esi,%ebx
vpxor %xmm4,%xmm0,%xmm0
- addl 36(%esp),%eax
- xorl %edi,%edx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%ebp
vmovdqa %xmm1,16(%esp)
movl %ebx,%esi
+ addl 36(%esp),%eax
+ vpxor %xmm0,%xmm6,%xmm6
+ xorl %edx,%ecx
shldl $5,%ebx,%ebx
- vpxor %xmm0,%xmm6,%xmm6
- andl %edx,%ebp
- xorl %edi,%edx
- xorl %edi,%ebp
- addl %ebx,%eax
+ addl %ebp,%eax
+ andl %ecx,%esi
vpsrld $31,%xmm6,%xmm0
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
- addl 40(%esp),%edi
xorl %edx,%ecx
+ addl %ebx,%eax
+ shrdl $7,%ebx,%ebx
+ xorl %edx,%esi
vpslldq $12,%xmm6,%xmm2
vpaddd %xmm6,%xmm6,%xmm6
movl %eax,%ebp
+ addl 40(%esp),%edi
+ xorl %ecx,%ebx
shldl $5,%eax,%eax
- andl %ecx,%esi
- xorl %edx,%ecx
vpsrld $30,%xmm2,%xmm1
vpor %xmm0,%xmm6,%xmm6
- xorl %edx,%esi
+ addl %esi,%edi
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %esi,%edi
vpslld $2,%xmm2,%xmm2
vmovdqa 64(%esp),%xmm0
- addl 44(%esp),%edx
- xorl %ecx,%ebx
+ shrdl $7,%eax,%eax
+ xorl %ecx,%ebp
vpxor %xmm1,%xmm6,%xmm6
movl %edi,%esi
+ addl 44(%esp),%edx
+ xorl %ebx,%eax
shldl $5,%edi,%edi
- andl %ebx,%ebp
- xorl %ecx,%ebx
vpxor %xmm2,%xmm6,%xmm6
- xorl %ecx,%ebp
- addl %edi,%edx
+ addl %ebp,%edx
+ andl %eax,%esi
vmovdqa 112(%esp),%xmm2
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- addl 48(%esp),%ecx
xorl %ebx,%eax
+ addl %edi,%edx
+ shrdl $7,%edi,%edi
+ xorl %ebx,%esi
vpalignr $8,%xmm3,%xmm4,%xmm7
movl %edx,%ebp
- shldl $5,%edx,%edx
+ addl 48(%esp),%ecx
vpaddd %xmm6,%xmm2,%xmm2
vmovdqa %xmm3,64(%esp)
- andl %eax,%esi
- xorl %ebx,%eax
+ xorl %eax,%edi
+ shldl $5,%edx,%edx
vpsrldq $4,%xmm6,%xmm1
- xorl %ebx,%esi
+ addl %esi,%ecx
+ andl %edi,%ebp
+ vpxor %xmm3,%xmm7,%xmm7
+ xorl %eax,%edi
addl %edx,%ecx
- vpxor %xmm3,%xmm7,%xmm7
- shrdl $7,%edi,%edi
- addl %esi,%ecx
vpxor %xmm5,%xmm1,%xmm1
- addl 52(%esp),%ebx
- xorl %eax,%edi
+ shrdl $7,%edx,%edx
+ xorl %eax,%ebp
vmovdqa %xmm2,32(%esp)
movl %ecx,%esi
+ addl 52(%esp),%ebx
+ vpxor %xmm1,%xmm7,%xmm7
+ xorl %edi,%edx
shldl $5,%ecx,%ecx
- vpxor %xmm1,%xmm7,%xmm7
- andl %edi,%ebp
- xorl %eax,%edi
- xorl %eax,%ebp
- addl %ecx,%ebx
+ addl %ebp,%ebx
+ andl %edx,%esi
vpsrld $31,%xmm7,%xmm1
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
- addl 56(%esp),%eax
xorl %edi,%edx
+ addl %ecx,%ebx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%esi
vpslldq $12,%xmm7,%xmm3
vpaddd %xmm7,%xmm7,%xmm7
movl %ebx,%ebp
+ addl 56(%esp),%eax
+ xorl %edx,%ecx
shldl $5,%ebx,%ebx
- andl %edx,%esi
- xorl %edi,%edx
vpsrld $30,%xmm3,%xmm2
vpor %xmm1,%xmm7,%xmm7
- xorl %edi,%esi
+ addl %esi,%eax
+ andl %ecx,%ebp
+ xorl %edx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
vpslld $2,%xmm3,%xmm3
vmovdqa 80(%esp),%xmm1
- addl 60(%esp),%edi
- xorl %edx,%ecx
+ shrdl $7,%ebx,%ebx
+ xorl %edx,%ebp
vpxor %xmm2,%xmm7,%xmm7
movl %eax,%esi
+ addl 60(%esp),%edi
+ xorl %ecx,%ebx
shldl $5,%eax,%eax
- andl %ecx,%ebp
- xorl %edx,%ecx
vpxor %xmm3,%xmm7,%xmm7
- xorl %edx,%ebp
+ addl %ebp,%edi
+ andl %ebx,%esi
+ vmovdqa 112(%esp),%xmm3
+ xorl %ecx,%ebx
addl %eax,%edi
- vmovdqa 112(%esp),%xmm3
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
vpalignr $8,%xmm6,%xmm7,%xmm2
vpxor %xmm4,%xmm0,%xmm0
+ shrdl $7,%eax,%eax
+ xorl %ecx,%esi
+ movl %edi,%ebp
addl (%esp),%edx
- xorl %ecx,%ebx
- movl %edi,%ebp
- shldl $5,%edi,%edi
vpxor %xmm1,%xmm0,%xmm0
vmovdqa %xmm4,80(%esp)
- andl %ebx,%esi
- xorl %ecx,%ebx
+ xorl %ebx,%eax
+ shldl $5,%edi,%edi
vmovdqa %xmm3,%xmm4
vpaddd %xmm7,%xmm3,%xmm3
- xorl %ecx,%esi
- addl %edi,%edx
+ addl %esi,%edx
+ andl %eax,%ebp
vpxor %xmm2,%xmm0,%xmm0
- shrdl $7,%eax,%eax
- addl %esi,%edx
- addl 4(%esp),%ecx
xorl %ebx,%eax
+ addl %edi,%edx
+ shrdl $7,%edi,%edi
+ xorl %ebx,%ebp
vpsrld $30,%xmm0,%xmm2
vmovdqa %xmm3,48(%esp)
movl %edx,%esi
+ addl 4(%esp),%ecx
+ xorl %eax,%edi
shldl $5,%edx,%edx
- andl %eax,%ebp
- xorl %ebx,%eax
vpslld $2,%xmm0,%xmm0
- xorl %ebx,%ebp
- addl %edx,%ecx
- shrdl $7,%edi,%edi
addl %ebp,%ecx
- addl 8(%esp),%ebx
+ andl %edi,%esi
xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%edx,%edx
+ xorl %eax,%esi
movl %ecx,%ebp
+ addl 8(%esp),%ebx
+ vpor %xmm2,%xmm0,%xmm0
+ xorl %edi,%edx
shldl $5,%ecx,%ecx
- vpor %xmm2,%xmm0,%xmm0
- andl %edi,%esi
- xorl %eax,%edi
vmovdqa 96(%esp),%xmm2
- xorl %eax,%esi
+ addl %esi,%ebx
+ andl %edx,%ebp
+ xorl %edi,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
addl 12(%esp),%eax
- xorl %edi,%edx
+ xorl %edi,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
- andl %edx,%ebp
- xorl %edi,%edx
- xorl %edi,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
vpalignr $8,%xmm7,%xmm0,%xmm3
vpxor %xmm5,%xmm1,%xmm1
addl 16(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
shldl $5,%eax,%eax
vpxor %xmm2,%xmm1,%xmm1
vmovdqa %xmm5,96(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
+ addl %esi,%edi
+ xorl %ecx,%ebp
vmovdqa %xmm4,%xmm5
vpaddd %xmm0,%xmm4,%xmm4
shrdl $7,%ebx,%ebx
- addl %esi,%edi
+ addl %eax,%edi
vpxor %xmm3,%xmm1,%xmm1
addl 20(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
shldl $5,%edi,%edi
vpsrld $30,%xmm1,%xmm3
vmovdqa %xmm4,(%esp)
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %ebp,%edx
vpslld $2,%xmm1,%xmm1
addl 24(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
movl %edx,%ebp
shldl $5,%edx,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %esi,%ecx
vpor %xmm3,%xmm1,%xmm1
addl 28(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
vmovdqa 64(%esp),%xmm3
movl %ecx,%esi
shldl $5,%ecx,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
vpalignr $8,%xmm0,%xmm1,%xmm4
vpxor %xmm6,%xmm2,%xmm2
addl 32(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
shldl $5,%ebx,%ebx
vpxor %xmm3,%xmm2,%xmm2
vmovdqa %xmm6,64(%esp)
- xorl %edx,%esi
- addl %ebx,%eax
+ addl %esi,%eax
+ xorl %edx,%ebp
vmovdqa 128(%esp),%xmm6
vpaddd %xmm1,%xmm5,%xmm5
shrdl $7,%ecx,%ecx
- addl %esi,%eax
+ addl %ebx,%eax
vpxor %xmm4,%xmm2,%xmm2
addl 36(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
shldl $5,%eax,%eax
vpsrld $30,%xmm2,%xmm4
vmovdqa %xmm5,16(%esp)
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
vpslld $2,%xmm2,%xmm2
addl 40(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
shldl $5,%edi,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %esi,%edx
vpor %xmm4,%xmm2,%xmm2
addl 44(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
vmovdqa 80(%esp),%xmm4
movl %edx,%esi
shldl $5,%edx,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
vpalignr $8,%xmm1,%xmm2,%xmm5
vpxor %xmm7,%xmm3,%xmm3
addl 48(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
shldl $5,%ecx,%ecx
vpxor %xmm4,%xmm3,%xmm3
vmovdqa %xmm7,80(%esp)
- xorl %edi,%esi
- addl %ecx,%ebx
+ addl %esi,%ebx
+ xorl %edi,%ebp
vmovdqa %xmm6,%xmm7
vpaddd %xmm2,%xmm6,%xmm6
shrdl $7,%edx,%edx
- addl %esi,%ebx
+ addl %ecx,%ebx
vpxor %xmm5,%xmm3,%xmm3
addl 52(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
vpsrld $30,%xmm3,%xmm5
vmovdqa %xmm6,32(%esp)
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
vpslld $2,%xmm3,%xmm3
addl 56(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
shldl $5,%eax,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %esi,%edi
vpor %xmm5,%xmm3,%xmm3
addl 60(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
vmovdqa 96(%esp),%xmm5
movl %edi,%esi
shldl $5,%edi,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %ebp,%edx
vpalignr $8,%xmm2,%xmm3,%xmm6
vpxor %xmm0,%xmm4,%xmm4
addl (%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
movl %edx,%ebp
shldl $5,%edx,%edx
vpxor %xmm5,%xmm4,%xmm4
vmovdqa %xmm0,96(%esp)
- xorl %eax,%esi
- addl %edx,%ecx
+ addl %esi,%ecx
+ xorl %eax,%ebp
vmovdqa %xmm7,%xmm0
vpaddd %xmm3,%xmm7,%xmm7
shrdl $7,%edi,%edi
- addl %esi,%ecx
+ addl %edx,%ecx
vpxor %xmm6,%xmm4,%xmm4
addl 4(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
shldl $5,%ecx,%ecx
vpsrld $30,%xmm4,%xmm6
vmovdqa %xmm7,48(%esp)
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
vpslld $2,%xmm4,%xmm4
addl 8(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
shldl $5,%ebx,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
vpor %xmm6,%xmm4,%xmm4
addl 12(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
vmovdqa 64(%esp),%xmm6
movl %eax,%esi
shldl $5,%eax,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
vpalignr $8,%xmm3,%xmm4,%xmm7
vpxor %xmm1,%xmm5,%xmm5
addl 16(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
shldl $5,%edi,%edi
vpxor %xmm6,%xmm5,%xmm5
vmovdqa %xmm1,64(%esp)
- xorl %ebx,%esi
- addl %edi,%edx
+ addl %esi,%edx
+ xorl %ebx,%ebp
vmovdqa %xmm0,%xmm1
vpaddd %xmm4,%xmm0,%xmm0
shrdl $7,%eax,%eax
- addl %esi,%edx
+ addl %edi,%edx
vpxor %xmm7,%xmm5,%xmm5
addl 20(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
shldl $5,%edx,%edx
vpsrld $30,%xmm5,%xmm7
vmovdqa %xmm0,(%esp)
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
vpslld $2,%xmm5,%xmm5
addl 24(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
shldl $5,%ecx,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
vpor %xmm7,%xmm5,%xmm5
addl 28(%esp),%eax
- xorl %edi,%ebp
vmovdqa 80(%esp),%xmm7
+ shrdl $7,%ecx,%ecx
movl %ebx,%esi
+ xorl %edx,%ebp
shldl $5,%ebx,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
vpalignr $8,%xmm4,%xmm5,%xmm0
vpxor %xmm2,%xmm6,%xmm6
- movl %ecx,%ebp
+ addl 32(%esp),%edi
+ andl %ecx,%esi
xorl %edx,%ecx
- addl 32(%esp),%edi
- andl %edx,%ebp
+ shrdl $7,%ebx,%ebx
vpxor %xmm7,%xmm6,%xmm6
vmovdqa %xmm2,80(%esp)
- andl %ecx,%esi
- shrdl $7,%ebx,%ebx
+ movl %eax,%ebp
+ xorl %ecx,%esi
vmovdqa %xmm1,%xmm2
vpaddd %xmm5,%xmm1,%xmm1
- addl %ebp,%edi
- movl %eax,%ebp
- vpxor %xmm0,%xmm6,%xmm6
shldl $5,%eax,%eax
addl %esi,%edi
- xorl %edx,%ecx
+ vpxor %xmm0,%xmm6,%xmm6
+ xorl %ebx,%ebp
+ xorl %ecx,%ebx
addl %eax,%edi
+ addl 36(%esp),%edx
vpsrld $30,%xmm6,%xmm0
vmovdqa %xmm1,16(%esp)
- movl %ebx,%esi
+ andl %ebx,%ebp
xorl %ecx,%ebx
- addl 36(%esp),%edx
- andl %ecx,%esi
- vpslld $2,%xmm6,%xmm6
- andl %ebx,%ebp
shrdl $7,%eax,%eax
- addl %esi,%edx
movl %edi,%esi
+ vpslld $2,%xmm6,%xmm6
+ xorl %ebx,%ebp
shldl $5,%edi,%edi
addl %ebp,%edx
- xorl %ecx,%ebx
+ xorl %eax,%esi
+ xorl %ebx,%eax
addl %edi,%edx
+ addl 40(%esp),%ecx
+ andl %eax,%esi
vpor %xmm0,%xmm6,%xmm6
- movl %eax,%ebp
xorl %ebx,%eax
+ shrdl $7,%edi,%edi
vmovdqa 96(%esp),%xmm0
- addl 40(%esp),%ecx
- andl %ebx,%ebp
- andl %eax,%esi
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
movl %edx,%ebp
+ xorl %eax,%esi
shldl $5,%edx,%edx
addl %esi,%ecx
- xorl %ebx,%eax
+ xorl %edi,%ebp
+ xorl %eax,%edi
addl %edx,%ecx
- movl %edi,%esi
- xorl %eax,%edi
addl 44(%esp),%ebx
- andl %eax,%esi
andl %edi,%ebp
+ xorl %eax,%edi
shrdl $7,%edx,%edx
- addl %esi,%ebx
movl %ecx,%esi
+ xorl %edi,%ebp
shldl $5,%ecx,%ecx
addl %ebp,%ebx
- xorl %eax,%edi
+ xorl %edx,%esi
+ xorl %edi,%edx
addl %ecx,%ebx
vpalignr $8,%xmm5,%xmm6,%xmm1
vpxor %xmm3,%xmm7,%xmm7
- movl %edx,%ebp
+ addl 48(%esp),%eax
+ andl %edx,%esi
xorl %edi,%edx
- addl 48(%esp),%eax
- andl %edi,%ebp
+ shrdl $7,%ecx,%ecx
vpxor %xmm0,%xmm7,%xmm7
vmovdqa %xmm3,96(%esp)
- andl %edx,%esi
- shrdl $7,%ecx,%ecx
+ movl %ebx,%ebp
+ xorl %edx,%esi
vmovdqa 144(%esp),%xmm3
vpaddd %xmm6,%xmm2,%xmm2
- addl %ebp,%eax
- movl %ebx,%ebp
- vpxor %xmm1,%xmm7,%xmm7
shldl $5,%ebx,%ebx
addl %esi,%eax
- xorl %edi,%edx
+ vpxor %xmm1,%xmm7,%xmm7
+ xorl %ecx,%ebp
+ xorl %edx,%ecx
addl %ebx,%eax
+ addl 52(%esp),%edi
vpsrld $30,%xmm7,%xmm1
vmovdqa %xmm2,32(%esp)
- movl %ecx,%esi
+ andl %ecx,%ebp
xorl %edx,%ecx
- addl 52(%esp),%edi
- andl %edx,%esi
- vpslld $2,%xmm7,%xmm7
- andl %ecx,%ebp
shrdl $7,%ebx,%ebx
- addl %esi,%edi
movl %eax,%esi
+ vpslld $2,%xmm7,%xmm7
+ xorl %ecx,%ebp
shldl $5,%eax,%eax
addl %ebp,%edi
- xorl %edx,%ecx
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
addl %eax,%edi
+ addl 56(%esp),%edx
+ andl %ebx,%esi
vpor %xmm1,%xmm7,%xmm7
- movl %ebx,%ebp
xorl %ecx,%ebx
+ shrdl $7,%eax,%eax
vmovdqa 64(%esp),%xmm1
- addl 56(%esp),%edx
- andl %ecx,%ebp
- andl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
movl %edi,%ebp
+ xorl %ebx,%esi
shldl $5,%edi,%edi
addl %esi,%edx
- xorl %ecx,%ebx
+ xorl %eax,%ebp
+ xorl %ebx,%eax
addl %edi,%edx
- movl %eax,%esi
- xorl %ebx,%eax
addl 60(%esp),%ecx
- andl %ebx,%esi
andl %eax,%ebp
+ xorl %ebx,%eax
shrdl $7,%edi,%edi
- addl %esi,%ecx
movl %edx,%esi
+ xorl %eax,%ebp
shldl $5,%edx,%edx
addl %ebp,%ecx
- xorl %ebx,%eax
+ xorl %edi,%esi
+ xorl %eax,%edi
addl %edx,%ecx
vpalignr $8,%xmm6,%xmm7,%xmm2
vpxor %xmm4,%xmm0,%xmm0
- movl %edi,%ebp
+ addl (%esp),%ebx
+ andl %edi,%esi
xorl %eax,%edi
- addl (%esp),%ebx
- andl %eax,%ebp
+ shrdl $7,%edx,%edx
vpxor %xmm1,%xmm0,%xmm0
vmovdqa %xmm4,64(%esp)
- andl %edi,%esi
- shrdl $7,%edx,%edx
+ movl %ecx,%ebp
+ xorl %edi,%esi
vmovdqa %xmm3,%xmm4
vpaddd %xmm7,%xmm3,%xmm3
- addl %ebp,%ebx
- movl %ecx,%ebp
- vpxor %xmm2,%xmm0,%xmm0
shldl $5,%ecx,%ecx
addl %esi,%ebx
- xorl %eax,%edi
+ vpxor %xmm2,%xmm0,%xmm0
+ xorl %edx,%ebp
+ xorl %edi,%edx
addl %ecx,%ebx
+ addl 4(%esp),%eax
vpsrld $30,%xmm0,%xmm2
vmovdqa %xmm3,48(%esp)
- movl %edx,%esi
+ andl %edx,%ebp
xorl %edi,%edx
- addl 4(%esp),%eax
- andl %edi,%esi
- vpslld $2,%xmm0,%xmm0
- andl %edx,%ebp
shrdl $7,%ecx,%ecx
- addl %esi,%eax
movl %ebx,%esi
+ vpslld $2,%xmm0,%xmm0
+ xorl %edx,%ebp
shldl $5,%ebx,%ebx
addl %ebp,%eax
- xorl %edi,%edx
+ xorl %ecx,%esi
+ xorl %edx,%ecx
addl %ebx,%eax
+ addl 8(%esp),%edi
+ andl %ecx,%esi
vpor %xmm2,%xmm0,%xmm0
- movl %ecx,%ebp
xorl %edx,%ecx
+ shrdl $7,%ebx,%ebx
vmovdqa 80(%esp),%xmm2
- addl 8(%esp),%edi
- andl %edx,%ebp
- andl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
movl %eax,%ebp
+ xorl %ecx,%esi
shldl $5,%eax,%eax
addl %esi,%edi
- xorl %edx,%ecx
+ xorl %ebx,%ebp
+ xorl %ecx,%ebx
addl %eax,%edi
- movl %ebx,%esi
- xorl %ecx,%ebx
addl 12(%esp),%edx
- andl %ecx,%esi
andl %ebx,%ebp
+ xorl %ecx,%ebx
shrdl $7,%eax,%eax
- addl %esi,%edx
movl %edi,%esi
+ xorl %ebx,%ebp
shldl $5,%edi,%edi
addl %ebp,%edx
- xorl %ecx,%ebx
+ xorl %eax,%esi
+ xorl %ebx,%eax
addl %edi,%edx
vpalignr $8,%xmm7,%xmm0,%xmm3
vpxor %xmm5,%xmm1,%xmm1
- movl %eax,%ebp
+ addl 16(%esp),%ecx
+ andl %eax,%esi
xorl %ebx,%eax
- addl 16(%esp),%ecx
- andl %ebx,%ebp
+ shrdl $7,%edi,%edi
vpxor %xmm2,%xmm1,%xmm1
vmovdqa %xmm5,80(%esp)
- andl %eax,%esi
- shrdl $7,%edi,%edi
+ movl %edx,%ebp
+ xorl %eax,%esi
vmovdqa %xmm4,%xmm5
vpaddd %xmm0,%xmm4,%xmm4
- addl %ebp,%ecx
- movl %edx,%ebp
- vpxor %xmm3,%xmm1,%xmm1
shldl $5,%edx,%edx
addl %esi,%ecx
- xorl %ebx,%eax
+ vpxor %xmm3,%xmm1,%xmm1
+ xorl %edi,%ebp
+ xorl %eax,%edi
addl %edx,%ecx
+ addl 20(%esp),%ebx
vpsrld $30,%xmm1,%xmm3
vmovdqa %xmm4,(%esp)
- movl %edi,%esi
+ andl %edi,%ebp
xorl %eax,%edi
- addl 20(%esp),%ebx
- andl %eax,%esi
- vpslld $2,%xmm1,%xmm1
- andl %edi,%ebp
shrdl $7,%edx,%edx
- addl %esi,%ebx
movl %ecx,%esi
+ vpslld $2,%xmm1,%xmm1
+ xorl %edi,%ebp
shldl $5,%ecx,%ecx
addl %ebp,%ebx
- xorl %eax,%edi
+ xorl %edx,%esi
+ xorl %edi,%edx
addl %ecx,%ebx
+ addl 24(%esp),%eax
+ andl %edx,%esi
vpor %xmm3,%xmm1,%xmm1
- movl %edx,%ebp
xorl %edi,%edx
+ shrdl $7,%ecx,%ecx
vmovdqa 96(%esp),%xmm3
- addl 24(%esp),%eax
- andl %edi,%ebp
- andl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
movl %ebx,%ebp
+ xorl %edx,%esi
shldl $5,%ebx,%ebx
addl %esi,%eax
- xorl %edi,%edx
+ xorl %ecx,%ebp
+ xorl %edx,%ecx
addl %ebx,%eax
- movl %ecx,%esi
- xorl %edx,%ecx
addl 28(%esp),%edi
- andl %edx,%esi
andl %ecx,%ebp
+ xorl %edx,%ecx
shrdl $7,%ebx,%ebx
- addl %esi,%edi
movl %eax,%esi
+ xorl %ecx,%ebp
shldl $5,%eax,%eax
addl %ebp,%edi
- xorl %edx,%ecx
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
addl %eax,%edi
vpalignr $8,%xmm0,%xmm1,%xmm4
vpxor %xmm6,%xmm2,%xmm2
- movl %ebx,%ebp
+ addl 32(%esp),%edx
+ andl %ebx,%esi
xorl %ecx,%ebx
- addl 32(%esp),%edx
- andl %ecx,%ebp
+ shrdl $7,%eax,%eax
vpxor %xmm3,%xmm2,%xmm2
vmovdqa %xmm6,96(%esp)
- andl %ebx,%esi
- shrdl $7,%eax,%eax
+ movl %edi,%ebp
+ xorl %ebx,%esi
vmovdqa %xmm5,%xmm6
vpaddd %xmm1,%xmm5,%xmm5
- addl %ebp,%edx
- movl %edi,%ebp
- vpxor %xmm4,%xmm2,%xmm2
shldl $5,%edi,%edi
addl %esi,%edx
- xorl %ecx,%ebx
+ vpxor %xmm4,%xmm2,%xmm2
+ xorl %eax,%ebp
+ xorl %ebx,%eax
addl %edi,%edx
+ addl 36(%esp),%ecx
vpsrld $30,%xmm2,%xmm4
vmovdqa %xmm5,16(%esp)
- movl %eax,%esi
+ andl %eax,%ebp
xorl %ebx,%eax
- addl 36(%esp),%ecx
- andl %ebx,%esi
- vpslld $2,%xmm2,%xmm2
- andl %eax,%ebp
shrdl $7,%edi,%edi
- addl %esi,%ecx
movl %edx,%esi
+ vpslld $2,%xmm2,%xmm2
+ xorl %eax,%ebp
shldl $5,%edx,%edx
addl %ebp,%ecx
- xorl %ebx,%eax
+ xorl %edi,%esi
+ xorl %eax,%edi
addl %edx,%ecx
+ addl 40(%esp),%ebx
+ andl %edi,%esi
vpor %xmm4,%xmm2,%xmm2
- movl %edi,%ebp
xorl %eax,%edi
+ shrdl $7,%edx,%edx
vmovdqa 64(%esp),%xmm4
- addl 40(%esp),%ebx
- andl %eax,%ebp
- andl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
movl %ecx,%ebp
+ xorl %edi,%esi
shldl $5,%ecx,%ecx
addl %esi,%ebx
- xorl %eax,%edi
+ xorl %edx,%ebp
+ xorl %edi,%edx
addl %ecx,%ebx
- movl %edx,%esi
- xorl %edi,%edx
addl 44(%esp),%eax
- andl %edi,%esi
andl %edx,%ebp
+ xorl %edi,%edx
shrdl $7,%ecx,%ecx
- addl %esi,%eax
movl %ebx,%esi
+ xorl %edx,%ebp
shldl $5,%ebx,%ebx
addl %ebp,%eax
- xorl %edi,%edx
+ xorl %edx,%esi
addl %ebx,%eax
vpalignr $8,%xmm1,%xmm2,%xmm5
vpxor %xmm7,%xmm3,%xmm3
addl 48(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
shldl $5,%eax,%eax
vpxor %xmm4,%xmm3,%xmm3
vmovdqa %xmm7,64(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
+ addl %esi,%edi
+ xorl %ecx,%ebp
vmovdqa %xmm6,%xmm7
vpaddd %xmm2,%xmm6,%xmm6
shrdl $7,%ebx,%ebx
- addl %esi,%edi
+ addl %eax,%edi
vpxor %xmm5,%xmm3,%xmm3
addl 52(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
shldl $5,%edi,%edi
vpsrld $30,%xmm3,%xmm5
vmovdqa %xmm6,32(%esp)
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %ebp,%edx
vpslld $2,%xmm3,%xmm3
addl 56(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
movl %edx,%ebp
shldl $5,%edx,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %esi,%ecx
vpor %xmm5,%xmm3,%xmm3
addl 60(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
shldl $5,%ecx,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
addl (%esp),%eax
vpaddd %xmm3,%xmm7,%xmm7
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
shldl $5,%ebx,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
vmovdqa %xmm7,48(%esp)
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
addl 4(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
shldl $5,%eax,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
addl 8(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
shldl $5,%edi,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %esi,%edx
addl 12(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
shldl $5,%edx,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
movl 196(%esp),%ebp
cmpl 200(%esp),%ebp
- je .L008done
+ je .L010done
vmovdqa 160(%esp),%xmm7
vmovdqa 176(%esp),%xmm6
vmovdqu (%ebp),%xmm0
@@ -3587,110 +3723,109 @@
movl %ebp,196(%esp)
vmovdqa %xmm7,96(%esp)
addl 16(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
vpshufb %xmm6,%xmm1,%xmm1
movl %ecx,%ebp
shldl $5,%ecx,%ecx
vpaddd %xmm7,%xmm0,%xmm4
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
vmovdqa %xmm4,(%esp)
addl 20(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
addl 24(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
shldl $5,%eax,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %esi,%edi
addl 28(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
shldl $5,%edi,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %ebp,%edx
addl 32(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
vpshufb %xmm6,%xmm2,%xmm2
movl %edx,%ebp
shldl $5,%edx,%edx
vpaddd %xmm7,%xmm1,%xmm5
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %esi,%ecx
vmovdqa %xmm5,16(%esp)
addl 36(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
shldl $5,%ecx,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
addl 40(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
shldl $5,%ebx,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
addl 44(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
shldl $5,%eax,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
addl 48(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
vpshufb %xmm6,%xmm3,%xmm3
movl %edi,%ebp
shldl $5,%edi,%edi
vpaddd %xmm7,%xmm2,%xmm6
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %esi,%edx
vmovdqa %xmm6,32(%esp)
addl 52(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
shldl $5,%edx,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
addl 56(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
shldl $5,%ecx,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
addl 60(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
movl 192(%esp),%ebp
addl (%ebp),%eax
addl 4(%ebp),%esi
@@ -3699,109 +3834,112 @@
addl 12(%ebp),%edx
movl %esi,4(%ebp)
addl 16(%ebp),%edi
+ movl %ecx,%ebx
movl %ecx,8(%ebp)
- movl %esi,%ebx
+ xorl %edx,%ebx
movl %edx,12(%ebp)
movl %edi,16(%ebp)
- jmp .L007loop
+ movl %esi,%ebp
+ andl %ebx,%esi
+ movl %ebp,%ebx
+ jmp .L009loop
.align 16
-.L008done:
+.L010done:
addl 16(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
shldl $5,%ecx,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
addl 20(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
addl 24(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
shldl $5,%eax,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %esi,%edi
addl 28(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
shldl $5,%edi,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %ebp,%edx
addl 32(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
movl %edx,%ebp
shldl $5,%edx,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %esi,%ecx
addl 36(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
shldl $5,%ecx,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
addl 40(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
shldl $5,%ebx,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
addl 44(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
shldl $5,%eax,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
addl 48(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
shldl $5,%edi,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %esi,%edx
addl 52(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
shldl $5,%edx,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
addl 56(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
shldl $5,%ecx,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
addl 60(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
vzeroall
movl 192(%esp),%ebp
addl (%ebp),%eax
@@ -3828,11 +3966,12 @@
.long 2400959708,2400959708,2400959708,2400959708
.long 3395469782,3395469782,3395469782,3395469782
.long 66051,67438087,134810123,202182159
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#else
.file "sha1-586.S"
.text
@@ -3854,8 +3993,11 @@
movl 4(%esi),%edx
testl $512,%edx
jz .L001x86
+ movl 8(%esi),%ecx
testl $16777216,%eax
jz .L001x86
+ testl $536870912,%ecx
+ jnz .Lshaext_shortcut
andl $268435456,%edx
andl $1073741824,%eax
orl %edx,%eax
@@ -5229,9 +5371,9 @@
popl %ebp
ret
.size sha1_block_data_order,.-.L_sha1_block_data_order_begin
-.type _sha1_block_data_order_ssse3,@function
+.type _sha1_block_data_order_shaext,@function
.align 16
-_sha1_block_data_order_ssse3:
+_sha1_block_data_order_shaext:
pushl %ebp
pushl %ebx
pushl %esi
@@ -5240,6 +5382,176 @@
.L003pic_point:
popl %ebp
leal .LK_XX_XX-.L003pic_point(%ebp),%ebp
+.Lshaext_shortcut:
+ movl 20(%esp),%edi
+ movl %esp,%ebx
+ movl 24(%esp),%esi
+ movl 28(%esp),%ecx
+ subl $32,%esp
+ movdqu (%edi),%xmm0
+ movd 16(%edi),%xmm1
+ andl $-32,%esp
+ movdqa 80(%ebp),%xmm3
+ movdqu (%esi),%xmm4
+ pshufd $27,%xmm0,%xmm0
+ movdqu 16(%esi),%xmm5
+ pshufd $27,%xmm1,%xmm1
+ movdqu 32(%esi),%xmm6
+.byte 102,15,56,0,227
+ movdqu 48(%esi),%xmm7
+.byte 102,15,56,0,235
+.byte 102,15,56,0,243
+.byte 102,15,56,0,251
+ jmp .L004loop_shaext
+.align 16
+.L004loop_shaext:
+ decl %ecx
+ leal 64(%esi),%eax
+ movdqa %xmm1,(%esp)
+ paddd %xmm4,%xmm1
+ cmovnel %eax,%esi
+ movdqa %xmm0,16(%esp)
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,0
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,0
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,1
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,1
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,2
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,2
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+ movdqu (%esi),%xmm4
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,3
+.byte 15,56,200,213
+ movdqu 16(%esi),%xmm5
+.byte 102,15,56,0,227
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+.byte 15,56,200,206
+ movdqu 32(%esi),%xmm6
+.byte 102,15,56,0,235
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,3
+.byte 15,56,200,215
+ movdqu 48(%esi),%xmm7
+.byte 102,15,56,0,243
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+ movdqa (%esp),%xmm2
+.byte 102,15,56,0,251
+.byte 15,56,200,202
+ paddd 16(%esp),%xmm0
+ jnz .L004loop_shaext
+ pshufd $27,%xmm0,%xmm0
+ pshufd $27,%xmm1,%xmm1
+ movdqu %xmm0,(%edi)
+ movd %xmm1,16(%edi)
+ movl %ebx,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext
+.type _sha1_block_data_order_ssse3,@function
+.align 16
+_sha1_block_data_order_ssse3:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ call .L005pic_point
+.L005pic_point:
+ popl %ebp
+ leal .LK_XX_XX-.L005pic_point(%ebp),%ebp
.Lssse3_shortcut:
movdqa (%ebp),%xmm7
movdqa 16(%ebp),%xmm0
@@ -5287,936 +5599,917 @@
movdqa %xmm1,16(%esp)
psubd %xmm7,%xmm1
movdqa %xmm2,32(%esp)
+ movl %ecx,%ebp
psubd %xmm7,%xmm2
- movdqa %xmm1,%xmm4
- jmp .L004loop
+ xorl %edx,%ebp
+ pshufd $238,%xmm0,%xmm4
+ andl %ebp,%esi
+ jmp .L006loop
.align 16
-.L004loop:
+.L006loop:
+ rorl $2,%ebx
+ xorl %edx,%esi
+ movl %eax,%ebp
+ punpcklqdq %xmm1,%xmm4
+ movdqa %xmm3,%xmm6
addl (%esp),%edi
- xorl %edx,%ecx
-.byte 102,15,58,15,224,8
- movdqa %xmm3,%xmm6
- movl %eax,%ebp
- roll $5,%eax
+ xorl %ecx,%ebx
paddd %xmm3,%xmm7
movdqa %xmm0,64(%esp)
- andl %ecx,%esi
- xorl %edx,%ecx
+ roll $5,%eax
+ addl %esi,%edi
psrldq $4,%xmm6
- xorl %edx,%esi
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
+ pxor %xmm0,%xmm4
addl %eax,%edi
- pxor %xmm0,%xmm4
- rorl $2,%ebx
- addl %esi,%edi
+ rorl $7,%eax
pxor %xmm2,%xmm6
+ xorl %ecx,%ebp
+ movl %edi,%esi
addl 4(%esp),%edx
- xorl %ecx,%ebx
- movl %edi,%esi
+ pxor %xmm6,%xmm4
+ xorl %ebx,%eax
roll $5,%edi
- pxor %xmm6,%xmm4
- andl %ebx,%ebp
- xorl %ecx,%ebx
movdqa %xmm7,48(%esp)
- xorl %ecx,%ebp
+ addl %ebp,%edx
+ andl %eax,%esi
+ movdqa %xmm4,%xmm0
+ xorl %ebx,%eax
addl %edi,%edx
- movdqa %xmm4,%xmm0
+ rorl $7,%edi
movdqa %xmm4,%xmm6
- rorl $7,%eax
- addl %ebp,%edx
- addl 8(%esp),%ecx
- xorl %ebx,%eax
+ xorl %ebx,%esi
pslldq $12,%xmm0
paddd %xmm4,%xmm4
movl %edx,%ebp
+ addl 8(%esp),%ecx
+ psrld $31,%xmm6
+ xorl %eax,%edi
roll $5,%edx
- andl %eax,%esi
- xorl %ebx,%eax
- psrld $31,%xmm6
- xorl %ebx,%esi
- addl %edx,%ecx
movdqa %xmm0,%xmm7
- rorl $7,%edi
addl %esi,%ecx
+ andl %edi,%ebp
+ xorl %eax,%edi
psrld $30,%xmm0
+ addl %edx,%ecx
+ rorl $7,%edx
por %xmm6,%xmm4
+ xorl %eax,%ebp
+ movl %ecx,%esi
addl 12(%esp),%ebx
- xorl %eax,%edi
- movl %ecx,%esi
+ pslld $2,%xmm7
+ xorl %edi,%edx
roll $5,%ecx
- pslld $2,%xmm7
pxor %xmm0,%xmm4
- andl %edi,%ebp
- xorl %eax,%edi
movdqa 96(%esp),%xmm0
- xorl %eax,%ebp
- addl %ecx,%ebx
+ addl %ebp,%ebx
+ andl %edx,%esi
pxor %xmm7,%xmm4
- movdqa %xmm2,%xmm5
- rorl $7,%edx
- addl %ebp,%ebx
- addl 16(%esp),%eax
+ pshufd $238,%xmm1,%xmm5
xorl %edi,%edx
-.byte 102,15,58,15,233,8
+ addl %ecx,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ movl %ebx,%ebp
+ punpcklqdq %xmm2,%xmm5
movdqa %xmm4,%xmm7
- movl %ebx,%ebp
- roll $5,%ebx
+ addl 16(%esp),%eax
+ xorl %edx,%ecx
paddd %xmm4,%xmm0
movdqa %xmm1,80(%esp)
- andl %edx,%esi
- xorl %edi,%edx
+ roll $5,%ebx
+ addl %esi,%eax
psrldq $4,%xmm7
- xorl %edi,%esi
+ andl %ecx,%ebp
+ xorl %edx,%ecx
+ pxor %xmm1,%xmm5
addl %ebx,%eax
- pxor %xmm1,%xmm5
- rorl $7,%ecx
- addl %esi,%eax
+ rorl $7,%ebx
pxor %xmm3,%xmm7
+ xorl %edx,%ebp
+ movl %eax,%esi
addl 20(%esp),%edi
- xorl %edx,%ecx
- movl %eax,%esi
+ pxor %xmm7,%xmm5
+ xorl %ecx,%ebx
roll $5,%eax
- pxor %xmm7,%xmm5
- andl %ecx,%ebp
- xorl %edx,%ecx
movdqa %xmm0,(%esp)
- xorl %edx,%ebp
+ addl %ebp,%edi
+ andl %ebx,%esi
+ movdqa %xmm5,%xmm1
+ xorl %ecx,%ebx
addl %eax,%edi
- movdqa %xmm5,%xmm1
+ rorl $7,%eax
movdqa %xmm5,%xmm7
- rorl $7,%ebx
- addl %ebp,%edi
- addl 24(%esp),%edx
- xorl %ecx,%ebx
+ xorl %ecx,%esi
pslldq $12,%xmm1
paddd %xmm5,%xmm5
movl %edi,%ebp
+ addl 24(%esp),%edx
+ psrld $31,%xmm7
+ xorl %ebx,%eax
roll $5,%edi
- andl %ebx,%esi
- xorl %ecx,%ebx
- psrld $31,%xmm7
- xorl %ecx,%esi
- addl %edi,%edx
movdqa %xmm1,%xmm0
- rorl $7,%eax
addl %esi,%edx
+ andl %eax,%ebp
+ xorl %ebx,%eax
psrld $30,%xmm1
+ addl %edi,%edx
+ rorl $7,%edi
por %xmm7,%xmm5
+ xorl %ebx,%ebp
+ movl %edx,%esi
addl 28(%esp),%ecx
- xorl %ebx,%eax
- movl %edx,%esi
+ pslld $2,%xmm0
+ xorl %eax,%edi
roll $5,%edx
- pslld $2,%xmm0
pxor %xmm1,%xmm5
- andl %eax,%ebp
- xorl %ebx,%eax
movdqa 112(%esp),%xmm1
- xorl %ebx,%ebp
- addl %edx,%ecx
+ addl %ebp,%ecx
+ andl %edi,%esi
pxor %xmm0,%xmm5
- movdqa %xmm3,%xmm6
- rorl $7,%edi
- addl %ebp,%ecx
- addl 32(%esp),%ebx
+ pshufd $238,%xmm2,%xmm6
xorl %eax,%edi
-.byte 102,15,58,15,242,8
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
+ movl %ecx,%ebp
+ punpcklqdq %xmm3,%xmm6
movdqa %xmm5,%xmm0
- movl %ecx,%ebp
- roll $5,%ecx
+ addl 32(%esp),%ebx
+ xorl %edi,%edx
paddd %xmm5,%xmm1
movdqa %xmm2,96(%esp)
- andl %edi,%esi
- xorl %eax,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
psrldq $4,%xmm0
- xorl %eax,%esi
+ andl %edx,%ebp
+ xorl %edi,%edx
+ pxor %xmm2,%xmm6
addl %ecx,%ebx
- pxor %xmm2,%xmm6
- rorl $7,%edx
- addl %esi,%ebx
+ rorl $7,%ecx
pxor %xmm4,%xmm0
+ xorl %edi,%ebp
+ movl %ebx,%esi
addl 36(%esp),%eax
- xorl %edi,%edx
- movl %ebx,%esi
+ pxor %xmm0,%xmm6
+ xorl %edx,%ecx
roll $5,%ebx
- pxor %xmm0,%xmm6
- andl %edx,%ebp
- xorl %edi,%edx
movdqa %xmm1,16(%esp)
- xorl %edi,%ebp
+ addl %ebp,%eax
+ andl %ecx,%esi
+ movdqa %xmm6,%xmm2
+ xorl %edx,%ecx
addl %ebx,%eax
- movdqa %xmm6,%xmm2
+ rorl $7,%ebx
movdqa %xmm6,%xmm0
- rorl $7,%ecx
- addl %ebp,%eax
- addl 40(%esp),%edi
- xorl %edx,%ecx
+ xorl %edx,%esi
pslldq $12,%xmm2
paddd %xmm6,%xmm6
movl %eax,%ebp
+ addl 40(%esp),%edi
+ psrld $31,%xmm0
+ xorl %ecx,%ebx
roll $5,%eax
- andl %ecx,%esi
- xorl %edx,%ecx
- psrld $31,%xmm0
- xorl %edx,%esi
- addl %eax,%edi
movdqa %xmm2,%xmm1
- rorl $7,%ebx
addl %esi,%edi
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
psrld $30,%xmm2
+ addl %eax,%edi
+ rorl $7,%eax
por %xmm0,%xmm6
- addl 44(%esp),%edx
- xorl %ecx,%ebx
+ xorl %ecx,%ebp
movdqa 64(%esp),%xmm0
movl %edi,%esi
+ addl 44(%esp),%edx
+ pslld $2,%xmm1
+ xorl %ebx,%eax
roll $5,%edi
- pslld $2,%xmm1
pxor %xmm2,%xmm6
- andl %ebx,%ebp
- xorl %ecx,%ebx
movdqa 112(%esp),%xmm2
- xorl %ecx,%ebp
- addl %edi,%edx
+ addl %ebp,%edx
+ andl %eax,%esi
pxor %xmm1,%xmm6
- movdqa %xmm4,%xmm7
- rorl $7,%eax
- addl %ebp,%edx
- addl 48(%esp),%ecx
+ pshufd $238,%xmm3,%xmm7
xorl %ebx,%eax
-.byte 102,15,58,15,251,8
+ addl %edi,%edx
+ rorl $7,%edi
+ xorl %ebx,%esi
+ movl %edx,%ebp
+ punpcklqdq %xmm4,%xmm7
movdqa %xmm6,%xmm1
- movl %edx,%ebp
- roll $5,%edx
+ addl 48(%esp),%ecx
+ xorl %eax,%edi
paddd %xmm6,%xmm2
movdqa %xmm3,64(%esp)
- andl %eax,%esi
- xorl %ebx,%eax
+ roll $5,%edx
+ addl %esi,%ecx
psrldq $4,%xmm1
- xorl %ebx,%esi
+ andl %edi,%ebp
+ xorl %eax,%edi
+ pxor %xmm3,%xmm7
addl %edx,%ecx
- pxor %xmm3,%xmm7
- rorl $7,%edi
- addl %esi,%ecx
+ rorl $7,%edx
pxor %xmm5,%xmm1
+ xorl %eax,%ebp
+ movl %ecx,%esi
addl 52(%esp),%ebx
- xorl %eax,%edi
- movl %ecx,%esi
+ pxor %xmm1,%xmm7
+ xorl %edi,%edx
roll $5,%ecx
- pxor %xmm1,%xmm7
- andl %edi,%ebp
- xorl %eax,%edi
movdqa %xmm2,32(%esp)
- xorl %eax,%ebp
+ addl %ebp,%ebx
+ andl %edx,%esi
+ movdqa %xmm7,%xmm3
+ xorl %edi,%edx
addl %ecx,%ebx
- movdqa %xmm7,%xmm3
+ rorl $7,%ecx
movdqa %xmm7,%xmm1
- rorl $7,%edx
- addl %ebp,%ebx
- addl 56(%esp),%eax
- xorl %edi,%edx
+ xorl %edi,%esi
pslldq $12,%xmm3
paddd %xmm7,%xmm7
movl %ebx,%ebp
+ addl 56(%esp),%eax
+ psrld $31,%xmm1
+ xorl %edx,%ecx
roll $5,%ebx
- andl %edx,%esi
- xorl %edi,%edx
- psrld $31,%xmm1
- xorl %edi,%esi
- addl %ebx,%eax
movdqa %xmm3,%xmm2
- rorl $7,%ecx
addl %esi,%eax
+ andl %ecx,%ebp
+ xorl %edx,%ecx
psrld $30,%xmm3
+ addl %ebx,%eax
+ rorl $7,%ebx
por %xmm1,%xmm7
- addl 60(%esp),%edi
- xorl %edx,%ecx
+ xorl %edx,%ebp
movdqa 80(%esp),%xmm1
movl %eax,%esi
+ addl 60(%esp),%edi
+ pslld $2,%xmm2
+ xorl %ecx,%ebx
roll $5,%eax
- pslld $2,%xmm2
pxor %xmm3,%xmm7
- andl %ecx,%ebp
- xorl %edx,%ecx
movdqa 112(%esp),%xmm3
- xorl %edx,%ebp
+ addl %ebp,%edi
+ andl %ebx,%esi
+ pxor %xmm2,%xmm7
+ pshufd $238,%xmm6,%xmm2
+ xorl %ecx,%ebx
addl %eax,%edi
- pxor %xmm2,%xmm7
- rorl $7,%ebx
- addl %ebp,%edi
- movdqa %xmm7,%xmm2
- addl (%esp),%edx
+ rorl $7,%eax
pxor %xmm4,%xmm0
-.byte 102,15,58,15,214,8
- xorl %ecx,%ebx
+ punpcklqdq %xmm7,%xmm2
+ xorl %ecx,%esi
movl %edi,%ebp
- roll $5,%edi
+ addl (%esp),%edx
pxor %xmm1,%xmm0
movdqa %xmm4,80(%esp)
- andl %ebx,%esi
- xorl %ecx,%ebx
+ xorl %ebx,%eax
+ roll $5,%edi
movdqa %xmm3,%xmm4
+ addl %esi,%edx
paddd %xmm7,%xmm3
- xorl %ecx,%esi
- addl %edi,%edx
+ andl %eax,%ebp
pxor %xmm2,%xmm0
- rorl $7,%eax
- addl %esi,%edx
- addl 4(%esp),%ecx
xorl %ebx,%eax
+ addl %edi,%edx
+ rorl $7,%edi
+ xorl %ebx,%ebp
movdqa %xmm0,%xmm2
movdqa %xmm3,48(%esp)
movl %edx,%esi
+ addl 4(%esp),%ecx
+ xorl %eax,%edi
roll $5,%edx
- andl %eax,%ebp
- xorl %ebx,%eax
pslld $2,%xmm0
- xorl %ebx,%ebp
- addl %edx,%ecx
+ addl %ebp,%ecx
+ andl %edi,%esi
psrld $30,%xmm2
- rorl $7,%edi
- addl %ebp,%ecx
- addl 8(%esp),%ebx
xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
movl %ecx,%ebp
+ addl 8(%esp),%ebx
+ xorl %edi,%edx
roll $5,%ecx
por %xmm2,%xmm0
- andl %edi,%esi
- xorl %eax,%edi
+ addl %esi,%ebx
+ andl %edx,%ebp
movdqa 96(%esp),%xmm2
- xorl %eax,%esi
+ xorl %edi,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
addl 12(%esp),%eax
- movdqa %xmm0,%xmm3
- xorl %edi,%edx
+ xorl %edi,%ebp
movl %ebx,%esi
+ pshufd $238,%xmm7,%xmm3
roll $5,%ebx
- andl %edx,%ebp
- xorl %edi,%edx
- xorl %edi,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
addl 16(%esp),%edi
pxor %xmm5,%xmm1
-.byte 102,15,58,15,223,8
- xorl %edx,%esi
+ punpcklqdq %xmm0,%xmm3
+ xorl %ecx,%esi
movl %eax,%ebp
roll $5,%eax
pxor %xmm2,%xmm1
movdqa %xmm5,96(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
+ addl %esi,%edi
+ xorl %ecx,%ebp
movdqa %xmm4,%xmm5
+ rorl $7,%ebx
paddd %xmm0,%xmm4
- rorl $7,%ebx
- addl %esi,%edi
+ addl %eax,%edi
pxor %xmm3,%xmm1
addl 20(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
movdqa %xmm1,%xmm3
movdqa %xmm4,(%esp)
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
pslld $2,%xmm1
addl 24(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
psrld $30,%xmm3
movl %edx,%ebp
roll $5,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
por %xmm3,%xmm1
addl 28(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movdqa 64(%esp),%xmm3
movl %ecx,%esi
roll $5,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ pshufd $238,%xmm0,%xmm4
addl %ecx,%ebx
- rorl $7,%edx
- movdqa %xmm1,%xmm4
- addl %ebp,%ebx
addl 32(%esp),%eax
pxor %xmm6,%xmm2
-.byte 102,15,58,15,224,8
- xorl %edi,%esi
+ punpcklqdq %xmm1,%xmm4
+ xorl %edx,%esi
movl %ebx,%ebp
roll $5,%ebx
pxor %xmm3,%xmm2
movdqa %xmm6,64(%esp)
- xorl %edx,%esi
- addl %ebx,%eax
+ addl %esi,%eax
+ xorl %edx,%ebp
movdqa 128(%esp),%xmm6
+ rorl $7,%ecx
paddd %xmm1,%xmm5
- rorl $7,%ecx
- addl %esi,%eax
+ addl %ebx,%eax
pxor %xmm4,%xmm2
addl 36(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
roll $5,%eax
movdqa %xmm2,%xmm4
movdqa %xmm5,16(%esp)
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
pslld $2,%xmm2
addl 40(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
psrld $30,%xmm4
movl %edi,%ebp
roll $5,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
por %xmm4,%xmm2
addl 44(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movdqa 80(%esp),%xmm4
movl %edx,%esi
roll $5,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ pshufd $238,%xmm1,%xmm5
addl %edx,%ecx
- rorl $7,%edi
- movdqa %xmm2,%xmm5
- addl %ebp,%ecx
addl 48(%esp),%ebx
pxor %xmm7,%xmm3
-.byte 102,15,58,15,233,8
- xorl %eax,%esi
+ punpcklqdq %xmm2,%xmm5
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
pxor %xmm4,%xmm3
movdqa %xmm7,80(%esp)
- xorl %edi,%esi
- addl %ecx,%ebx
+ addl %esi,%ebx
+ xorl %edi,%ebp
movdqa %xmm6,%xmm7
+ rorl $7,%edx
paddd %xmm2,%xmm6
- rorl $7,%edx
- addl %esi,%ebx
+ addl %ecx,%ebx
pxor %xmm5,%xmm3
addl 52(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
movdqa %xmm3,%xmm5
movdqa %xmm6,32(%esp)
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
pslld $2,%xmm3
addl 56(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
psrld $30,%xmm5
movl %eax,%ebp
roll $5,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %esi,%edi
por %xmm5,%xmm3
addl 60(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movdqa 96(%esp),%xmm5
movl %edi,%esi
roll $5,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ pshufd $238,%xmm2,%xmm6
addl %edi,%edx
- rorl $7,%eax
- movdqa %xmm3,%xmm6
- addl %ebp,%edx
addl (%esp),%ecx
pxor %xmm0,%xmm4
-.byte 102,15,58,15,242,8
- xorl %ebx,%esi
+ punpcklqdq %xmm3,%xmm6
+ xorl %eax,%esi
movl %edx,%ebp
roll $5,%edx
pxor %xmm5,%xmm4
movdqa %xmm0,96(%esp)
- xorl %eax,%esi
- addl %edx,%ecx
+ addl %esi,%ecx
+ xorl %eax,%ebp
movdqa %xmm7,%xmm0
+ rorl $7,%edi
paddd %xmm3,%xmm7
- rorl $7,%edi
- addl %esi,%ecx
+ addl %edx,%ecx
pxor %xmm6,%xmm4
addl 4(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
roll $5,%ecx
movdqa %xmm4,%xmm6
movdqa %xmm7,48(%esp)
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
pslld $2,%xmm4
addl 8(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
psrld $30,%xmm6
movl %ebx,%ebp
roll $5,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
por %xmm6,%xmm4
addl 12(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movdqa 64(%esp),%xmm6
movl %eax,%esi
roll $5,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ pshufd $238,%xmm3,%xmm7
addl %eax,%edi
- rorl $7,%ebx
- movdqa %xmm4,%xmm7
- addl %ebp,%edi
addl 16(%esp),%edx
pxor %xmm1,%xmm5
-.byte 102,15,58,15,251,8
- xorl %ecx,%esi
+ punpcklqdq %xmm4,%xmm7
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
pxor %xmm6,%xmm5
movdqa %xmm1,64(%esp)
- xorl %ebx,%esi
- addl %edi,%edx
+ addl %esi,%edx
+ xorl %ebx,%ebp
movdqa %xmm0,%xmm1
+ rorl $7,%eax
paddd %xmm4,%xmm0
- rorl $7,%eax
- addl %esi,%edx
+ addl %edi,%edx
pxor %xmm7,%xmm5
addl 20(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
roll $5,%edx
movdqa %xmm5,%xmm7
movdqa %xmm0,(%esp)
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
pslld $2,%xmm5
addl 24(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
psrld $30,%xmm7
movl %ecx,%ebp
roll $5,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
por %xmm7,%xmm5
addl 28(%esp),%eax
- xorl %edi,%ebp
movdqa 80(%esp),%xmm7
+ rorl $7,%ecx
movl %ebx,%esi
+ xorl %edx,%ebp
roll $5,%ebx
- xorl %edx,%ebp
+ pshufd $238,%xmm4,%xmm0
+ addl %ebp,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- movdqa %xmm5,%xmm0
- addl %ebp,%eax
- movl %ecx,%ebp
+ addl 32(%esp),%edi
pxor %xmm2,%xmm6
-.byte 102,15,58,15,196,8
+ punpcklqdq %xmm5,%xmm0
+ andl %ecx,%esi
xorl %edx,%ecx
- addl 32(%esp),%edi
- andl %edx,%ebp
+ rorl $7,%ebx
pxor %xmm7,%xmm6
movdqa %xmm2,80(%esp)
- andl %ecx,%esi
- rorl $7,%ebx
+ movl %eax,%ebp
+ xorl %ecx,%esi
+ roll $5,%eax
movdqa %xmm1,%xmm2
+ addl %esi,%edi
paddd %xmm5,%xmm1
- addl %ebp,%edi
- movl %eax,%ebp
+ xorl %ebx,%ebp
pxor %xmm0,%xmm6
- roll $5,%eax
- addl %esi,%edi
- xorl %edx,%ecx
+ xorl %ecx,%ebx
addl %eax,%edi
+ addl 36(%esp),%edx
+ andl %ebx,%ebp
movdqa %xmm6,%xmm0
movdqa %xmm1,16(%esp)
- movl %ebx,%esi
xorl %ecx,%ebx
- addl 36(%esp),%edx
- andl %ecx,%esi
- pslld $2,%xmm6
- andl %ebx,%ebp
rorl $7,%eax
- psrld $30,%xmm0
- addl %esi,%edx
movl %edi,%esi
+ xorl %ebx,%ebp
roll $5,%edi
+ pslld $2,%xmm6
addl %ebp,%edx
- xorl %ecx,%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm0
+ xorl %ebx,%eax
addl %edi,%edx
- por %xmm0,%xmm6
- movl %eax,%ebp
- xorl %ebx,%eax
- movdqa 96(%esp),%xmm0
addl 40(%esp),%ecx
- andl %ebx,%ebp
andl %eax,%esi
+ xorl %ebx,%eax
rorl $7,%edi
- addl %ebp,%ecx
- movdqa %xmm6,%xmm1
+ por %xmm0,%xmm6
movl %edx,%ebp
+ xorl %eax,%esi
+ movdqa 96(%esp),%xmm0
roll $5,%edx
addl %esi,%ecx
- xorl %ebx,%eax
+ xorl %edi,%ebp
+ xorl %eax,%edi
addl %edx,%ecx
- movl %edi,%esi
- xorl %eax,%edi
+ pshufd $238,%xmm5,%xmm1
addl 44(%esp),%ebx
- andl %eax,%esi
andl %edi,%ebp
+ xorl %eax,%edi
rorl $7,%edx
- addl %esi,%ebx
movl %ecx,%esi
+ xorl %edi,%ebp
roll $5,%ecx
addl %ebp,%ebx
- xorl %eax,%edi
+ xorl %edx,%esi
+ xorl %edi,%edx
addl %ecx,%ebx
- movl %edx,%ebp
+ addl 48(%esp),%eax
pxor %xmm3,%xmm7
-.byte 102,15,58,15,205,8
+ punpcklqdq %xmm6,%xmm1
+ andl %edx,%esi
xorl %edi,%edx
- addl 48(%esp),%eax
- andl %edi,%ebp
+ rorl $7,%ecx
pxor %xmm0,%xmm7
movdqa %xmm3,96(%esp)
- andl %edx,%esi
- rorl $7,%ecx
+ movl %ebx,%ebp
+ xorl %edx,%esi
+ roll $5,%ebx
movdqa 144(%esp),%xmm3
+ addl %esi,%eax
paddd %xmm6,%xmm2
- addl %ebp,%eax
- movl %ebx,%ebp
+ xorl %ecx,%ebp
pxor %xmm1,%xmm7
- roll $5,%ebx
- addl %esi,%eax
- xorl %edi,%edx
+ xorl %edx,%ecx
addl %ebx,%eax
+ addl 52(%esp),%edi
+ andl %ecx,%ebp
movdqa %xmm7,%xmm1
movdqa %xmm2,32(%esp)
- movl %ecx,%esi
xorl %edx,%ecx
- addl 52(%esp),%edi
- andl %edx,%esi
- pslld $2,%xmm7
- andl %ecx,%ebp
rorl $7,%ebx
- psrld $30,%xmm1
- addl %esi,%edi
movl %eax,%esi
+ xorl %ecx,%ebp
roll $5,%eax
+ pslld $2,%xmm7
addl %ebp,%edi
- xorl %edx,%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm1
+ xorl %ecx,%ebx
addl %eax,%edi
- por %xmm1,%xmm7
- movl %ebx,%ebp
- xorl %ecx,%ebx
- movdqa 64(%esp),%xmm1
addl 56(%esp),%edx
- andl %ecx,%ebp
andl %ebx,%esi
+ xorl %ecx,%ebx
rorl $7,%eax
- addl %ebp,%edx
- movdqa %xmm7,%xmm2
+ por %xmm1,%xmm7
movl %edi,%ebp
+ xorl %ebx,%esi
+ movdqa 64(%esp),%xmm1
roll $5,%edi
addl %esi,%edx
- xorl %ecx,%ebx
+ xorl %eax,%ebp
+ xorl %ebx,%eax
addl %edi,%edx
- movl %eax,%esi
- xorl %ebx,%eax
+ pshufd $238,%xmm6,%xmm2
addl 60(%esp),%ecx
- andl %ebx,%esi
andl %eax,%ebp
+ xorl %ebx,%eax
rorl $7,%edi
- addl %esi,%ecx
movl %edx,%esi
+ xorl %eax,%ebp
roll $5,%edx
addl %ebp,%ecx
- xorl %ebx,%eax
+ xorl %edi,%esi
+ xorl %eax,%edi
addl %edx,%ecx
- movl %edi,%ebp
+ addl (%esp),%ebx
pxor %xmm4,%xmm0
-.byte 102,15,58,15,214,8
+ punpcklqdq %xmm7,%xmm2
+ andl %edi,%esi
xorl %eax,%edi
- addl (%esp),%ebx
- andl %eax,%ebp
+ rorl $7,%edx
pxor %xmm1,%xmm0
movdqa %xmm4,64(%esp)
- andl %edi,%esi
- rorl $7,%edx
+ movl %ecx,%ebp
+ xorl %edi,%esi
+ roll $5,%ecx
movdqa %xmm3,%xmm4
+ addl %esi,%ebx
paddd %xmm7,%xmm3
- addl %ebp,%ebx
- movl %ecx,%ebp
+ xorl %edx,%ebp
pxor %xmm2,%xmm0
- roll $5,%ecx
- addl %esi,%ebx
- xorl %eax,%edi
+ xorl %edi,%edx
addl %ecx,%ebx
+ addl 4(%esp),%eax
+ andl %edx,%ebp
movdqa %xmm0,%xmm2
movdqa %xmm3,48(%esp)
- movl %edx,%esi
xorl %edi,%edx
- addl 4(%esp),%eax
- andl %edi,%esi
- pslld $2,%xmm0
- andl %edx,%ebp
rorl $7,%ecx
- psrld $30,%xmm2
- addl %esi,%eax
movl %ebx,%esi
+ xorl %edx,%ebp
roll $5,%ebx
+ pslld $2,%xmm0
addl %ebp,%eax
- xorl %edi,%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm2
+ xorl %edx,%ecx
addl %ebx,%eax
- por %xmm2,%xmm0
- movl %ecx,%ebp
- xorl %edx,%ecx
- movdqa 80(%esp),%xmm2
addl 8(%esp),%edi
- andl %edx,%ebp
andl %ecx,%esi
+ xorl %edx,%ecx
rorl $7,%ebx
- addl %ebp,%edi
- movdqa %xmm0,%xmm3
+ por %xmm2,%xmm0
movl %eax,%ebp
+ xorl %ecx,%esi
+ movdqa 80(%esp),%xmm2
roll $5,%eax
addl %esi,%edi
- xorl %edx,%ecx
+ xorl %ebx,%ebp
+ xorl %ecx,%ebx
addl %eax,%edi
- movl %ebx,%esi
- xorl %ecx,%ebx
+ pshufd $238,%xmm7,%xmm3
addl 12(%esp),%edx
- andl %ecx,%esi
andl %ebx,%ebp
+ xorl %ecx,%ebx
rorl $7,%eax
- addl %esi,%edx
movl %edi,%esi
+ xorl %ebx,%ebp
roll $5,%edi
addl %ebp,%edx
- xorl %ecx,%ebx
+ xorl %eax,%esi
+ xorl %ebx,%eax
addl %edi,%edx
- movl %eax,%ebp
+ addl 16(%esp),%ecx
pxor %xmm5,%xmm1
-.byte 102,15,58,15,223,8
+ punpcklqdq %xmm0,%xmm3
+ andl %eax,%esi
xorl %ebx,%eax
- addl 16(%esp),%ecx
- andl %ebx,%ebp
+ rorl $7,%edi
pxor %xmm2,%xmm1
movdqa %xmm5,80(%esp)
- andl %eax,%esi
- rorl $7,%edi
+ movl %edx,%ebp
+ xorl %eax,%esi
+ roll $5,%edx
movdqa %xmm4,%xmm5
+ addl %esi,%ecx
paddd %xmm0,%xmm4
- addl %ebp,%ecx
- movl %edx,%ebp
+ xorl %edi,%ebp
pxor %xmm3,%xmm1
- roll $5,%edx
- addl %esi,%ecx
- xorl %ebx,%eax
+ xorl %eax,%edi
addl %edx,%ecx
+ addl 20(%esp),%ebx
+ andl %edi,%ebp
movdqa %xmm1,%xmm3
movdqa %xmm4,(%esp)
- movl %edi,%esi
xorl %eax,%edi
- addl 20(%esp),%ebx
- andl %eax,%esi
- pslld $2,%xmm1
- andl %edi,%ebp
rorl $7,%edx
- psrld $30,%xmm3
- addl %esi,%ebx
movl %ecx,%esi
+ xorl %edi,%ebp
roll $5,%ecx
+ pslld $2,%xmm1
addl %ebp,%ebx
- xorl %eax,%edi
+ xorl %edx,%esi
+ psrld $30,%xmm3
+ xorl %edi,%edx
addl %ecx,%ebx
- por %xmm3,%xmm1
- movl %edx,%ebp
- xorl %edi,%edx
- movdqa 96(%esp),%xmm3
addl 24(%esp),%eax
- andl %edi,%ebp
andl %edx,%esi
+ xorl %edi,%edx
rorl $7,%ecx
- addl %ebp,%eax
- movdqa %xmm1,%xmm4
+ por %xmm3,%xmm1
movl %ebx,%ebp
+ xorl %edx,%esi
+ movdqa 96(%esp),%xmm3
roll $5,%ebx
addl %esi,%eax
- xorl %edi,%edx
+ xorl %ecx,%ebp
+ xorl %edx,%ecx
addl %ebx,%eax
- movl %ecx,%esi
- xorl %edx,%ecx
+ pshufd $238,%xmm0,%xmm4
addl 28(%esp),%edi
- andl %edx,%esi
andl %ecx,%ebp
+ xorl %edx,%ecx
rorl $7,%ebx
- addl %esi,%edi
movl %eax,%esi
+ xorl %ecx,%ebp
roll $5,%eax
addl %ebp,%edi
- xorl %edx,%ecx
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
addl %eax,%edi
- movl %ebx,%ebp
+ addl 32(%esp),%edx
pxor %xmm6,%xmm2
-.byte 102,15,58,15,224,8
+ punpcklqdq %xmm1,%xmm4
+ andl %ebx,%esi
xorl %ecx,%ebx
- addl 32(%esp),%edx
- andl %ecx,%ebp
+ rorl $7,%eax
pxor %xmm3,%xmm2
movdqa %xmm6,96(%esp)
- andl %ebx,%esi
- rorl $7,%eax
+ movl %edi,%ebp
+ xorl %ebx,%esi
+ roll $5,%edi
movdqa %xmm5,%xmm6
+ addl %esi,%edx
paddd %xmm1,%xmm5
- addl %ebp,%edx
- movl %edi,%ebp
+ xorl %eax,%ebp
pxor %xmm4,%xmm2
- roll $5,%edi
- addl %esi,%edx
- xorl %ecx,%ebx
+ xorl %ebx,%eax
addl %edi,%edx
+ addl 36(%esp),%ecx
+ andl %eax,%ebp
movdqa %xmm2,%xmm4
movdqa %xmm5,16(%esp)
- movl %eax,%esi
xorl %ebx,%eax
- addl 36(%esp),%ecx
- andl %ebx,%esi
- pslld $2,%xmm2
- andl %eax,%ebp
rorl $7,%edi
- psrld $30,%xmm4
- addl %esi,%ecx
movl %edx,%esi
+ xorl %eax,%ebp
roll $5,%edx
+ pslld $2,%xmm2
addl %ebp,%ecx
- xorl %ebx,%eax
+ xorl %edi,%esi
+ psrld $30,%xmm4
+ xorl %eax,%edi
addl %edx,%ecx
- por %xmm4,%xmm2
- movl %edi,%ebp
- xorl %eax,%edi
- movdqa 64(%esp),%xmm4
addl 40(%esp),%ebx
- andl %eax,%ebp
andl %edi,%esi
+ xorl %eax,%edi
rorl $7,%edx
- addl %ebp,%ebx
- movdqa %xmm2,%xmm5
+ por %xmm4,%xmm2
movl %ecx,%ebp
+ xorl %edi,%esi
+ movdqa 64(%esp),%xmm4
roll $5,%ecx
addl %esi,%ebx
- xorl %eax,%edi
+ xorl %edx,%ebp
+ xorl %edi,%edx
addl %ecx,%ebx
- movl %edx,%esi
- xorl %edi,%edx
+ pshufd $238,%xmm1,%xmm5
addl 44(%esp),%eax
- andl %edi,%esi
andl %edx,%ebp
+ xorl %edi,%edx
rorl $7,%ecx
- addl %esi,%eax
movl %ebx,%esi
+ xorl %edx,%ebp
roll $5,%ebx
addl %ebp,%eax
- xorl %edi,%edx
+ xorl %edx,%esi
addl %ebx,%eax
addl 48(%esp),%edi
pxor %xmm7,%xmm3
-.byte 102,15,58,15,233,8
- xorl %edx,%esi
+ punpcklqdq %xmm2,%xmm5
+ xorl %ecx,%esi
movl %eax,%ebp
roll $5,%eax
pxor %xmm4,%xmm3
movdqa %xmm7,64(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
+ addl %esi,%edi
+ xorl %ecx,%ebp
movdqa %xmm6,%xmm7
+ rorl $7,%ebx
paddd %xmm2,%xmm6
- rorl $7,%ebx
- addl %esi,%edi
+ addl %eax,%edi
pxor %xmm5,%xmm3
addl 52(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
movdqa %xmm3,%xmm5
movdqa %xmm6,32(%esp)
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
pslld $2,%xmm3
addl 56(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
psrld $30,%xmm5
movl %edx,%ebp
roll $5,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
por %xmm5,%xmm3
addl 60(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
roll $5,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
addl (%esp),%eax
- paddd %xmm3,%xmm7
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
roll $5,%ebx
- xorl %edx,%esi
- movdqa %xmm7,48(%esp)
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
+ paddd %xmm3,%xmm7
addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
addl 4(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
+ movdqa %xmm7,48(%esp)
roll $5,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
addl 8(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
addl 12(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
roll $5,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
movl 196(%esp),%ebp
cmpl 200(%esp),%ebp
- je .L005done
+ je .L007done
movdqa 160(%esp),%xmm7
movdqa 176(%esp),%xmm6
movdqu (%ebp),%xmm0
@@ -6228,113 +6521,112 @@
movl %ebp,196(%esp)
movdqa %xmm7,96(%esp)
addl 16(%esp),%ebx
- xorl %eax,%esi
-.byte 102,15,56,0,206
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
- paddd %xmm7,%xmm0
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
+.byte 102,15,56,0,206
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
- movdqa %xmm0,(%esp)
addl 20(%esp),%eax
- xorl %edi,%ebp
- psubd %xmm7,%xmm0
+ xorl %edx,%ebp
movl %ebx,%esi
+ paddd %xmm7,%xmm0
roll $5,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ movdqa %xmm0,(%esp)
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
addl 24(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
+ psubd %xmm7,%xmm0
roll $5,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %esi,%edi
addl 28(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
addl 32(%esp),%ecx
- xorl %ebx,%esi
-.byte 102,15,56,0,214
+ xorl %eax,%esi
movl %edx,%ebp
roll $5,%edx
- paddd %xmm7,%xmm1
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
+.byte 102,15,56,0,214
addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
- movdqa %xmm1,16(%esp)
addl 36(%esp),%ebx
- xorl %eax,%ebp
- psubd %xmm7,%xmm1
+ xorl %edi,%ebp
movl %ecx,%esi
+ paddd %xmm7,%xmm1
roll $5,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ movdqa %xmm1,16(%esp)
addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
addl 40(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
+ psubd %xmm7,%xmm1
roll $5,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
addl 44(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
roll $5,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
addl 48(%esp),%edx
- xorl %ecx,%esi
-.byte 102,15,56,0,222
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
- paddd %xmm7,%xmm2
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
+.byte 102,15,56,0,222
addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
- movdqa %xmm2,32(%esp)
addl 52(%esp),%ecx
- xorl %ebx,%ebp
- psubd %xmm7,%xmm2
+ xorl %eax,%ebp
movl %edx,%esi
+ paddd %xmm7,%xmm2
roll $5,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ movdqa %xmm2,32(%esp)
addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
addl 56(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
+ psubd %xmm7,%xmm2
roll $5,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
addl 60(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
movl 192(%esp),%ebp
addl (%ebp),%eax
addl 4(%ebp),%esi
@@ -6344,109 +6636,112 @@
movl %esi,4(%ebp)
addl 16(%ebp),%edi
movl %ecx,8(%ebp)
- movl %esi,%ebx
+ movl %ecx,%ebx
movl %edx,12(%ebp)
+ xorl %edx,%ebx
movl %edi,16(%ebp)
- movdqa %xmm1,%xmm4
- jmp .L004loop
+ movl %esi,%ebp
+ pshufd $238,%xmm0,%xmm4
+ andl %ebx,%esi
+ movl %ebp,%ebx
+ jmp .L006loop
.align 16
-.L005done:
+.L007done:
addl 16(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
addl 20(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
addl 24(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
roll $5,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %esi,%edi
addl 28(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %ebp,%edx
addl 32(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
movl %edx,%ebp
roll $5,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %esi,%ecx
addl 36(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
roll $5,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %ebp,%ebx
addl 40(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
roll $5,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %esi,%eax
addl 44(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
roll $5,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
addl %eax,%edi
- rorl $7,%ebx
- addl %ebp,%edi
addl 48(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
addl %edi,%edx
- rorl $7,%eax
- addl %esi,%edx
addl 52(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
roll $5,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
addl %edx,%ecx
- rorl $7,%edi
- addl %ebp,%ecx
addl 56(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
addl 60(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ rorl $7,%ecx
addl %ebx,%eax
- rorl $7,%ecx
- addl %ebp,%eax
movl 192(%esp),%ebp
addl (%ebp),%eax
movl 204(%esp),%esp
@@ -6472,10 +6767,10 @@
pushl %ebx
pushl %esi
pushl %edi
- call .L006pic_point
-.L006pic_point:
+ call .L008pic_point
+.L008pic_point:
popl %ebp
- leal .LK_XX_XX-.L006pic_point(%ebp),%ebp
+ leal .LK_XX_XX-.L008pic_point(%ebp),%ebp
.Lavx_shortcut:
vzeroall
vmovdqa (%ebp),%xmm7
@@ -6520,893 +6815,874 @@
vpaddd %xmm7,%xmm1,%xmm5
vpaddd %xmm7,%xmm2,%xmm6
vmovdqa %xmm4,(%esp)
+ movl %ecx,%ebp
vmovdqa %xmm5,16(%esp)
+ xorl %edx,%ebp
vmovdqa %xmm6,32(%esp)
- jmp .L007loop
+ andl %ebp,%esi
+ jmp .L009loop
.align 16
-.L007loop:
- addl (%esp),%edi
- xorl %edx,%ecx
+.L009loop:
+ shrdl $2,%ebx,%ebx
+ xorl %edx,%esi
vpalignr $8,%xmm0,%xmm1,%xmm4
movl %eax,%ebp
- shldl $5,%eax,%eax
+ addl (%esp),%edi
vpaddd %xmm3,%xmm7,%xmm7
vmovdqa %xmm0,64(%esp)
- andl %ecx,%esi
- xorl %edx,%ecx
+ xorl %ecx,%ebx
+ shldl $5,%eax,%eax
vpsrldq $4,%xmm3,%xmm6
- xorl %edx,%esi
+ addl %esi,%edi
+ andl %ebx,%ebp
+ vpxor %xmm0,%xmm4,%xmm4
+ xorl %ecx,%ebx
addl %eax,%edi
- vpxor %xmm0,%xmm4,%xmm4
- shrdl $2,%ebx,%ebx
- addl %esi,%edi
vpxor %xmm2,%xmm6,%xmm6
- addl 4(%esp),%edx
- xorl %ecx,%ebx
+ shrdl $7,%eax,%eax
+ xorl %ecx,%ebp
vmovdqa %xmm7,48(%esp)
movl %edi,%esi
+ addl 4(%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ebx,%eax
shldl $5,%edi,%edi
- vpxor %xmm6,%xmm4,%xmm4
- andl %ebx,%ebp
- xorl %ecx,%ebx
- xorl %ecx,%ebp
- addl %edi,%edx
+ addl %ebp,%edx
+ andl %eax,%esi
vpsrld $31,%xmm4,%xmm6
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- addl 8(%esp),%ecx
xorl %ebx,%eax
+ addl %edi,%edx
+ shrdl $7,%edi,%edi
+ xorl %ebx,%esi
vpslldq $12,%xmm4,%xmm0
vpaddd %xmm4,%xmm4,%xmm4
movl %edx,%ebp
+ addl 8(%esp),%ecx
+ xorl %eax,%edi
shldl $5,%edx,%edx
- andl %eax,%esi
- xorl %ebx,%eax
vpsrld $30,%xmm0,%xmm7
vpor %xmm6,%xmm4,%xmm4
- xorl %ebx,%esi
+ addl %esi,%ecx
+ andl %edi,%ebp
+ xorl %eax,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %esi,%ecx
vpslld $2,%xmm0,%xmm0
- addl 12(%esp),%ebx
- xorl %eax,%edi
+ shrdl $7,%edx,%edx
+ xorl %eax,%ebp
vpxor %xmm7,%xmm4,%xmm4
movl %ecx,%esi
+ addl 12(%esp),%ebx
+ xorl %edi,%edx
shldl $5,%ecx,%ecx
- andl %edi,%ebp
- xorl %eax,%edi
vpxor %xmm0,%xmm4,%xmm4
- xorl %eax,%ebp
- addl %ecx,%ebx
+ addl %ebp,%ebx
+ andl %edx,%esi
vmovdqa 96(%esp),%xmm0
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
- addl 16(%esp),%eax
xorl %edi,%edx
+ addl %ecx,%ebx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%esi
vpalignr $8,%xmm1,%xmm2,%xmm5
movl %ebx,%ebp
- shldl $5,%ebx,%ebx
+ addl 16(%esp),%eax
vpaddd %xmm4,%xmm0,%xmm0
vmovdqa %xmm1,80(%esp)
- andl %edx,%esi
- xorl %edi,%edx
+ xorl %edx,%ecx
+ shldl $5,%ebx,%ebx
vpsrldq $4,%xmm4,%xmm7
- xorl %edi,%esi
+ addl %esi,%eax
+ andl %ecx,%ebp
+ vpxor %xmm1,%xmm5,%xmm5
+ xorl %edx,%ecx
addl %ebx,%eax
- vpxor %xmm1,%xmm5,%xmm5
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
vpxor %xmm3,%xmm7,%xmm7
- addl 20(%esp),%edi
- xorl %edx,%ecx
+ shrdl $7,%ebx,%ebx
+ xorl %edx,%ebp
vmovdqa %xmm0,(%esp)
movl %eax,%esi
+ addl 20(%esp),%edi
+ vpxor %xmm7,%xmm5,%xmm5
+ xorl %ecx,%ebx
shldl $5,%eax,%eax
- vpxor %xmm7,%xmm5,%xmm5
- andl %ecx,%ebp
- xorl %edx,%ecx
- xorl %edx,%ebp
- addl %eax,%edi
+ addl %ebp,%edi
+ andl %ebx,%esi
vpsrld $31,%xmm5,%xmm7
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
- addl 24(%esp),%edx
xorl %ecx,%ebx
+ addl %eax,%edi
+ shrdl $7,%eax,%eax
+ xorl %ecx,%esi
vpslldq $12,%xmm5,%xmm1
vpaddd %xmm5,%xmm5,%xmm5
movl %edi,%ebp
+ addl 24(%esp),%edx
+ xorl %ebx,%eax
shldl $5,%edi,%edi
- andl %ebx,%esi
- xorl %ecx,%ebx
vpsrld $30,%xmm1,%xmm0
vpor %xmm7,%xmm5,%xmm5
- xorl %ecx,%esi
+ addl %esi,%edx
+ andl %eax,%ebp
+ xorl %ebx,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %esi,%edx
vpslld $2,%xmm1,%xmm1
- addl 28(%esp),%ecx
- xorl %ebx,%eax
+ shrdl $7,%edi,%edi
+ xorl %ebx,%ebp
vpxor %xmm0,%xmm5,%xmm5
movl %edx,%esi
+ addl 28(%esp),%ecx
+ xorl %eax,%edi
shldl $5,%edx,%edx
- andl %eax,%ebp
- xorl %ebx,%eax
vpxor %xmm1,%xmm5,%xmm5
- xorl %ebx,%ebp
- addl %edx,%ecx
+ addl %ebp,%ecx
+ andl %edi,%esi
vmovdqa 112(%esp),%xmm1
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
- addl 32(%esp),%ebx
xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%edx,%edx
+ xorl %eax,%esi
vpalignr $8,%xmm2,%xmm3,%xmm6
movl %ecx,%ebp
- shldl $5,%ecx,%ecx
+ addl 32(%esp),%ebx
vpaddd %xmm5,%xmm1,%xmm1
vmovdqa %xmm2,96(%esp)
- andl %edi,%esi
- xorl %eax,%edi
+ xorl %edi,%edx
+ shldl $5,%ecx,%ecx
vpsrldq $4,%xmm5,%xmm0
- xorl %eax,%esi
+ addl %esi,%ebx
+ andl %edx,%ebp
+ vpxor %xmm2,%xmm6,%xmm6
+ xorl %edi,%edx
addl %ecx,%ebx
- vpxor %xmm2,%xmm6,%xmm6
- shrdl $7,%edx,%edx
- addl %esi,%ebx
vpxor %xmm4,%xmm0,%xmm0
- addl 36(%esp),%eax
- xorl %edi,%edx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%ebp
vmovdqa %xmm1,16(%esp)
movl %ebx,%esi
+ addl 36(%esp),%eax
+ vpxor %xmm0,%xmm6,%xmm6
+ xorl %edx,%ecx
shldl $5,%ebx,%ebx
- vpxor %xmm0,%xmm6,%xmm6
- andl %edx,%ebp
- xorl %edi,%edx
- xorl %edi,%ebp
- addl %ebx,%eax
+ addl %ebp,%eax
+ andl %ecx,%esi
vpsrld $31,%xmm6,%xmm0
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
- addl 40(%esp),%edi
xorl %edx,%ecx
+ addl %ebx,%eax
+ shrdl $7,%ebx,%ebx
+ xorl %edx,%esi
vpslldq $12,%xmm6,%xmm2
vpaddd %xmm6,%xmm6,%xmm6
movl %eax,%ebp
+ addl 40(%esp),%edi
+ xorl %ecx,%ebx
shldl $5,%eax,%eax
- andl %ecx,%esi
- xorl %edx,%ecx
vpsrld $30,%xmm2,%xmm1
vpor %xmm0,%xmm6,%xmm6
- xorl %edx,%esi
+ addl %esi,%edi
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %esi,%edi
vpslld $2,%xmm2,%xmm2
vmovdqa 64(%esp),%xmm0
- addl 44(%esp),%edx
- xorl %ecx,%ebx
+ shrdl $7,%eax,%eax
+ xorl %ecx,%ebp
vpxor %xmm1,%xmm6,%xmm6
movl %edi,%esi
+ addl 44(%esp),%edx
+ xorl %ebx,%eax
shldl $5,%edi,%edi
- andl %ebx,%ebp
- xorl %ecx,%ebx
vpxor %xmm2,%xmm6,%xmm6
- xorl %ecx,%ebp
- addl %edi,%edx
+ addl %ebp,%edx
+ andl %eax,%esi
vmovdqa 112(%esp),%xmm2
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- addl 48(%esp),%ecx
xorl %ebx,%eax
+ addl %edi,%edx
+ shrdl $7,%edi,%edi
+ xorl %ebx,%esi
vpalignr $8,%xmm3,%xmm4,%xmm7
movl %edx,%ebp
- shldl $5,%edx,%edx
+ addl 48(%esp),%ecx
vpaddd %xmm6,%xmm2,%xmm2
vmovdqa %xmm3,64(%esp)
- andl %eax,%esi
- xorl %ebx,%eax
+ xorl %eax,%edi
+ shldl $5,%edx,%edx
vpsrldq $4,%xmm6,%xmm1
- xorl %ebx,%esi
+ addl %esi,%ecx
+ andl %edi,%ebp
+ vpxor %xmm3,%xmm7,%xmm7
+ xorl %eax,%edi
addl %edx,%ecx
- vpxor %xmm3,%xmm7,%xmm7
- shrdl $7,%edi,%edi
- addl %esi,%ecx
vpxor %xmm5,%xmm1,%xmm1
- addl 52(%esp),%ebx
- xorl %eax,%edi
+ shrdl $7,%edx,%edx
+ xorl %eax,%ebp
vmovdqa %xmm2,32(%esp)
movl %ecx,%esi
+ addl 52(%esp),%ebx
+ vpxor %xmm1,%xmm7,%xmm7
+ xorl %edi,%edx
shldl $5,%ecx,%ecx
- vpxor %xmm1,%xmm7,%xmm7
- andl %edi,%ebp
- xorl %eax,%edi
- xorl %eax,%ebp
- addl %ecx,%ebx
+ addl %ebp,%ebx
+ andl %edx,%esi
vpsrld $31,%xmm7,%xmm1
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
- addl 56(%esp),%eax
xorl %edi,%edx
+ addl %ecx,%ebx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%esi
vpslldq $12,%xmm7,%xmm3
vpaddd %xmm7,%xmm7,%xmm7
movl %ebx,%ebp
+ addl 56(%esp),%eax
+ xorl %edx,%ecx
shldl $5,%ebx,%ebx
- andl %edx,%esi
- xorl %edi,%edx
vpsrld $30,%xmm3,%xmm2
vpor %xmm1,%xmm7,%xmm7
- xorl %edi,%esi
+ addl %esi,%eax
+ andl %ecx,%ebp
+ xorl %edx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
vpslld $2,%xmm3,%xmm3
vmovdqa 80(%esp),%xmm1
- addl 60(%esp),%edi
- xorl %edx,%ecx
+ shrdl $7,%ebx,%ebx
+ xorl %edx,%ebp
vpxor %xmm2,%xmm7,%xmm7
movl %eax,%esi
+ addl 60(%esp),%edi
+ xorl %ecx,%ebx
shldl $5,%eax,%eax
- andl %ecx,%ebp
- xorl %edx,%ecx
vpxor %xmm3,%xmm7,%xmm7
- xorl %edx,%ebp
+ addl %ebp,%edi
+ andl %ebx,%esi
+ vmovdqa 112(%esp),%xmm3
+ xorl %ecx,%ebx
addl %eax,%edi
- vmovdqa 112(%esp),%xmm3
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
vpalignr $8,%xmm6,%xmm7,%xmm2
vpxor %xmm4,%xmm0,%xmm0
+ shrdl $7,%eax,%eax
+ xorl %ecx,%esi
+ movl %edi,%ebp
addl (%esp),%edx
- xorl %ecx,%ebx
- movl %edi,%ebp
- shldl $5,%edi,%edi
vpxor %xmm1,%xmm0,%xmm0
vmovdqa %xmm4,80(%esp)
- andl %ebx,%esi
- xorl %ecx,%ebx
+ xorl %ebx,%eax
+ shldl $5,%edi,%edi
vmovdqa %xmm3,%xmm4
vpaddd %xmm7,%xmm3,%xmm3
- xorl %ecx,%esi
- addl %edi,%edx
+ addl %esi,%edx
+ andl %eax,%ebp
vpxor %xmm2,%xmm0,%xmm0
- shrdl $7,%eax,%eax
- addl %esi,%edx
- addl 4(%esp),%ecx
xorl %ebx,%eax
+ addl %edi,%edx
+ shrdl $7,%edi,%edi
+ xorl %ebx,%ebp
vpsrld $30,%xmm0,%xmm2
vmovdqa %xmm3,48(%esp)
movl %edx,%esi
+ addl 4(%esp),%ecx
+ xorl %eax,%edi
shldl $5,%edx,%edx
- andl %eax,%ebp
- xorl %ebx,%eax
vpslld $2,%xmm0,%xmm0
- xorl %ebx,%ebp
- addl %edx,%ecx
- shrdl $7,%edi,%edi
addl %ebp,%ecx
- addl 8(%esp),%ebx
+ andl %edi,%esi
xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%edx,%edx
+ xorl %eax,%esi
movl %ecx,%ebp
+ addl 8(%esp),%ebx
+ vpor %xmm2,%xmm0,%xmm0
+ xorl %edi,%edx
shldl $5,%ecx,%ecx
- vpor %xmm2,%xmm0,%xmm0
- andl %edi,%esi
- xorl %eax,%edi
vmovdqa 96(%esp),%xmm2
- xorl %eax,%esi
+ addl %esi,%ebx
+ andl %edx,%ebp
+ xorl %edi,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
addl 12(%esp),%eax
- xorl %edi,%edx
+ xorl %edi,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
- andl %edx,%ebp
- xorl %edi,%edx
- xorl %edi,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
vpalignr $8,%xmm7,%xmm0,%xmm3
vpxor %xmm5,%xmm1,%xmm1
addl 16(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
shldl $5,%eax,%eax
vpxor %xmm2,%xmm1,%xmm1
vmovdqa %xmm5,96(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
+ addl %esi,%edi
+ xorl %ecx,%ebp
vmovdqa %xmm4,%xmm5
vpaddd %xmm0,%xmm4,%xmm4
shrdl $7,%ebx,%ebx
- addl %esi,%edi
+ addl %eax,%edi
vpxor %xmm3,%xmm1,%xmm1
addl 20(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
shldl $5,%edi,%edi
vpsrld $30,%xmm1,%xmm3
vmovdqa %xmm4,(%esp)
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %ebp,%edx
vpslld $2,%xmm1,%xmm1
addl 24(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
movl %edx,%ebp
shldl $5,%edx,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %esi,%ecx
vpor %xmm3,%xmm1,%xmm1
addl 28(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
vmovdqa 64(%esp),%xmm3
movl %ecx,%esi
shldl $5,%ecx,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
vpalignr $8,%xmm0,%xmm1,%xmm4
vpxor %xmm6,%xmm2,%xmm2
addl 32(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
shldl $5,%ebx,%ebx
vpxor %xmm3,%xmm2,%xmm2
vmovdqa %xmm6,64(%esp)
- xorl %edx,%esi
- addl %ebx,%eax
+ addl %esi,%eax
+ xorl %edx,%ebp
vmovdqa 128(%esp),%xmm6
vpaddd %xmm1,%xmm5,%xmm5
shrdl $7,%ecx,%ecx
- addl %esi,%eax
+ addl %ebx,%eax
vpxor %xmm4,%xmm2,%xmm2
addl 36(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
shldl $5,%eax,%eax
vpsrld $30,%xmm2,%xmm4
vmovdqa %xmm5,16(%esp)
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
vpslld $2,%xmm2,%xmm2
addl 40(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
shldl $5,%edi,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %esi,%edx
vpor %xmm4,%xmm2,%xmm2
addl 44(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
vmovdqa 80(%esp),%xmm4
movl %edx,%esi
shldl $5,%edx,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
vpalignr $8,%xmm1,%xmm2,%xmm5
vpxor %xmm7,%xmm3,%xmm3
addl 48(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
shldl $5,%ecx,%ecx
vpxor %xmm4,%xmm3,%xmm3
vmovdqa %xmm7,80(%esp)
- xorl %edi,%esi
- addl %ecx,%ebx
+ addl %esi,%ebx
+ xorl %edi,%ebp
vmovdqa %xmm6,%xmm7
vpaddd %xmm2,%xmm6,%xmm6
shrdl $7,%edx,%edx
- addl %esi,%ebx
+ addl %ecx,%ebx
vpxor %xmm5,%xmm3,%xmm3
addl 52(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
vpsrld $30,%xmm3,%xmm5
vmovdqa %xmm6,32(%esp)
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
vpslld $2,%xmm3,%xmm3
addl 56(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
shldl $5,%eax,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %esi,%edi
vpor %xmm5,%xmm3,%xmm3
addl 60(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
vmovdqa 96(%esp),%xmm5
movl %edi,%esi
shldl $5,%edi,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %ebp,%edx
vpalignr $8,%xmm2,%xmm3,%xmm6
vpxor %xmm0,%xmm4,%xmm4
addl (%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
movl %edx,%ebp
shldl $5,%edx,%edx
vpxor %xmm5,%xmm4,%xmm4
vmovdqa %xmm0,96(%esp)
- xorl %eax,%esi
- addl %edx,%ecx
+ addl %esi,%ecx
+ xorl %eax,%ebp
vmovdqa %xmm7,%xmm0
vpaddd %xmm3,%xmm7,%xmm7
shrdl $7,%edi,%edi
- addl %esi,%ecx
+ addl %edx,%ecx
vpxor %xmm6,%xmm4,%xmm4
addl 4(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
shldl $5,%ecx,%ecx
vpsrld $30,%xmm4,%xmm6
vmovdqa %xmm7,48(%esp)
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
vpslld $2,%xmm4,%xmm4
addl 8(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
shldl $5,%ebx,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
vpor %xmm6,%xmm4,%xmm4
addl 12(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
vmovdqa 64(%esp),%xmm6
movl %eax,%esi
shldl $5,%eax,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
vpalignr $8,%xmm3,%xmm4,%xmm7
vpxor %xmm1,%xmm5,%xmm5
addl 16(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
shldl $5,%edi,%edi
vpxor %xmm6,%xmm5,%xmm5
vmovdqa %xmm1,64(%esp)
- xorl %ebx,%esi
- addl %edi,%edx
+ addl %esi,%edx
+ xorl %ebx,%ebp
vmovdqa %xmm0,%xmm1
vpaddd %xmm4,%xmm0,%xmm0
shrdl $7,%eax,%eax
- addl %esi,%edx
+ addl %edi,%edx
vpxor %xmm7,%xmm5,%xmm5
addl 20(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
shldl $5,%edx,%edx
vpsrld $30,%xmm5,%xmm7
vmovdqa %xmm0,(%esp)
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
vpslld $2,%xmm5,%xmm5
addl 24(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
shldl $5,%ecx,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
vpor %xmm7,%xmm5,%xmm5
addl 28(%esp),%eax
- xorl %edi,%ebp
vmovdqa 80(%esp),%xmm7
+ shrdl $7,%ecx,%ecx
movl %ebx,%esi
+ xorl %edx,%ebp
shldl $5,%ebx,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
vpalignr $8,%xmm4,%xmm5,%xmm0
vpxor %xmm2,%xmm6,%xmm6
- movl %ecx,%ebp
+ addl 32(%esp),%edi
+ andl %ecx,%esi
xorl %edx,%ecx
- addl 32(%esp),%edi
- andl %edx,%ebp
+ shrdl $7,%ebx,%ebx
vpxor %xmm7,%xmm6,%xmm6
vmovdqa %xmm2,80(%esp)
- andl %ecx,%esi
- shrdl $7,%ebx,%ebx
+ movl %eax,%ebp
+ xorl %ecx,%esi
vmovdqa %xmm1,%xmm2
vpaddd %xmm5,%xmm1,%xmm1
- addl %ebp,%edi
- movl %eax,%ebp
- vpxor %xmm0,%xmm6,%xmm6
shldl $5,%eax,%eax
addl %esi,%edi
- xorl %edx,%ecx
+ vpxor %xmm0,%xmm6,%xmm6
+ xorl %ebx,%ebp
+ xorl %ecx,%ebx
addl %eax,%edi
+ addl 36(%esp),%edx
vpsrld $30,%xmm6,%xmm0
vmovdqa %xmm1,16(%esp)
- movl %ebx,%esi
+ andl %ebx,%ebp
xorl %ecx,%ebx
- addl 36(%esp),%edx
- andl %ecx,%esi
- vpslld $2,%xmm6,%xmm6
- andl %ebx,%ebp
shrdl $7,%eax,%eax
- addl %esi,%edx
movl %edi,%esi
+ vpslld $2,%xmm6,%xmm6
+ xorl %ebx,%ebp
shldl $5,%edi,%edi
addl %ebp,%edx
- xorl %ecx,%ebx
+ xorl %eax,%esi
+ xorl %ebx,%eax
addl %edi,%edx
+ addl 40(%esp),%ecx
+ andl %eax,%esi
vpor %xmm0,%xmm6,%xmm6
- movl %eax,%ebp
xorl %ebx,%eax
+ shrdl $7,%edi,%edi
vmovdqa 96(%esp),%xmm0
- addl 40(%esp),%ecx
- andl %ebx,%ebp
- andl %eax,%esi
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
movl %edx,%ebp
+ xorl %eax,%esi
shldl $5,%edx,%edx
addl %esi,%ecx
- xorl %ebx,%eax
+ xorl %edi,%ebp
+ xorl %eax,%edi
addl %edx,%ecx
- movl %edi,%esi
- xorl %eax,%edi
addl 44(%esp),%ebx
- andl %eax,%esi
andl %edi,%ebp
+ xorl %eax,%edi
shrdl $7,%edx,%edx
- addl %esi,%ebx
movl %ecx,%esi
+ xorl %edi,%ebp
shldl $5,%ecx,%ecx
addl %ebp,%ebx
- xorl %eax,%edi
+ xorl %edx,%esi
+ xorl %edi,%edx
addl %ecx,%ebx
vpalignr $8,%xmm5,%xmm6,%xmm1
vpxor %xmm3,%xmm7,%xmm7
- movl %edx,%ebp
+ addl 48(%esp),%eax
+ andl %edx,%esi
xorl %edi,%edx
- addl 48(%esp),%eax
- andl %edi,%ebp
+ shrdl $7,%ecx,%ecx
vpxor %xmm0,%xmm7,%xmm7
vmovdqa %xmm3,96(%esp)
- andl %edx,%esi
- shrdl $7,%ecx,%ecx
+ movl %ebx,%ebp
+ xorl %edx,%esi
vmovdqa 144(%esp),%xmm3
vpaddd %xmm6,%xmm2,%xmm2
- addl %ebp,%eax
- movl %ebx,%ebp
- vpxor %xmm1,%xmm7,%xmm7
shldl $5,%ebx,%ebx
addl %esi,%eax
- xorl %edi,%edx
+ vpxor %xmm1,%xmm7,%xmm7
+ xorl %ecx,%ebp
+ xorl %edx,%ecx
addl %ebx,%eax
+ addl 52(%esp),%edi
vpsrld $30,%xmm7,%xmm1
vmovdqa %xmm2,32(%esp)
- movl %ecx,%esi
+ andl %ecx,%ebp
xorl %edx,%ecx
- addl 52(%esp),%edi
- andl %edx,%esi
- vpslld $2,%xmm7,%xmm7
- andl %ecx,%ebp
shrdl $7,%ebx,%ebx
- addl %esi,%edi
movl %eax,%esi
+ vpslld $2,%xmm7,%xmm7
+ xorl %ecx,%ebp
shldl $5,%eax,%eax
addl %ebp,%edi
- xorl %edx,%ecx
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
addl %eax,%edi
+ addl 56(%esp),%edx
+ andl %ebx,%esi
vpor %xmm1,%xmm7,%xmm7
- movl %ebx,%ebp
xorl %ecx,%ebx
+ shrdl $7,%eax,%eax
vmovdqa 64(%esp),%xmm1
- addl 56(%esp),%edx
- andl %ecx,%ebp
- andl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
movl %edi,%ebp
+ xorl %ebx,%esi
shldl $5,%edi,%edi
addl %esi,%edx
- xorl %ecx,%ebx
+ xorl %eax,%ebp
+ xorl %ebx,%eax
addl %edi,%edx
- movl %eax,%esi
- xorl %ebx,%eax
addl 60(%esp),%ecx
- andl %ebx,%esi
andl %eax,%ebp
+ xorl %ebx,%eax
shrdl $7,%edi,%edi
- addl %esi,%ecx
movl %edx,%esi
+ xorl %eax,%ebp
shldl $5,%edx,%edx
addl %ebp,%ecx
- xorl %ebx,%eax
+ xorl %edi,%esi
+ xorl %eax,%edi
addl %edx,%ecx
vpalignr $8,%xmm6,%xmm7,%xmm2
vpxor %xmm4,%xmm0,%xmm0
- movl %edi,%ebp
+ addl (%esp),%ebx
+ andl %edi,%esi
xorl %eax,%edi
- addl (%esp),%ebx
- andl %eax,%ebp
+ shrdl $7,%edx,%edx
vpxor %xmm1,%xmm0,%xmm0
vmovdqa %xmm4,64(%esp)
- andl %edi,%esi
- shrdl $7,%edx,%edx
+ movl %ecx,%ebp
+ xorl %edi,%esi
vmovdqa %xmm3,%xmm4
vpaddd %xmm7,%xmm3,%xmm3
- addl %ebp,%ebx
- movl %ecx,%ebp
- vpxor %xmm2,%xmm0,%xmm0
shldl $5,%ecx,%ecx
addl %esi,%ebx
- xorl %eax,%edi
+ vpxor %xmm2,%xmm0,%xmm0
+ xorl %edx,%ebp
+ xorl %edi,%edx
addl %ecx,%ebx
+ addl 4(%esp),%eax
vpsrld $30,%xmm0,%xmm2
vmovdqa %xmm3,48(%esp)
- movl %edx,%esi
+ andl %edx,%ebp
xorl %edi,%edx
- addl 4(%esp),%eax
- andl %edi,%esi
- vpslld $2,%xmm0,%xmm0
- andl %edx,%ebp
shrdl $7,%ecx,%ecx
- addl %esi,%eax
movl %ebx,%esi
+ vpslld $2,%xmm0,%xmm0
+ xorl %edx,%ebp
shldl $5,%ebx,%ebx
addl %ebp,%eax
- xorl %edi,%edx
+ xorl %ecx,%esi
+ xorl %edx,%ecx
addl %ebx,%eax
+ addl 8(%esp),%edi
+ andl %ecx,%esi
vpor %xmm2,%xmm0,%xmm0
- movl %ecx,%ebp
xorl %edx,%ecx
+ shrdl $7,%ebx,%ebx
vmovdqa 80(%esp),%xmm2
- addl 8(%esp),%edi
- andl %edx,%ebp
- andl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
movl %eax,%ebp
+ xorl %ecx,%esi
shldl $5,%eax,%eax
addl %esi,%edi
- xorl %edx,%ecx
+ xorl %ebx,%ebp
+ xorl %ecx,%ebx
addl %eax,%edi
- movl %ebx,%esi
- xorl %ecx,%ebx
addl 12(%esp),%edx
- andl %ecx,%esi
andl %ebx,%ebp
+ xorl %ecx,%ebx
shrdl $7,%eax,%eax
- addl %esi,%edx
movl %edi,%esi
+ xorl %ebx,%ebp
shldl $5,%edi,%edi
addl %ebp,%edx
- xorl %ecx,%ebx
+ xorl %eax,%esi
+ xorl %ebx,%eax
addl %edi,%edx
vpalignr $8,%xmm7,%xmm0,%xmm3
vpxor %xmm5,%xmm1,%xmm1
- movl %eax,%ebp
+ addl 16(%esp),%ecx
+ andl %eax,%esi
xorl %ebx,%eax
- addl 16(%esp),%ecx
- andl %ebx,%ebp
+ shrdl $7,%edi,%edi
vpxor %xmm2,%xmm1,%xmm1
vmovdqa %xmm5,80(%esp)
- andl %eax,%esi
- shrdl $7,%edi,%edi
+ movl %edx,%ebp
+ xorl %eax,%esi
vmovdqa %xmm4,%xmm5
vpaddd %xmm0,%xmm4,%xmm4
- addl %ebp,%ecx
- movl %edx,%ebp
- vpxor %xmm3,%xmm1,%xmm1
shldl $5,%edx,%edx
addl %esi,%ecx
- xorl %ebx,%eax
+ vpxor %xmm3,%xmm1,%xmm1
+ xorl %edi,%ebp
+ xorl %eax,%edi
addl %edx,%ecx
+ addl 20(%esp),%ebx
vpsrld $30,%xmm1,%xmm3
vmovdqa %xmm4,(%esp)
- movl %edi,%esi
+ andl %edi,%ebp
xorl %eax,%edi
- addl 20(%esp),%ebx
- andl %eax,%esi
- vpslld $2,%xmm1,%xmm1
- andl %edi,%ebp
shrdl $7,%edx,%edx
- addl %esi,%ebx
movl %ecx,%esi
+ vpslld $2,%xmm1,%xmm1
+ xorl %edi,%ebp
shldl $5,%ecx,%ecx
addl %ebp,%ebx
- xorl %eax,%edi
+ xorl %edx,%esi
+ xorl %edi,%edx
addl %ecx,%ebx
+ addl 24(%esp),%eax
+ andl %edx,%esi
vpor %xmm3,%xmm1,%xmm1
- movl %edx,%ebp
xorl %edi,%edx
+ shrdl $7,%ecx,%ecx
vmovdqa 96(%esp),%xmm3
- addl 24(%esp),%eax
- andl %edi,%ebp
- andl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
movl %ebx,%ebp
+ xorl %edx,%esi
shldl $5,%ebx,%ebx
addl %esi,%eax
- xorl %edi,%edx
+ xorl %ecx,%ebp
+ xorl %edx,%ecx
addl %ebx,%eax
- movl %ecx,%esi
- xorl %edx,%ecx
addl 28(%esp),%edi
- andl %edx,%esi
andl %ecx,%ebp
+ xorl %edx,%ecx
shrdl $7,%ebx,%ebx
- addl %esi,%edi
movl %eax,%esi
+ xorl %ecx,%ebp
shldl $5,%eax,%eax
addl %ebp,%edi
- xorl %edx,%ecx
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
addl %eax,%edi
vpalignr $8,%xmm0,%xmm1,%xmm4
vpxor %xmm6,%xmm2,%xmm2
- movl %ebx,%ebp
+ addl 32(%esp),%edx
+ andl %ebx,%esi
xorl %ecx,%ebx
- addl 32(%esp),%edx
- andl %ecx,%ebp
+ shrdl $7,%eax,%eax
vpxor %xmm3,%xmm2,%xmm2
vmovdqa %xmm6,96(%esp)
- andl %ebx,%esi
- shrdl $7,%eax,%eax
+ movl %edi,%ebp
+ xorl %ebx,%esi
vmovdqa %xmm5,%xmm6
vpaddd %xmm1,%xmm5,%xmm5
- addl %ebp,%edx
- movl %edi,%ebp
- vpxor %xmm4,%xmm2,%xmm2
shldl $5,%edi,%edi
addl %esi,%edx
- xorl %ecx,%ebx
+ vpxor %xmm4,%xmm2,%xmm2
+ xorl %eax,%ebp
+ xorl %ebx,%eax
addl %edi,%edx
+ addl 36(%esp),%ecx
vpsrld $30,%xmm2,%xmm4
vmovdqa %xmm5,16(%esp)
- movl %eax,%esi
+ andl %eax,%ebp
xorl %ebx,%eax
- addl 36(%esp),%ecx
- andl %ebx,%esi
- vpslld $2,%xmm2,%xmm2
- andl %eax,%ebp
shrdl $7,%edi,%edi
- addl %esi,%ecx
movl %edx,%esi
+ vpslld $2,%xmm2,%xmm2
+ xorl %eax,%ebp
shldl $5,%edx,%edx
addl %ebp,%ecx
- xorl %ebx,%eax
+ xorl %edi,%esi
+ xorl %eax,%edi
addl %edx,%ecx
+ addl 40(%esp),%ebx
+ andl %edi,%esi
vpor %xmm4,%xmm2,%xmm2
- movl %edi,%ebp
xorl %eax,%edi
+ shrdl $7,%edx,%edx
vmovdqa 64(%esp),%xmm4
- addl 40(%esp),%ebx
- andl %eax,%ebp
- andl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
movl %ecx,%ebp
+ xorl %edi,%esi
shldl $5,%ecx,%ecx
addl %esi,%ebx
- xorl %eax,%edi
+ xorl %edx,%ebp
+ xorl %edi,%edx
addl %ecx,%ebx
- movl %edx,%esi
- xorl %edi,%edx
addl 44(%esp),%eax
- andl %edi,%esi
andl %edx,%ebp
+ xorl %edi,%edx
shrdl $7,%ecx,%ecx
- addl %esi,%eax
movl %ebx,%esi
+ xorl %edx,%ebp
shldl $5,%ebx,%ebx
addl %ebp,%eax
- xorl %edi,%edx
+ xorl %edx,%esi
addl %ebx,%eax
vpalignr $8,%xmm1,%xmm2,%xmm5
vpxor %xmm7,%xmm3,%xmm3
addl 48(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
shldl $5,%eax,%eax
vpxor %xmm4,%xmm3,%xmm3
vmovdqa %xmm7,64(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
+ addl %esi,%edi
+ xorl %ecx,%ebp
vmovdqa %xmm6,%xmm7
vpaddd %xmm2,%xmm6,%xmm6
shrdl $7,%ebx,%ebx
- addl %esi,%edi
+ addl %eax,%edi
vpxor %xmm5,%xmm3,%xmm3
addl 52(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
shldl $5,%edi,%edi
vpsrld $30,%xmm3,%xmm5
vmovdqa %xmm6,32(%esp)
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %ebp,%edx
vpslld $2,%xmm3,%xmm3
addl 56(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
movl %edx,%ebp
shldl $5,%edx,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %esi,%ecx
vpor %xmm5,%xmm3,%xmm3
addl 60(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
shldl $5,%ecx,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
addl (%esp),%eax
vpaddd %xmm3,%xmm7,%xmm7
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
shldl $5,%ebx,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
vmovdqa %xmm7,48(%esp)
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
addl 4(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
shldl $5,%eax,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
addl 8(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
shldl $5,%edi,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %esi,%edx
addl 12(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
shldl $5,%edx,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
movl 196(%esp),%ebp
cmpl 200(%esp),%ebp
- je .L008done
+ je .L010done
vmovdqa 160(%esp),%xmm7
vmovdqa 176(%esp),%xmm6
vmovdqu (%ebp),%xmm0
@@ -7418,110 +7694,109 @@
movl %ebp,196(%esp)
vmovdqa %xmm7,96(%esp)
addl 16(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
vpshufb %xmm6,%xmm1,%xmm1
movl %ecx,%ebp
shldl $5,%ecx,%ecx
vpaddd %xmm7,%xmm0,%xmm4
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
vmovdqa %xmm4,(%esp)
addl 20(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
addl 24(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
shldl $5,%eax,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %esi,%edi
addl 28(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
shldl $5,%edi,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %ebp,%edx
addl 32(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
vpshufb %xmm6,%xmm2,%xmm2
movl %edx,%ebp
shldl $5,%edx,%edx
vpaddd %xmm7,%xmm1,%xmm5
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %esi,%ecx
vmovdqa %xmm5,16(%esp)
addl 36(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
shldl $5,%ecx,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
addl 40(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
shldl $5,%ebx,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
addl 44(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
shldl $5,%eax,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
addl 48(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
vpshufb %xmm6,%xmm3,%xmm3
movl %edi,%ebp
shldl $5,%edi,%edi
vpaddd %xmm7,%xmm2,%xmm6
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %esi,%edx
vmovdqa %xmm6,32(%esp)
addl 52(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
shldl $5,%edx,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
addl 56(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
shldl $5,%ecx,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
addl 60(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
movl 192(%esp),%ebp
addl (%ebp),%eax
addl 4(%ebp),%esi
@@ -7530,109 +7805,112 @@
addl 12(%ebp),%edx
movl %esi,4(%ebp)
addl 16(%ebp),%edi
+ movl %ecx,%ebx
movl %ecx,8(%ebp)
- movl %esi,%ebx
+ xorl %edx,%ebx
movl %edx,12(%ebp)
movl %edi,16(%ebp)
- jmp .L007loop
+ movl %esi,%ebp
+ andl %ebx,%esi
+ movl %ebp,%ebx
+ jmp .L009loop
.align 16
-.L008done:
+.L010done:
addl 16(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
shldl $5,%ecx,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
addl 20(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ xorl %edx,%esi
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
addl 24(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
movl %eax,%ebp
shldl $5,%eax,%eax
- xorl %ecx,%esi
+ addl %esi,%edi
+ xorl %ecx,%ebp
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %esi,%edi
addl 28(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
shldl $5,%edi,%edi
- xorl %ebx,%ebp
+ addl %ebp,%edx
+ xorl %ebx,%esi
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %ebp,%edx
addl 32(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
movl %edx,%ebp
shldl $5,%edx,%edx
- xorl %eax,%esi
+ addl %esi,%ecx
+ xorl %eax,%ebp
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %esi,%ecx
addl 36(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
shldl $5,%ecx,%ecx
- xorl %edi,%ebp
+ addl %ebp,%ebx
+ xorl %edi,%esi
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %ebp,%ebx
addl 40(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
movl %ebx,%ebp
shldl $5,%ebx,%ebx
- xorl %edx,%esi
+ addl %esi,%eax
+ xorl %edx,%ebp
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %esi,%eax
addl 44(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
shldl $5,%eax,%eax
- xorl %ecx,%ebp
+ addl %ebp,%edi
+ xorl %ecx,%esi
+ shrdl $7,%ebx,%ebx
addl %eax,%edi
- shrdl $7,%ebx,%ebx
- addl %ebp,%edi
addl 48(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
movl %edi,%ebp
shldl $5,%edi,%edi
- xorl %ebx,%esi
+ addl %esi,%edx
+ xorl %ebx,%ebp
+ shrdl $7,%eax,%eax
addl %edi,%edx
- shrdl $7,%eax,%eax
- addl %esi,%edx
addl 52(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
shldl $5,%edx,%edx
- xorl %eax,%ebp
+ addl %ebp,%ecx
+ xorl %eax,%esi
+ shrdl $7,%edi,%edi
addl %edx,%ecx
- shrdl $7,%edi,%edi
- addl %ebp,%ecx
addl 56(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
shldl $5,%ecx,%ecx
- xorl %edi,%esi
+ addl %esi,%ebx
+ xorl %edi,%ebp
+ shrdl $7,%edx,%edx
addl %ecx,%ebx
- shrdl $7,%edx,%edx
- addl %esi,%ebx
addl 60(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
shldl $5,%ebx,%ebx
- xorl %edx,%ebp
+ addl %ebp,%eax
+ shrdl $7,%ecx,%ecx
addl %ebx,%eax
- shrdl $7,%ecx,%ecx
- addl %ebp,%eax
vzeroall
movl 192(%esp),%ebp
addl (%ebp),%eax
@@ -7659,9 +7937,10 @@
.long 2400959708,2400959708,2400959708,2400959708
.long 3395469782,3395469782,3395469782,3395469782
.long 66051,67438087,134810123,202182159
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#endif
Modified: trunk/secure/lib/libcrypto/i386/sha256-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/sha256-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/sha256-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/sha256-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from sha256-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/sha256-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from sha256-586.pl. */
#ifdef PIC
.file "sha256-586.S"
.text
@@ -29,237 +29,6762 @@
movl %edi,4(%esp)
movl %eax,8(%esp)
movl %ebx,12(%esp)
+ leal OPENSSL_ia32cap_P-.L001K256(%ebp),%edx
+ movl (%edx),%ecx
+ movl 4(%edx),%ebx
+ testl $1048576,%ecx
+ jnz .L002loop
+ movl 8(%edx),%edx
+ testl $16777216,%ecx
+ jz .L003no_xmm
+ andl $1073741824,%ecx
+ andl $268435968,%ebx
+ testl $536870912,%edx
+ jnz .L004shaext
+ orl %ebx,%ecx
+ andl $1342177280,%ecx
+ cmpl $1342177280,%ecx
+ je .L005AVX
+ testl $512,%ebx
+ jnz .L006SSSE3
+.L003no_xmm:
+ subl %edi,%eax
+ cmpl $256,%eax
+ jae .L007unrolled
+ jmp .L002loop
.align 16
.L002loop:
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
+ bswap %eax
movl 12(%edi),%edx
- bswap %eax
bswap %ebx
+ pushl %eax
bswap %ecx
+ pushl %ebx
bswap %edx
- pushl %eax
- pushl %ebx
pushl %ecx
pushl %edx
movl 16(%edi),%eax
movl 20(%edi),%ebx
movl 24(%edi),%ecx
+ bswap %eax
movl 28(%edi),%edx
- bswap %eax
bswap %ebx
+ pushl %eax
bswap %ecx
+ pushl %ebx
bswap %edx
- pushl %eax
- pushl %ebx
pushl %ecx
pushl %edx
movl 32(%edi),%eax
movl 36(%edi),%ebx
movl 40(%edi),%ecx
+ bswap %eax
movl 44(%edi),%edx
- bswap %eax
bswap %ebx
+ pushl %eax
bswap %ecx
+ pushl %ebx
bswap %edx
- pushl %eax
- pushl %ebx
pushl %ecx
pushl %edx
movl 48(%edi),%eax
movl 52(%edi),%ebx
movl 56(%edi),%ecx
+ bswap %eax
movl 60(%edi),%edx
- bswap %eax
bswap %ebx
+ pushl %eax
bswap %ecx
+ pushl %ebx
bswap %edx
- pushl %eax
- pushl %ebx
pushl %ecx
pushl %edx
addl $64,%edi
- subl $32,%esp
- movl %edi,100(%esp)
+ leal -36(%esp),%esp
+ movl %edi,104(%esp)
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edi
- movl %ebx,4(%esp)
- movl %ecx,8(%esp)
- movl %edi,12(%esp)
+ movl %ebx,8(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,12(%esp)
+ movl %edi,16(%esp)
+ movl %ebx,(%esp)
movl 16(%esi),%edx
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edi
- movl %ebx,20(%esp)
- movl %ecx,24(%esp)
- movl %edi,28(%esp)
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ movl %edi,32(%esp)
.align 16
-.L00300_15:
- movl 92(%esp),%ebx
+.L00800_15:
movl %edx,%ecx
+ movl 24(%esp),%esi
rorl $14,%ecx
- movl 20(%esp),%esi
+ movl 28(%esp),%edi
xorl %edx,%ecx
+ xorl %edi,%esi
+ movl 96(%esp),%ebx
rorl $5,%ecx
- xorl %edx,%ecx
- rorl $6,%ecx
- movl 24(%esp),%edi
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
addl %ecx,%ebx
+ xorl %edi,%eax
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3248222580,%esi
+ jne .L00800_15
+ movl 156(%esp),%ecx
+ jmp .L00916_63
+.align 16
+.L00916_63:
+ movl %ecx,%ebx
+ movl 104(%esp),%esi
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
xorl %edi,%esi
- movl %edx,16(%esp)
- movl %eax,%ecx
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 160(%esp),%ebx
+ shrl $10,%edi
+ addl 124(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 24(%esp),%esi
+ rorl $14,%ecx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl %ebx,96(%esp)
+ rorl $5,%ecx
andl %edx,%esi
- movl 12(%esp),%edx
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
xorl %edi,%esi
- movl %eax,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
addl %esi,%ebx
rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ movl 156(%esp),%ecx
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3329325298,%esi
+ jne .L00916_63
+ movl 356(%esp),%esi
+ movl 8(%esp),%ebx
+ movl 16(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl 24(%esp),%eax
+ movl 28(%esp),%ebx
+ movl 32(%esp),%ecx
+ movl 360(%esp),%edi
+ addl 16(%esi),%edx
+ addl 20(%esi),%eax
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %eax,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ leal 356(%esp),%esp
+ subl $256,%ebp
+ cmpl 8(%esp),%edi
+ jb .L002loop
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 64
+.L001K256:
+.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
+.long 66051,67438087,134810123,202182159
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte 62,0
+.align 16
+.L007unrolled:
+ leal -96(%esp),%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebp
+ movl 8(%esi),%ecx
+ movl 12(%esi),%ebx
+ movl %ebp,4(%esp)
+ xorl %ecx,%ebp
+ movl %ecx,8(%esp)
+ movl %ebx,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %ebx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ jmp .L010grand_loop
+.align 16
+.L010grand_loop:
+ movl (%edi),%ebx
+ movl 4(%edi),%ecx
+ bswap %ebx
+ movl 8(%edi),%esi
+ bswap %ecx
+ movl %ebx,32(%esp)
+ bswap %esi
+ movl %ecx,36(%esp)
+ movl %esi,40(%esp)
+ movl 12(%edi),%ebx
+ movl 16(%edi),%ecx
+ bswap %ebx
+ movl 20(%edi),%esi
+ bswap %ecx
+ movl %ebx,44(%esp)
+ bswap %esi
+ movl %ecx,48(%esp)
+ movl %esi,52(%esp)
+ movl 24(%edi),%ebx
+ movl 28(%edi),%ecx
+ bswap %ebx
+ movl 32(%edi),%esi
+ bswap %ecx
+ movl %ebx,56(%esp)
+ bswap %esi
+ movl %ecx,60(%esp)
+ movl %esi,64(%esp)
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %ebx
+ movl 44(%edi),%esi
+ bswap %ecx
+ movl %ebx,68(%esp)
+ bswap %esi
+ movl %ecx,72(%esp)
+ movl %esi,76(%esp)
+ movl 48(%edi),%ebx
+ movl 52(%edi),%ecx
+ bswap %ebx
+ movl 56(%edi),%esi
+ bswap %ecx
+ movl %ebx,80(%esp)
+ bswap %esi
+ movl %ecx,84(%esp)
+ movl %esi,88(%esp)
+ movl 60(%edi),%ebx
+ addl $64,%edi
+ bswap %ebx
+ movl %edi,100(%esp)
+ movl %ebx,92(%esp)
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1116352408(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 36(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1899447441(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 40(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3049323471(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 44(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3921009573(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 48(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 961987163(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
rorl $2,%ecx
- addl %ebx,%edx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 52(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1508970993(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 56(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2453635748(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 60(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
movl 8(%esp),%edi
- addl %ecx,%ebx
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2870763221(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 64(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3624381080(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 68(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 310598401(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 72(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
movl %eax,%ecx
- subl $4,%esp
- orl %esi,%eax
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 607225278(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 76(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
andl %esi,%ecx
- andl %edi,%eax
- movl (%ebp),%esi
- orl %ecx,%eax
- addl $4,%ebp
- addl %ebx,%eax
- addl %esi,%edx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1426881987(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
addl %esi,%eax
- cmpl $3248222580,%esi
- jne .L00300_15
- movl 152(%esp),%ebx
-.align 16
-.L00416_63:
- movl %ebx,%esi
- movl 100(%esp),%ecx
+ movl %edx,%ecx
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 80(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1925078388(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 84(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
rorl $11,%esi
+ andl %ebp,%eax
+ leal 2162078206(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 88(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2614888103(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 92(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3248222580(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3835390401(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
movl %ecx,%edi
+ rorl $2,%ecx
xorl %ebx,%esi
+ shrl $3,%ebx
rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 4022224774(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
shrl $3,%ebx
- rorl $2,%edi
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 264347078(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
xorl %ecx,%edi
- rorl $17,%edi
- shrl $10,%ecx
- addl 156(%esp),%ebx
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
xorl %ecx,%edi
- addl 120(%esp),%ebx
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 604807628(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
addl %edi,%ebx
- rorl $14,%ecx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 770255983(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1249150122(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1555081692(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1996064986(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
movl 20(%esp),%esi
- xorl %edx,%ecx
- rorl $5,%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2554220882(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2821834349(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2952996808(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3210313671(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3336571891(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3584528711(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 113926993(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
movl %ebx,92(%esp)
- xorl %edx,%ecx
- rorl $6,%ecx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 338241895(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
movl 24(%esp),%edi
- addl %ecx,%ebx
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
xorl %edi,%esi
- movl %edx,16(%esp)
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
movl %eax,%ecx
- andl %edx,%esi
- movl 12(%esp),%edx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 666307205(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 773529912(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
xorl %edi,%esi
- movl %eax,%edi
- addl %esi,%ebx
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1294757372(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1396182291(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1695183700(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1986661051(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2177026350(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2456956037(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2730485921(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2820302411(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3259730800(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3345764771(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3516065817(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
rorl $2,%ecx
- addl %ebx,%edx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3600352804(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 4094571909(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,92(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
movl 8(%esp),%edi
- addl %ecx,%ebx
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 275423344(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 430227734(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 506948616(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
movl %eax,%ecx
- subl $4,%esp
- orl %esi,%eax
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 659060556(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
andl %esi,%ecx
- andl %edi,%eax
- movl (%ebp),%esi
- orl %ecx,%eax
- addl $4,%ebp
- addl %ebx,%eax
- movl 152(%esp),%ebx
- addl %esi,%edx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 883997877(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
addl %esi,%eax
- cmpl $3329325298,%esi
- jne .L00416_63
- movl 352(%esp),%esi
- movl 4(%esp),%ebx
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 958139571(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1322822218(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1537002063(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1747873779(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1955562222(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2024104815(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2227730452(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2361852424(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2428436474(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2756734187(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3204031479(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3329325298(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebp
+ movl 12(%esp),%ecx
addl (%esi),%eax
- addl 4(%esi),%ebx
- addl 8(%esi),%ecx
- addl 12(%esi),%edi
+ addl 4(%esi),%ebp
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
movl %eax,(%esi)
- movl %ebx,4(%esi)
- movl %ecx,8(%esi)
- movl %edi,12(%esi)
- movl 20(%esp),%eax
+ movl %ebp,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
movl 24(%esp),%ebx
movl 28(%esp),%ecx
- movl 356(%esp),%edi
addl 16(%esi),%edx
- addl 20(%esi),%eax
+ addl 20(%esi),%edi
addl 24(%esi),%ebx
addl 28(%esi),%ecx
movl %edx,16(%esi)
- movl %eax,20(%esi)
+ movl %edi,20(%esi)
movl %ebx,24(%esi)
movl %ecx,28(%esi)
- addl $352,%esp
- subl $256,%ebp
- cmpl 8(%esp),%edi
- jb .L002loop
- movl 12(%esp),%esp
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ cmpl 104(%esp),%edi
+ jb .L010grand_loop
+ movl 108(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
-.align 64
-.L001K256:
-.long 1116352408,1899447441,3049323471,3921009573
-.long 961987163,1508970993,2453635748,2870763221
-.long 3624381080,310598401,607225278,1426881987
-.long 1925078388,2162078206,2614888103,3248222580
-.long 3835390401,4022224774,264347078,604807628
-.long 770255983,1249150122,1555081692,1996064986
-.long 2554220882,2821834349,2952996808,3210313671
-.long 3336571891,3584528711,113926993,338241895
-.long 666307205,773529912,1294757372,1396182291
-.long 1695183700,1986661051,2177026350,2456956037
-.long 2730485921,2820302411,3259730800,3345764771
-.long 3516065817,3600352804,4094571909,275423344
-.long 430227734,506948616,659060556,883997877
-.long 958139571,1322822218,1537002063,1747873779
-.long 1955562222,2024104815,2227730452,2361852424
-.long 2428436474,2756734187,3204031479,3329325298
+.align 32
+.L004shaext:
+ subl $32,%esp
+ movdqu (%esi),%xmm1
+ leal 128(%ebp),%ebp
+ movdqu 16(%esi),%xmm2
+ movdqa 128(%ebp),%xmm7
+ pshufd $27,%xmm1,%xmm0
+ pshufd $177,%xmm1,%xmm1
+ pshufd $27,%xmm2,%xmm2
+.byte 102,15,58,15,202,8
+ punpcklqdq %xmm0,%xmm2
+ jmp .L011loop_shaext
+.align 16
+.L011loop_shaext:
+ movdqu (%edi),%xmm3
+ movdqu 16(%edi),%xmm4
+ movdqu 32(%edi),%xmm5
+.byte 102,15,56,0,223
+ movdqu 48(%edi),%xmm6
+ movdqa %xmm2,16(%esp)
+ movdqa -128(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 102,15,56,0,231
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ nop
+ movdqa %xmm1,(%esp)
+.byte 15,56,203,202
+ movdqa -112(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 102,15,56,0,239
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ leal 64(%edi),%edi
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 102,15,56,0,247
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -80(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa -64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa -48(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -16(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa (%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 16(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa 32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa 48(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa 64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 80(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+.byte 15,56,203,202
+ paddd %xmm7,%xmm6
+ movdqa 96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+.byte 15,56,205,245
+ movdqa 128(%ebp),%xmm7
+.byte 15,56,203,202
+ movdqa 112(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+ nop
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ cmpl %edi,%eax
+ nop
+.byte 15,56,203,202
+ paddd 16(%esp),%xmm2
+ paddd (%esp),%xmm1
+ jnz .L011loop_shaext
+ pshufd $177,%xmm2,%xmm2
+ pshufd $27,%xmm1,%xmm7
+ pshufd $177,%xmm1,%xmm1
+ punpckhqdq %xmm2,%xmm1
+.byte 102,15,58,15,215,8
+ movl 44(%esp),%esp
+ movdqu %xmm1,(%esi)
+ movdqu %xmm2,16(%esi)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L006SSSE3:
+ leal -96(%esp),%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,4(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,8(%esp)
+ movl %edi,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%edi
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ movdqa 256(%ebp),%xmm7
+ jmp .L012grand_ssse3
+.align 16
+.L012grand_ssse3:
+ movdqu (%edi),%xmm0
+ movdqu 16(%edi),%xmm1
+ movdqu 32(%edi),%xmm2
+ movdqu 48(%edi),%xmm3
+ addl $64,%edi
+.byte 102,15,56,0,199
+ movl %edi,100(%esp)
+.byte 102,15,56,0,207
+ movdqa (%ebp),%xmm4
+.byte 102,15,56,0,215
+ movdqa 16(%ebp),%xmm5
+ paddd %xmm0,%xmm4
+.byte 102,15,56,0,223
+ movdqa 32(%ebp),%xmm6
+ paddd %xmm1,%xmm5
+ movdqa 48(%ebp),%xmm7
+ movdqa %xmm4,32(%esp)
+ paddd %xmm2,%xmm6
+ movdqa %xmm5,48(%esp)
+ paddd %xmm3,%xmm7
+ movdqa %xmm6,64(%esp)
+ movdqa %xmm7,80(%esp)
+ jmp .L013ssse3_00_47
+.align 16
+.L013ssse3_00_47:
+ addl $64,%ebp
+ movl %edx,%ecx
+ movdqa %xmm1,%xmm4
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ movdqa %xmm3,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+.byte 102,15,58,15,224,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,250,4
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm0
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm3,%xmm7
+ xorl %esi,%ecx
+ addl 32(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm0
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm0
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ pshufd $80,%xmm0,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa (%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm0
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ paddd %xmm0,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,32(%esp)
+ movl %edx,%ecx
+ movdqa %xmm2,%xmm4
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ movdqa %xmm0,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+.byte 102,15,58,15,225,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,251,4
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm1
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm0,%xmm7
+ xorl %esi,%ecx
+ addl 48(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm1
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm1
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ pshufd $80,%xmm1,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 16(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm1
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ paddd %xmm1,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,48(%esp)
+ movl %edx,%ecx
+ movdqa %xmm3,%xmm4
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ movdqa %xmm1,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+.byte 102,15,58,15,226,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,248,4
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm2
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm1,%xmm7
+ xorl %esi,%ecx
+ addl 64(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm2
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm2
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ pshufd $80,%xmm2,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 32(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm2
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ paddd %xmm2,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,64(%esp)
+ movl %edx,%ecx
+ movdqa %xmm0,%xmm4
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ movdqa %xmm2,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+.byte 102,15,58,15,227,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,249,4
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm3
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm2,%xmm7
+ xorl %esi,%ecx
+ addl 80(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm3
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm3
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ pshufd $80,%xmm3,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 48(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm3
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ paddd %xmm3,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,80(%esp)
+ cmpl $66051,64(%ebp)
+ jne .L013ssse3_00_47
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 32(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 48(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 64(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 80(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebx
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebx,4(%esp)
+ xorl %edi,%ebx
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %edi,20(%esp)
+ movl 28(%esp),%edi
+ movl %ecx,24(%esi)
+ addl 28(%esi),%edi
+ movl %ecx,24(%esp)
+ movl %edi,28(%esi)
+ movl %edi,28(%esp)
+ movl 100(%esp),%edi
+ movdqa 64(%ebp),%xmm7
+ subl $192,%ebp
+ cmpl 104(%esp),%edi
+ jb .L012grand_ssse3
+ movl 108(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L005AVX:
+ andl $264,%edx
+ cmpl $264,%edx
+ je .L014AVX_BMI
+ leal -96(%esp),%esp
+ vzeroall
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,4(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,8(%esp)
+ movl %edi,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%edi
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ vmovdqa 256(%ebp),%xmm7
+ jmp .L015grand_avx
+.align 32
+.L015grand_avx:
+ vmovdqu (%edi),%xmm0
+ vmovdqu 16(%edi),%xmm1
+ vmovdqu 32(%edi),%xmm2
+ vmovdqu 48(%edi),%xmm3
+ addl $64,%edi
+ vpshufb %xmm7,%xmm0,%xmm0
+ movl %edi,100(%esp)
+ vpshufb %xmm7,%xmm1,%xmm1
+ vpshufb %xmm7,%xmm2,%xmm2
+ vpaddd (%ebp),%xmm0,%xmm4
+ vpshufb %xmm7,%xmm3,%xmm3
+ vpaddd 16(%ebp),%xmm1,%xmm5
+ vpaddd 32(%ebp),%xmm2,%xmm6
+ vpaddd 48(%ebp),%xmm3,%xmm7
+ vmovdqa %xmm4,32(%esp)
+ vmovdqa %xmm5,48(%esp)
+ vmovdqa %xmm6,64(%esp)
+ vmovdqa %xmm7,80(%esp)
+ jmp .L016avx_00_47
+.align 16
+.L016avx_00_47:
+ addl $64,%ebp
+ vpalignr $4,%xmm0,%xmm1,%xmm4
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 20(%esp),%esi
+ vpalignr $4,%xmm2,%xmm3,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ vpaddd %xmm7,%xmm0,%xmm0
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrld $3,%xmm4,%xmm7
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ vpslld $14,%xmm4,%xmm5
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,(%esp)
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ vpshufd $250,%xmm3,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpsrld $11,%xmm6,%xmm6
+ addl 32(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpxor %xmm5,%xmm4,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ vpslld $11,%xmm5,%xmm5
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 16(%esp),%esi
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $10,%xmm7,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ vpaddd %xmm4,%xmm0,%xmm0
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,28(%esp)
+ vpxor %xmm5,%xmm6,%xmm6
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ vpsrlq $19,%xmm7,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ vpshufd $132,%xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ vpsrldq $8,%xmm7,%xmm7
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 12(%esp),%esi
+ vpaddd %xmm7,%xmm0,%xmm0
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ vpshufd $80,%xmm0,%xmm7
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ vpsrld $10,%xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,24(%esp)
+ vpsrlq $19,%xmm7,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpshufd $232,%xmm6,%xmm7
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpslldq $8,%xmm7,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ vpaddd %xmm7,%xmm0,%xmm0
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 8(%esp),%esi
+ vpaddd (%ebp),%xmm0,%xmm6
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ vmovdqa %xmm6,32(%esp)
+ vpalignr $4,%xmm1,%xmm2,%xmm4
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 4(%esp),%esi
+ vpalignr $4,%xmm3,%xmm0,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ vpaddd %xmm7,%xmm1,%xmm1
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrld $3,%xmm4,%xmm7
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ vpslld $14,%xmm4,%xmm5
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,16(%esp)
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ vpshufd $250,%xmm0,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpsrld $11,%xmm6,%xmm6
+ addl 48(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpxor %xmm5,%xmm4,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ vpslld $11,%xmm5,%xmm5
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl (%esp),%esi
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $10,%xmm7,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ vpaddd %xmm4,%xmm1,%xmm1
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,12(%esp)
+ vpxor %xmm5,%xmm6,%xmm6
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ vpsrlq $19,%xmm7,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ vpshufd $132,%xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ vpsrldq $8,%xmm7,%xmm7
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 28(%esp),%esi
+ vpaddd %xmm7,%xmm1,%xmm1
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ vpshufd $80,%xmm1,%xmm7
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ vpsrld $10,%xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,8(%esp)
+ vpsrlq $19,%xmm7,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpshufd $232,%xmm6,%xmm7
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpslldq $8,%xmm7,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ vpaddd %xmm7,%xmm1,%xmm1
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 24(%esp),%esi
+ vpaddd 16(%ebp),%xmm1,%xmm6
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ vmovdqa %xmm6,48(%esp)
+ vpalignr $4,%xmm2,%xmm3,%xmm4
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 20(%esp),%esi
+ vpalignr $4,%xmm0,%xmm1,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ vpaddd %xmm7,%xmm2,%xmm2
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrld $3,%xmm4,%xmm7
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ vpslld $14,%xmm4,%xmm5
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,(%esp)
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ vpshufd $250,%xmm1,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpsrld $11,%xmm6,%xmm6
+ addl 64(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpxor %xmm5,%xmm4,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ vpslld $11,%xmm5,%xmm5
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 16(%esp),%esi
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $10,%xmm7,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ vpaddd %xmm4,%xmm2,%xmm2
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,28(%esp)
+ vpxor %xmm5,%xmm6,%xmm6
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ vpsrlq $19,%xmm7,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ vpshufd $132,%xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ vpsrldq $8,%xmm7,%xmm7
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 12(%esp),%esi
+ vpaddd %xmm7,%xmm2,%xmm2
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ vpshufd $80,%xmm2,%xmm7
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ vpsrld $10,%xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,24(%esp)
+ vpsrlq $19,%xmm7,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpshufd $232,%xmm6,%xmm7
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpslldq $8,%xmm7,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ vpaddd %xmm7,%xmm2,%xmm2
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 8(%esp),%esi
+ vpaddd 32(%ebp),%xmm2,%xmm6
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ vmovdqa %xmm6,64(%esp)
+ vpalignr $4,%xmm3,%xmm0,%xmm4
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 4(%esp),%esi
+ vpalignr $4,%xmm1,%xmm2,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ vpaddd %xmm7,%xmm3,%xmm3
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrld $3,%xmm4,%xmm7
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ vpslld $14,%xmm4,%xmm5
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,16(%esp)
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ vpshufd $250,%xmm2,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpsrld $11,%xmm6,%xmm6
+ addl 80(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpxor %xmm5,%xmm4,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ vpslld $11,%xmm5,%xmm5
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl (%esp),%esi
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $10,%xmm7,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ vpaddd %xmm4,%xmm3,%xmm3
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,12(%esp)
+ vpxor %xmm5,%xmm6,%xmm6
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ vpsrlq $19,%xmm7,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ vpshufd $132,%xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ vpsrldq $8,%xmm7,%xmm7
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 28(%esp),%esi
+ vpaddd %xmm7,%xmm3,%xmm3
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ vpshufd $80,%xmm3,%xmm7
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ vpsrld $10,%xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,8(%esp)
+ vpsrlq $19,%xmm7,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpshufd $232,%xmm6,%xmm7
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpslldq $8,%xmm7,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ vpaddd %xmm7,%xmm3,%xmm3
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 24(%esp),%esi
+ vpaddd 48(%ebp),%xmm3,%xmm6
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ vmovdqa %xmm6,80(%esp)
+ cmpl $66051,64(%ebp)
+ jne .L016avx_00_47
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 32(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 48(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 64(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 80(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebx
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebx,4(%esp)
+ xorl %edi,%ebx
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %edi,20(%esp)
+ movl 28(%esp),%edi
+ movl %ecx,24(%esi)
+ addl 28(%esi),%edi
+ movl %ecx,24(%esp)
+ movl %edi,28(%esi)
+ movl %edi,28(%esp)
+ movl 100(%esp),%edi
+ vmovdqa 64(%ebp),%xmm7
+ subl $192,%ebp
+ cmpl 104(%esp),%edi
+ jb .L015grand_avx
+ movl 108(%esp),%esp
+ vzeroall
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L014AVX_BMI:
+ leal -96(%esp),%esp
+ vzeroall
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,4(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,8(%esp)
+ movl %edi,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%edi
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ vmovdqa 256(%ebp),%xmm7
+ jmp .L017grand_avx_bmi
+.align 32
+.L017grand_avx_bmi:
+ vmovdqu (%edi),%xmm0
+ vmovdqu 16(%edi),%xmm1
+ vmovdqu 32(%edi),%xmm2
+ vmovdqu 48(%edi),%xmm3
+ addl $64,%edi
+ vpshufb %xmm7,%xmm0,%xmm0
+ movl %edi,100(%esp)
+ vpshufb %xmm7,%xmm1,%xmm1
+ vpshufb %xmm7,%xmm2,%xmm2
+ vpaddd (%ebp),%xmm0,%xmm4
+ vpshufb %xmm7,%xmm3,%xmm3
+ vpaddd 16(%ebp),%xmm1,%xmm5
+ vpaddd 32(%ebp),%xmm2,%xmm6
+ vpaddd 48(%ebp),%xmm3,%xmm7
+ vmovdqa %xmm4,32(%esp)
+ vmovdqa %xmm5,48(%esp)
+ vmovdqa %xmm6,64(%esp)
+ vmovdqa %xmm7,80(%esp)
+ jmp .L018avx_bmi_00_47
+.align 16
+.L018avx_bmi_00_47:
+ addl $64,%ebp
+ vpalignr $4,%xmm0,%xmm1,%xmm4
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,16(%esp)
+ vpalignr $4,%xmm2,%xmm3,%xmm7
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 24(%esp),%edx,%esi
+ vpsrld $7,%xmm4,%xmm6
+ xorl %edi,%ecx
+ andl 20(%esp),%edx
+ movl %eax,(%esp)
+ vpaddd %xmm7,%xmm0,%xmm0
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrld $3,%xmm4,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpslld $14,%xmm4,%xmm5
+ movl 4(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpxor %xmm6,%xmm7,%xmm4
+ addl 28(%esp),%edx
+ andl %eax,%ebx
+ addl 32(%esp),%edx
+ vpshufd $250,%xmm3,%xmm7
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 12(%esp),%edx
+ vpsrld $11,%xmm6,%xmm6
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %edx,12(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpslld $11,%xmm5,%xmm5
+ andnl 20(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 16(%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ movl %ebx,28(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpsrld $10,%xmm7,%xmm6
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl (%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpsrlq $17,%xmm7,%xmm5
+ addl 24(%esp),%edx
+ andl %ebx,%eax
+ addl 36(%esp),%edx
+ vpaddd %xmm4,%xmm0,%xmm0
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 8(%esp),%edx
+ vpxor %xmm5,%xmm6,%xmm6
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpsrlq $19,%xmm7,%xmm7
+ movl %edx,8(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ andnl 16(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 12(%esp),%edx
+ vpshufd $132,%xmm6,%xmm7
+ movl %eax,24(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrldq $8,%xmm7,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpaddd %xmm7,%xmm0,%xmm0
+ movl 28(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpshufd $80,%xmm0,%xmm7
+ addl 20(%esp),%edx
+ andl %eax,%ebx
+ addl 40(%esp),%edx
+ vpsrld $10,%xmm7,%xmm6
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 4(%esp),%edx
+ vpsrlq $17,%xmm7,%xmm5
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %edx,4(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpsrlq $19,%xmm7,%xmm7
+ andnl 12(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 8(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %ebx,20(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpshufd $232,%xmm6,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpslldq $8,%xmm7,%xmm7
+ movl 24(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpaddd %xmm7,%xmm0,%xmm0
+ addl 16(%esp),%edx
+ andl %ebx,%eax
+ addl 44(%esp),%edx
+ vpaddd (%ebp),%xmm0,%xmm6
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl (%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ vmovdqa %xmm6,32(%esp)
+ vpalignr $4,%xmm1,%xmm2,%xmm4
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,(%esp)
+ vpalignr $4,%xmm3,%xmm0,%xmm7
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 8(%esp),%edx,%esi
+ vpsrld $7,%xmm4,%xmm6
+ xorl %edi,%ecx
+ andl 4(%esp),%edx
+ movl %eax,16(%esp)
+ vpaddd %xmm7,%xmm1,%xmm1
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrld $3,%xmm4,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpslld $14,%xmm4,%xmm5
+ movl 20(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpxor %xmm6,%xmm7,%xmm4
+ addl 12(%esp),%edx
+ andl %eax,%ebx
+ addl 48(%esp),%edx
+ vpshufd $250,%xmm0,%xmm7
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 28(%esp),%edx
+ vpsrld $11,%xmm6,%xmm6
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %edx,28(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpslld $11,%xmm5,%xmm5
+ andnl 4(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl (%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ movl %ebx,12(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpsrld $10,%xmm7,%xmm6
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl 16(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpsrlq $17,%xmm7,%xmm5
+ addl 8(%esp),%edx
+ andl %ebx,%eax
+ addl 52(%esp),%edx
+ vpaddd %xmm4,%xmm1,%xmm1
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 24(%esp),%edx
+ vpxor %xmm5,%xmm6,%xmm6
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpsrlq $19,%xmm7,%xmm7
+ movl %edx,24(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ andnl (%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 28(%esp),%edx
+ vpshufd $132,%xmm6,%xmm7
+ movl %eax,8(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrldq $8,%xmm7,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpaddd %xmm7,%xmm1,%xmm1
+ movl 12(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpshufd $80,%xmm1,%xmm7
+ addl 4(%esp),%edx
+ andl %eax,%ebx
+ addl 56(%esp),%edx
+ vpsrld $10,%xmm7,%xmm6
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 20(%esp),%edx
+ vpsrlq $17,%xmm7,%xmm5
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %edx,20(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpsrlq $19,%xmm7,%xmm7
+ andnl 28(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 24(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %ebx,4(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpshufd $232,%xmm6,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpslldq $8,%xmm7,%xmm7
+ movl 8(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpaddd %xmm7,%xmm1,%xmm1
+ addl (%esp),%edx
+ andl %ebx,%eax
+ addl 60(%esp),%edx
+ vpaddd 16(%ebp),%xmm1,%xmm6
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 16(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ vmovdqa %xmm6,48(%esp)
+ vpalignr $4,%xmm2,%xmm3,%xmm4
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,16(%esp)
+ vpalignr $4,%xmm0,%xmm1,%xmm7
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 24(%esp),%edx,%esi
+ vpsrld $7,%xmm4,%xmm6
+ xorl %edi,%ecx
+ andl 20(%esp),%edx
+ movl %eax,(%esp)
+ vpaddd %xmm7,%xmm2,%xmm2
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrld $3,%xmm4,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpslld $14,%xmm4,%xmm5
+ movl 4(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpxor %xmm6,%xmm7,%xmm4
+ addl 28(%esp),%edx
+ andl %eax,%ebx
+ addl 64(%esp),%edx
+ vpshufd $250,%xmm1,%xmm7
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 12(%esp),%edx
+ vpsrld $11,%xmm6,%xmm6
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %edx,12(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpslld $11,%xmm5,%xmm5
+ andnl 20(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 16(%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ movl %ebx,28(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpsrld $10,%xmm7,%xmm6
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl (%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpsrlq $17,%xmm7,%xmm5
+ addl 24(%esp),%edx
+ andl %ebx,%eax
+ addl 68(%esp),%edx
+ vpaddd %xmm4,%xmm2,%xmm2
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 8(%esp),%edx
+ vpxor %xmm5,%xmm6,%xmm6
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpsrlq $19,%xmm7,%xmm7
+ movl %edx,8(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ andnl 16(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 12(%esp),%edx
+ vpshufd $132,%xmm6,%xmm7
+ movl %eax,24(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrldq $8,%xmm7,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpaddd %xmm7,%xmm2,%xmm2
+ movl 28(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpshufd $80,%xmm2,%xmm7
+ addl 20(%esp),%edx
+ andl %eax,%ebx
+ addl 72(%esp),%edx
+ vpsrld $10,%xmm7,%xmm6
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 4(%esp),%edx
+ vpsrlq $17,%xmm7,%xmm5
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %edx,4(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpsrlq $19,%xmm7,%xmm7
+ andnl 12(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 8(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %ebx,20(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpshufd $232,%xmm6,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpslldq $8,%xmm7,%xmm7
+ movl 24(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpaddd %xmm7,%xmm2,%xmm2
+ addl 16(%esp),%edx
+ andl %ebx,%eax
+ addl 76(%esp),%edx
+ vpaddd 32(%ebp),%xmm2,%xmm6
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl (%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ vmovdqa %xmm6,64(%esp)
+ vpalignr $4,%xmm3,%xmm0,%xmm4
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,(%esp)
+ vpalignr $4,%xmm1,%xmm2,%xmm7
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 8(%esp),%edx,%esi
+ vpsrld $7,%xmm4,%xmm6
+ xorl %edi,%ecx
+ andl 4(%esp),%edx
+ movl %eax,16(%esp)
+ vpaddd %xmm7,%xmm3,%xmm3
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrld $3,%xmm4,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpslld $14,%xmm4,%xmm5
+ movl 20(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpxor %xmm6,%xmm7,%xmm4
+ addl 12(%esp),%edx
+ andl %eax,%ebx
+ addl 80(%esp),%edx
+ vpshufd $250,%xmm2,%xmm7
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 28(%esp),%edx
+ vpsrld $11,%xmm6,%xmm6
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %edx,28(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpslld $11,%xmm5,%xmm5
+ andnl 4(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl (%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ movl %ebx,12(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpsrld $10,%xmm7,%xmm6
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl 16(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpsrlq $17,%xmm7,%xmm5
+ addl 8(%esp),%edx
+ andl %ebx,%eax
+ addl 84(%esp),%edx
+ vpaddd %xmm4,%xmm3,%xmm3
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 24(%esp),%edx
+ vpxor %xmm5,%xmm6,%xmm6
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpsrlq $19,%xmm7,%xmm7
+ movl %edx,24(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ andnl (%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 28(%esp),%edx
+ vpshufd $132,%xmm6,%xmm7
+ movl %eax,8(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrldq $8,%xmm7,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpaddd %xmm7,%xmm3,%xmm3
+ movl 12(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpshufd $80,%xmm3,%xmm7
+ addl 4(%esp),%edx
+ andl %eax,%ebx
+ addl 88(%esp),%edx
+ vpsrld $10,%xmm7,%xmm6
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 20(%esp),%edx
+ vpsrlq $17,%xmm7,%xmm5
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %edx,20(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpsrlq $19,%xmm7,%xmm7
+ andnl 28(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 24(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %ebx,4(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpshufd $232,%xmm6,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpslldq $8,%xmm7,%xmm7
+ movl 8(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpaddd %xmm7,%xmm3,%xmm3
+ addl (%esp),%edx
+ andl %ebx,%eax
+ addl 92(%esp),%edx
+ vpaddd 48(%ebp),%xmm3,%xmm6
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 16(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ vmovdqa %xmm6,80(%esp)
+ cmpl $66051,64(%ebp)
+ jne .L018avx_bmi_00_47
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,16(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 24(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 20(%esp),%edx
+ movl %eax,(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 4(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ andl %eax,%ebx
+ addl 32(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 12(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,12(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 20(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 16(%esp),%edx
+ movl %ebx,28(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl (%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ andl %ebx,%eax
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 8(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,8(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 16(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 12(%esp),%edx
+ movl %eax,24(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 28(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ andl %eax,%ebx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 4(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,4(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 12(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 8(%esp),%edx
+ movl %ebx,20(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 24(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ andl %ebx,%eax
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl (%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 8(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 4(%esp),%edx
+ movl %eax,16(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 20(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ andl %eax,%ebx
+ addl 48(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 28(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,28(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 4(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl (%esp),%edx
+ movl %ebx,12(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 16(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ andl %ebx,%eax
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 24(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,24(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl (%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 28(%esp),%edx
+ movl %eax,8(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 12(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ andl %eax,%ebx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 20(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,20(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 28(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 24(%esp),%edx
+ movl %ebx,4(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 8(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ andl %ebx,%eax
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 16(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,16(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 24(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 20(%esp),%edx
+ movl %eax,(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 4(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ andl %eax,%ebx
+ addl 64(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 12(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,12(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 20(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 16(%esp),%edx
+ movl %ebx,28(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl (%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ andl %ebx,%eax
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 8(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,8(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 16(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 12(%esp),%edx
+ movl %eax,24(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 28(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ andl %eax,%ebx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 4(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,4(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 12(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 8(%esp),%edx
+ movl %ebx,20(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 24(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ andl %ebx,%eax
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl (%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 8(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 4(%esp),%edx
+ movl %eax,16(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 20(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ andl %eax,%ebx
+ addl 80(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 28(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,28(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 4(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl (%esp),%edx
+ movl %ebx,12(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 16(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ andl %ebx,%eax
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 24(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,24(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl (%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 28(%esp),%edx
+ movl %eax,8(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 12(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ andl %eax,%ebx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 20(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,20(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 28(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 24(%esp),%edx
+ movl %ebx,4(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 8(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ andl %ebx,%eax
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 16(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebx
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebx,4(%esp)
+ xorl %edi,%ebx
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %edi,20(%esp)
+ movl 28(%esp),%edi
+ movl %ecx,24(%esi)
+ addl 28(%esi),%edi
+ movl %ecx,24(%esp)
+ movl %edi,28(%esi)
+ movl %edi,28(%esp)
+ movl 100(%esp),%edi
+ vmovdqa 64(%ebp),%xmm7
+ subl $192,%ebp
+ cmpl 104(%esp),%edi
+ jb .L017grand_avx_bmi
+ movl 108(%esp),%esp
+ vzeroall
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
-.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
-.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
-.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
-.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
-.byte 62,0
+.comm OPENSSL_ia32cap_P,16,4
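
For readers following the BMI2 (rorxl/andnl) rounds above: the rotate amounts 6/11/25 and 2/13/22 are the SHA-256 Sigma1(e) and Sigma0(a) functions, and the andnl/andl/orl sequence evaluates the Ch choice function as (e & f) | (~e & g). A minimal C sketch of that round math follows, purely as a reading aid alongside this auto-generated assembly; it is not part of the commit, and names such as rotr32 and sha256_round are illustrative only.

    #include <stdint.h>

    /* Illustrative reference for the round math the rorx/andn path computes. */
    static inline uint32_t rotr32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    /* Big sigmas on the working variables: the assembly materializes these
     * with rorxl $6/$11/$25 (on e) and rorxl $2/$13/$22 (on a). */
    static inline uint32_t Sigma1(uint32_t e) { return rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25); }
    static inline uint32_t Sigma0(uint32_t a) { return rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22); }

    /* Choice and majority; andnl provides the (~e & g) term of Ch directly. */
    static inline uint32_t Ch(uint32_t e, uint32_t f, uint32_t g)  { return (e & f) ^ (~e & g); }
    static inline uint32_t Maj(uint32_t a, uint32_t b, uint32_t c) { return (a & b) ^ (a & c) ^ (b & c); }

    /* One round updates the eight working variables s[0..7] (a..h) with W[t]+K[t]. */
    static void sha256_round(uint32_t s[8], uint32_t wk)
    {
        uint32_t t1 = s[7] + Sigma1(s[4]) + Ch(s[4], s[5], s[6]) + wk;
        uint32_t t2 = Sigma0(s[0]) + Maj(s[0], s[1], s[2]);
        s[7] = s[6]; s[6] = s[5]; s[5] = s[4]; s[4] = s[3] + t1;
        s[3] = s[2]; s[2] = s[1]; s[1] = s[0]; s[0] = t1 + t2;
    }
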
#else
.file "sha256-586.S"
.text
@@ -288,235 +6813,6760 @@
movl %edi,4(%esp)
movl %eax,8(%esp)
movl %ebx,12(%esp)
+ leal OPENSSL_ia32cap_P,%edx
+ movl (%edx),%ecx
+ movl 4(%edx),%ebx
+ testl $1048576,%ecx
+ jnz .L002loop
+ movl 8(%edx),%edx
+ testl $16777216,%ecx
+ jz .L003no_xmm
+ andl $1073741824,%ecx
+ andl $268435968,%ebx
+ testl $536870912,%edx
+ jnz .L004shaext
+ orl %ebx,%ecx
+ andl $1342177280,%ecx
+ cmpl $1342177280,%ecx
+ je .L005AVX
+ testl $512,%ebx
+ jnz .L006SSSE3
+.L003no_xmm:
+ subl %edi,%eax
+ cmpl $256,%eax
+ jae .L007unrolled
+ jmp .L002loop
.align 16
.L002loop:
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
+ bswap %eax
movl 12(%edi),%edx
- bswap %eax
bswap %ebx
+ pushl %eax
bswap %ecx
+ pushl %ebx
bswap %edx
- pushl %eax
- pushl %ebx
pushl %ecx
pushl %edx
movl 16(%edi),%eax
movl 20(%edi),%ebx
movl 24(%edi),%ecx
+ bswap %eax
movl 28(%edi),%edx
- bswap %eax
bswap %ebx
+ pushl %eax
bswap %ecx
+ pushl %ebx
bswap %edx
- pushl %eax
- pushl %ebx
pushl %ecx
pushl %edx
movl 32(%edi),%eax
movl 36(%edi),%ebx
movl 40(%edi),%ecx
+ bswap %eax
movl 44(%edi),%edx
- bswap %eax
bswap %ebx
+ pushl %eax
bswap %ecx
+ pushl %ebx
bswap %edx
- pushl %eax
- pushl %ebx
pushl %ecx
pushl %edx
movl 48(%edi),%eax
movl 52(%edi),%ebx
movl 56(%edi),%ecx
+ bswap %eax
movl 60(%edi),%edx
- bswap %eax
bswap %ebx
+ pushl %eax
bswap %ecx
+ pushl %ebx
bswap %edx
- pushl %eax
- pushl %ebx
pushl %ecx
pushl %edx
addl $64,%edi
- subl $32,%esp
- movl %edi,100(%esp)
+ leal -36(%esp),%esp
+ movl %edi,104(%esp)
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edi
- movl %ebx,4(%esp)
- movl %ecx,8(%esp)
- movl %edi,12(%esp)
+ movl %ebx,8(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,12(%esp)
+ movl %edi,16(%esp)
+ movl %ebx,(%esp)
movl 16(%esi),%edx
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edi
- movl %ebx,20(%esp)
- movl %ecx,24(%esp)
- movl %edi,28(%esp)
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ movl %edi,32(%esp)
.align 16
-.L00300_15:
- movl 92(%esp),%ebx
+.L00800_15:
movl %edx,%ecx
+ movl 24(%esp),%esi
rorl $14,%ecx
- movl 20(%esp),%esi
+ movl 28(%esp),%edi
xorl %edx,%ecx
+ xorl %edi,%esi
+ movl 96(%esp),%ebx
rorl $5,%ecx
- xorl %edx,%ecx
- rorl $6,%ecx
- movl 24(%esp),%edi
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
addl %ecx,%ebx
+ xorl %edi,%eax
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3248222580,%esi
+ jne .L00800_15
+ movl 156(%esp),%ecx
+ jmp .L00916_63
+.align 16
+.L00916_63:
+ movl %ecx,%ebx
+ movl 104(%esp),%esi
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
xorl %edi,%esi
- movl %edx,16(%esp)
- movl %eax,%ecx
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 160(%esp),%ebx
+ shrl $10,%edi
+ addl 124(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 24(%esp),%esi
+ rorl $14,%ecx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl %ebx,96(%esp)
+ rorl $5,%ecx
andl %edx,%esi
- movl 12(%esp),%edx
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
xorl %edi,%esi
- movl %eax,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
addl %esi,%ebx
rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ movl 156(%esp),%ecx
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3329325298,%esi
+ jne .L00916_63
+ movl 356(%esp),%esi
+ movl 8(%esp),%ebx
+ movl 16(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl 24(%esp),%eax
+ movl 28(%esp),%ebx
+ movl 32(%esp),%ecx
+ movl 360(%esp),%edi
+ addl 16(%esi),%edx
+ addl 20(%esi),%eax
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %eax,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ leal 356(%esp),%esp
+ subl $256,%ebp
+ cmpl 8(%esp),%edi
+ jb .L002loop
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 64
+.L001K256:
+.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
+.long 66051,67438087,134810123,202182159
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte 62,0
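
The rotate/shift sequences in the 16..63 loop above, and the vpsrld/vpsrlq/vpslld steps in the SSSE3/AVX message schedule, compute the SHA-256 small sigma functions sigma0(x) = ror7 ^ ror18 ^ shr3 and sigma1(x) = ror17 ^ ror19 ^ shr10, which expand the 16 message words into the 64-entry schedule combined with the K256 table just above. A short C sketch of that expansion, again only as a reading aid and not part of the generated file; sha256_schedule is an illustrative name.

    #include <stdint.h>

    /* Illustrative reference for the message-schedule expansion. */
    static inline uint32_t rotr32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    static inline uint32_t sigma0(uint32_t x) { return rotr32(x, 7)  ^ rotr32(x, 18) ^ (x >> 3);  }
    static inline uint32_t sigma1(uint32_t x) { return rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10); }

    /* Expand the 16 big-endian message words of one block into 64 schedule words. */
    static void sha256_schedule(uint32_t w[64])
    {
        for (int t = 16; t < 64; t++)
            w[t] = sigma1(w[t - 2]) + w[t - 7] + sigma0(w[t - 15]) + w[t - 16];
    }
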
+.align 16
+.L007unrolled:
+ leal -96(%esp),%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebp
+ movl 8(%esi),%ecx
+ movl 12(%esi),%ebx
+ movl %ebp,4(%esp)
+ xorl %ecx,%ebp
+ movl %ecx,8(%esp)
+ movl %ebx,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %ebx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ jmp .L010grand_loop
+.align 16
+.L010grand_loop:
+ movl (%edi),%ebx
+ movl 4(%edi),%ecx
+ bswap %ebx
+ movl 8(%edi),%esi
+ bswap %ecx
+ movl %ebx,32(%esp)
+ bswap %esi
+ movl %ecx,36(%esp)
+ movl %esi,40(%esp)
+ movl 12(%edi),%ebx
+ movl 16(%edi),%ecx
+ bswap %ebx
+ movl 20(%edi),%esi
+ bswap %ecx
+ movl %ebx,44(%esp)
+ bswap %esi
+ movl %ecx,48(%esp)
+ movl %esi,52(%esp)
+ movl 24(%edi),%ebx
+ movl 28(%edi),%ecx
+ bswap %ebx
+ movl 32(%edi),%esi
+ bswap %ecx
+ movl %ebx,56(%esp)
+ bswap %esi
+ movl %ecx,60(%esp)
+ movl %esi,64(%esp)
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %ebx
+ movl 44(%edi),%esi
+ bswap %ecx
+ movl %ebx,68(%esp)
+ bswap %esi
+ movl %ecx,72(%esp)
+ movl %esi,76(%esp)
+ movl 48(%edi),%ebx
+ movl 52(%edi),%ecx
+ bswap %ebx
+ movl 56(%edi),%esi
+ bswap %ecx
+ movl %ebx,80(%esp)
+ bswap %esi
+ movl %ecx,84(%esp)
+ movl %esi,88(%esp)
+ movl 60(%edi),%ebx
+ addl $64,%edi
+ bswap %ebx
+ movl %edi,100(%esp)
+ movl %ebx,92(%esp)
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1116352408(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 36(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1899447441(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 40(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3049323471(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 44(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3921009573(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 48(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 961987163(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
rorl $2,%ecx
- addl %ebx,%edx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 52(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1508970993(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 56(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2453635748(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 60(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
movl 8(%esp),%edi
- addl %ecx,%ebx
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2870763221(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 64(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3624381080(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 68(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 310598401(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 72(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
movl %eax,%ecx
- subl $4,%esp
- orl %esi,%eax
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 607225278(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 76(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
andl %esi,%ecx
- andl %edi,%eax
- movl (%ebp),%esi
- orl %ecx,%eax
- addl $4,%ebp
- addl %ebx,%eax
- addl %esi,%edx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1426881987(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
addl %esi,%eax
- cmpl $3248222580,%esi
- jne .L00300_15
- movl 152(%esp),%ebx
-.align 16
-.L00416_63:
- movl %ebx,%esi
- movl 100(%esp),%ecx
+ movl %edx,%ecx
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 80(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1925078388(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 84(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
rorl $11,%esi
+ andl %ebp,%eax
+ leal 2162078206(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 88(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2614888103(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 92(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3248222580(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3835390401(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
movl %ecx,%edi
+ rorl $2,%ecx
xorl %ebx,%esi
+ shrl $3,%ebx
rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 4022224774(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
shrl $3,%ebx
- rorl $2,%edi
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 264347078(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
xorl %ecx,%edi
- rorl $17,%edi
- shrl $10,%ecx
- addl 156(%esp),%ebx
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
xorl %ecx,%edi
- addl 120(%esp),%ebx
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 604807628(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
addl %edi,%ebx
- rorl $14,%ecx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 770255983(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1249150122(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1555081692(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1996064986(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
movl 20(%esp),%esi
- xorl %edx,%ecx
- rorl $5,%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2554220882(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2821834349(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2952996808(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3210313671(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3336571891(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3584528711(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 113926993(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
movl %ebx,92(%esp)
- xorl %edx,%ecx
- rorl $6,%ecx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 338241895(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
movl 24(%esp),%edi
- addl %ecx,%ebx
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
xorl %edi,%esi
- movl %edx,16(%esp)
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
movl %eax,%ecx
- andl %edx,%esi
- movl 12(%esp),%edx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 666307205(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 773529912(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
xorl %edi,%esi
- movl %eax,%edi
- addl %esi,%ebx
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1294757372(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1396182291(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1695183700(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1986661051(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2177026350(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2456956037(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2730485921(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2820302411(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3259730800(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3345764771(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3516065817(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
rorl $2,%ecx
- addl %ebx,%edx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3600352804(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 4094571909(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,92(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
movl 8(%esp),%edi
- addl %ecx,%ebx
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 275423344(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 430227734(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 506948616(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
movl %eax,%ecx
- subl $4,%esp
- orl %esi,%eax
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 659060556(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
andl %esi,%ecx
- andl %edi,%eax
- movl (%ebp),%esi
- orl %ecx,%eax
- addl $4,%ebp
- addl %ebx,%eax
- movl 152(%esp),%ebx
- addl %esi,%edx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 883997877(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
addl %esi,%eax
- cmpl $3329325298,%esi
- jne .L00416_63
- movl 352(%esp),%esi
- movl 4(%esp),%ebx
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 958139571(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1322822218(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1537002063(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1747873779(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1955562222(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2024104815(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2227730452(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2361852424(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2428436474(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2756734187(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3204031479(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3329325298(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebp
+ movl 12(%esp),%ecx
addl (%esi),%eax
- addl 4(%esi),%ebx
- addl 8(%esi),%ecx
- addl 12(%esi),%edi
+ addl 4(%esi),%ebp
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
movl %eax,(%esi)
- movl %ebx,4(%esi)
- movl %ecx,8(%esi)
- movl %edi,12(%esi)
- movl 20(%esp),%eax
+ movl %ebp,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
movl 24(%esp),%ebx
movl 28(%esp),%ecx
- movl 356(%esp),%edi
addl 16(%esi),%edx
- addl 20(%esi),%eax
+ addl 20(%esi),%edi
addl 24(%esi),%ebx
addl 28(%esi),%ecx
movl %edx,16(%esi)
- movl %eax,20(%esi)
+ movl %edi,20(%esi)
movl %ebx,24(%esi)
movl %ecx,28(%esi)
- addl $352,%esp
- subl $256,%ebp
- cmpl 8(%esp),%edi
- jb .L002loop
- movl 12(%esp),%esp
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ cmpl 104(%esp),%edi
+ jb .L010grand_loop
+ movl 108(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
-.align 64
-.L001K256:
-.long 1116352408,1899447441,3049323471,3921009573
-.long 961987163,1508970993,2453635748,2870763221
-.long 3624381080,310598401,607225278,1426881987
-.long 1925078388,2162078206,2614888103,3248222580
-.long 3835390401,4022224774,264347078,604807628
-.long 770255983,1249150122,1555081692,1996064986
-.long 2554220882,2821834349,2952996808,3210313671
-.long 3336571891,3584528711,113926993,338241895
-.long 666307205,773529912,1294757372,1396182291
-.long 1695183700,1986661051,2177026350,2456956037
-.long 2730485921,2820302411,3259730800,3345764771
-.long 3516065817,3600352804,4094571909,275423344
-.long 430227734,506948616,659060556,883997877
-.long 958139571,1322822218,1537002063,1747873779
-.long 1955562222,2024104815,2227730452,2361852424
-.long 2428436474,2756734187,3204031479,3329325298
+.align 32
+.L004shaext:
+ subl $32,%esp
+ movdqu (%esi),%xmm1
+ leal 128(%ebp),%ebp
+ movdqu 16(%esi),%xmm2
+ movdqa 128(%ebp),%xmm7
+ pshufd $27,%xmm1,%xmm0
+ pshufd $177,%xmm1,%xmm1
+ pshufd $27,%xmm2,%xmm2
+.byte 102,15,58,15,202,8
+ punpcklqdq %xmm0,%xmm2
+ jmp .L011loop_shaext
+.align 16
+.L011loop_shaext:
+ movdqu (%edi),%xmm3
+ movdqu 16(%edi),%xmm4
+ movdqu 32(%edi),%xmm5
+.byte 102,15,56,0,223
+ movdqu 48(%edi),%xmm6
+ movdqa %xmm2,16(%esp)
+ movdqa -128(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 102,15,56,0,231
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ nop
+ movdqa %xmm1,(%esp)
+.byte 15,56,203,202
+ movdqa -112(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 102,15,56,0,239
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ leal 64(%edi),%edi
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 102,15,56,0,247
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -80(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa -64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa -48(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -16(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa (%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 16(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa 32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa 48(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa 64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 80(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+.byte 15,56,203,202
+ paddd %xmm7,%xmm6
+ movdqa 96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+.byte 15,56,205,245
+ movdqa 128(%ebp),%xmm7
+.byte 15,56,203,202
+ movdqa 112(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+ nop
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ cmpl %edi,%eax
+ nop
+.byte 15,56,203,202
+ paddd 16(%esp),%xmm2
+ paddd (%esp),%xmm1
+ jnz .L011loop_shaext
+ pshufd $177,%xmm2,%xmm2
+ pshufd $27,%xmm1,%xmm7
+ pshufd $177,%xmm1,%xmm1
+ punpckhqdq %xmm2,%xmm1
+.byte 102,15,58,15,215,8
+ movl 44(%esp),%esp
+ movdqu %xmm1,(%esi)
+ movdqu %xmm2,16(%esi)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L006SSSE3:
+ leal -96(%esp),%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,4(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,8(%esp)
+ movl %edi,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%edi
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ movdqa 256(%ebp),%xmm7
+ jmp .L012grand_ssse3
+.align 16
+.L012grand_ssse3:
+ movdqu (%edi),%xmm0
+ movdqu 16(%edi),%xmm1
+ movdqu 32(%edi),%xmm2
+ movdqu 48(%edi),%xmm3
+ addl $64,%edi
+.byte 102,15,56,0,199
+ movl %edi,100(%esp)
+.byte 102,15,56,0,207
+ movdqa (%ebp),%xmm4
+.byte 102,15,56,0,215
+ movdqa 16(%ebp),%xmm5
+ paddd %xmm0,%xmm4
+.byte 102,15,56,0,223
+ movdqa 32(%ebp),%xmm6
+ paddd %xmm1,%xmm5
+ movdqa 48(%ebp),%xmm7
+ movdqa %xmm4,32(%esp)
+ paddd %xmm2,%xmm6
+ movdqa %xmm5,48(%esp)
+ paddd %xmm3,%xmm7
+ movdqa %xmm6,64(%esp)
+ movdqa %xmm7,80(%esp)
+ jmp .L013ssse3_00_47
+.align 16
+.L013ssse3_00_47:
+ addl $64,%ebp
+ movl %edx,%ecx
+ movdqa %xmm1,%xmm4
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ movdqa %xmm3,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+.byte 102,15,58,15,224,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,250,4
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm0
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm3,%xmm7
+ xorl %esi,%ecx
+ addl 32(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm0
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm0
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ pshufd $80,%xmm0,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa (%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm0
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ paddd %xmm0,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,32(%esp)
+ movl %edx,%ecx
+ movdqa %xmm2,%xmm4
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ movdqa %xmm0,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+.byte 102,15,58,15,225,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,251,4
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm1
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm0,%xmm7
+ xorl %esi,%ecx
+ addl 48(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm1
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm1
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ pshufd $80,%xmm1,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 16(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm1
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ paddd %xmm1,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,48(%esp)
+ movl %edx,%ecx
+ movdqa %xmm3,%xmm4
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ movdqa %xmm1,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+.byte 102,15,58,15,226,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,248,4
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm2
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm1,%xmm7
+ xorl %esi,%ecx
+ addl 64(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm2
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm2
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ pshufd $80,%xmm2,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 32(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm2
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ paddd %xmm2,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,64(%esp)
+ movl %edx,%ecx
+ movdqa %xmm0,%xmm4
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ movdqa %xmm2,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+.byte 102,15,58,15,227,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,249,4
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm3
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm2,%xmm7
+ xorl %esi,%ecx
+ addl 80(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm3
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm3
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ pshufd $80,%xmm3,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 48(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm3
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ paddd %xmm3,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,80(%esp)
+ cmpl $66051,64(%ebp)
+ jne .L013ssse3_00_47
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 32(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 48(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 64(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 80(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebx
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebx,4(%esp)
+ xorl %edi,%ebx
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %edi,20(%esp)
+ movl 28(%esp),%edi
+ movl %ecx,24(%esi)
+ addl 28(%esi),%edi
+ movl %ecx,24(%esp)
+ movl %edi,28(%esi)
+ movl %edi,28(%esp)
+ movl 100(%esp),%edi
+ movdqa 64(%ebp),%xmm7
+ subl $192,%ebp
+ cmpl 104(%esp),%edi
+ jb .L012grand_ssse3
+ movl 108(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L005AVX:
+ andl $264,%edx
+ cmpl $264,%edx
+ je .L014AVX_BMI
+ leal -96(%esp),%esp
+ vzeroall
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,4(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,8(%esp)
+ movl %edi,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%edi
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ vmovdqa 256(%ebp),%xmm7
+ jmp .L015grand_avx
+.align 32
+.L015grand_avx:
+ vmovdqu (%edi),%xmm0
+ vmovdqu 16(%edi),%xmm1
+ vmovdqu 32(%edi),%xmm2
+ vmovdqu 48(%edi),%xmm3
+ addl $64,%edi
+ vpshufb %xmm7,%xmm0,%xmm0
+ movl %edi,100(%esp)
+ vpshufb %xmm7,%xmm1,%xmm1
+ vpshufb %xmm7,%xmm2,%xmm2
+ vpaddd (%ebp),%xmm0,%xmm4
+ vpshufb %xmm7,%xmm3,%xmm3
+ vpaddd 16(%ebp),%xmm1,%xmm5
+ vpaddd 32(%ebp),%xmm2,%xmm6
+ vpaddd 48(%ebp),%xmm3,%xmm7
+ vmovdqa %xmm4,32(%esp)
+ vmovdqa %xmm5,48(%esp)
+ vmovdqa %xmm6,64(%esp)
+ vmovdqa %xmm7,80(%esp)
+ jmp .L016avx_00_47
+.align 16
+.L016avx_00_47:
+ addl $64,%ebp
+ vpalignr $4,%xmm0,%xmm1,%xmm4
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 20(%esp),%esi
+ vpalignr $4,%xmm2,%xmm3,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ vpaddd %xmm7,%xmm0,%xmm0
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrld $3,%xmm4,%xmm7
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ vpslld $14,%xmm4,%xmm5
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,(%esp)
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ vpshufd $250,%xmm3,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpsrld $11,%xmm6,%xmm6
+ addl 32(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpxor %xmm5,%xmm4,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ vpslld $11,%xmm5,%xmm5
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 16(%esp),%esi
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $10,%xmm7,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ vpaddd %xmm4,%xmm0,%xmm0
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,28(%esp)
+ vpxor %xmm5,%xmm6,%xmm6
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ vpsrlq $19,%xmm7,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ vpshufd $132,%xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ vpsrldq $8,%xmm7,%xmm7
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 12(%esp),%esi
+ vpaddd %xmm7,%xmm0,%xmm0
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ vpshufd $80,%xmm0,%xmm7
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ vpsrld $10,%xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,24(%esp)
+ vpsrlq $19,%xmm7,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpshufd $232,%xmm6,%xmm7
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpslldq $8,%xmm7,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ vpaddd %xmm7,%xmm0,%xmm0
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 8(%esp),%esi
+ vpaddd (%ebp),%xmm0,%xmm6
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ vmovdqa %xmm6,32(%esp)
+ vpalignr $4,%xmm1,%xmm2,%xmm4
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 4(%esp),%esi
+ vpalignr $4,%xmm3,%xmm0,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ vpaddd %xmm7,%xmm1,%xmm1
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrld $3,%xmm4,%xmm7
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ vpslld $14,%xmm4,%xmm5
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,16(%esp)
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ vpshufd $250,%xmm0,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpsrld $11,%xmm6,%xmm6
+ addl 48(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpxor %xmm5,%xmm4,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ vpslld $11,%xmm5,%xmm5
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl (%esp),%esi
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $10,%xmm7,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ vpaddd %xmm4,%xmm1,%xmm1
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,12(%esp)
+ vpxor %xmm5,%xmm6,%xmm6
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ vpsrlq $19,%xmm7,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ vpshufd $132,%xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ vpsrldq $8,%xmm7,%xmm7
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 28(%esp),%esi
+ vpaddd %xmm7,%xmm1,%xmm1
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ vpshufd $80,%xmm1,%xmm7
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ vpsrld $10,%xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,8(%esp)
+ vpsrlq $19,%xmm7,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpshufd $232,%xmm6,%xmm7
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpslldq $8,%xmm7,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ vpaddd %xmm7,%xmm1,%xmm1
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 24(%esp),%esi
+ vpaddd 16(%ebp),%xmm1,%xmm6
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ vmovdqa %xmm6,48(%esp)
+ vpalignr $4,%xmm2,%xmm3,%xmm4
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 20(%esp),%esi
+ vpalignr $4,%xmm0,%xmm1,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ vpaddd %xmm7,%xmm2,%xmm2
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrld $3,%xmm4,%xmm7
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ vpslld $14,%xmm4,%xmm5
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,(%esp)
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ vpshufd $250,%xmm1,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpsrld $11,%xmm6,%xmm6
+ addl 64(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpxor %xmm5,%xmm4,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ vpslld $11,%xmm5,%xmm5
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 16(%esp),%esi
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $10,%xmm7,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ vpaddd %xmm4,%xmm2,%xmm2
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,28(%esp)
+ vpxor %xmm5,%xmm6,%xmm6
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ vpsrlq $19,%xmm7,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ vpshufd $132,%xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ vpsrldq $8,%xmm7,%xmm7
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 12(%esp),%esi
+ vpaddd %xmm7,%xmm2,%xmm2
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ vpshufd $80,%xmm2,%xmm7
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ vpsrld $10,%xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,24(%esp)
+ vpsrlq $19,%xmm7,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpshufd $232,%xmm6,%xmm7
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpslldq $8,%xmm7,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ vpaddd %xmm7,%xmm2,%xmm2
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 8(%esp),%esi
+ vpaddd 32(%ebp),%xmm2,%xmm6
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ vmovdqa %xmm6,64(%esp)
+ vpalignr $4,%xmm3,%xmm0,%xmm4
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 4(%esp),%esi
+ vpalignr $4,%xmm1,%xmm2,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ vpaddd %xmm7,%xmm3,%xmm3
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrld $3,%xmm4,%xmm7
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ vpslld $14,%xmm4,%xmm5
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,16(%esp)
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ vpshufd $250,%xmm2,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpsrld $11,%xmm6,%xmm6
+ addl 80(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpxor %xmm5,%xmm4,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ vpslld $11,%xmm5,%xmm5
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl (%esp),%esi
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ vpsrld $10,%xmm7,%xmm6
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ vpaddd %xmm4,%xmm3,%xmm3
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,12(%esp)
+ vpxor %xmm5,%xmm6,%xmm6
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ vpsrlq $19,%xmm7,%xmm7
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ vpshufd $132,%xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ vpsrldq $8,%xmm7,%xmm7
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 28(%esp),%esi
+ vpaddd %xmm7,%xmm3,%xmm3
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ vpshufd $80,%xmm3,%xmm7
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ vpsrld $10,%xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ vpsrlq $17,%xmm7,%xmm5
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,8(%esp)
+ vpsrlq $19,%xmm7,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ vpshufd $232,%xmm6,%xmm7
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ vpslldq $8,%xmm7,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ vpaddd %xmm7,%xmm3,%xmm3
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 24(%esp),%esi
+ vpaddd 48(%ebp),%xmm3,%xmm6
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ vmovdqa %xmm6,80(%esp)
+ cmpl $66051,64(%ebp)
+ jne .L016avx_00_47
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 32(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 48(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 64(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 80(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ shrdl $9,%ecx,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ shrdl $2,%ecx,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ shrdl $14,%edx,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ shrdl $5,%edx,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ shrdl $6,%edx,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ shrdl $9,%ecx,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ shrdl $11,%ecx,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebx
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebx,4(%esp)
+ xorl %edi,%ebx
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %edi,20(%esp)
+ movl 28(%esp),%edi
+ movl %ecx,24(%esi)
+ addl 28(%esi),%edi
+ movl %ecx,24(%esp)
+ movl %edi,28(%esi)
+ movl %edi,28(%esp)
+ movl 100(%esp),%edi
+ vmovdqa 64(%ebp),%xmm7
+ subl $192,%ebp
+ cmpl 104(%esp),%edi
+ jb .L015grand_avx
+ movl 108(%esp),%esp
+ vzeroall
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L014AVX_BMI:
+ leal -96(%esp),%esp
+ vzeroall
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,4(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,8(%esp)
+ movl %edi,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%edi
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ vmovdqa 256(%ebp),%xmm7
+ jmp .L017grand_avx_bmi
+.align 32
+.L017grand_avx_bmi:
+ vmovdqu (%edi),%xmm0
+ vmovdqu 16(%edi),%xmm1
+ vmovdqu 32(%edi),%xmm2
+ vmovdqu 48(%edi),%xmm3
+ addl $64,%edi
+ vpshufb %xmm7,%xmm0,%xmm0
+ movl %edi,100(%esp)
+ vpshufb %xmm7,%xmm1,%xmm1
+ vpshufb %xmm7,%xmm2,%xmm2
+ vpaddd (%ebp),%xmm0,%xmm4
+ vpshufb %xmm7,%xmm3,%xmm3
+ vpaddd 16(%ebp),%xmm1,%xmm5
+ vpaddd 32(%ebp),%xmm2,%xmm6
+ vpaddd 48(%ebp),%xmm3,%xmm7
+ vmovdqa %xmm4,32(%esp)
+ vmovdqa %xmm5,48(%esp)
+ vmovdqa %xmm6,64(%esp)
+ vmovdqa %xmm7,80(%esp)
+ jmp .L018avx_bmi_00_47
+.align 16
+.L018avx_bmi_00_47:
+ addl $64,%ebp
+ vpalignr $4,%xmm0,%xmm1,%xmm4
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,16(%esp)
+ vpalignr $4,%xmm2,%xmm3,%xmm7
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 24(%esp),%edx,%esi
+ vpsrld $7,%xmm4,%xmm6
+ xorl %edi,%ecx
+ andl 20(%esp),%edx
+ movl %eax,(%esp)
+ vpaddd %xmm7,%xmm0,%xmm0
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrld $3,%xmm4,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpslld $14,%xmm4,%xmm5
+ movl 4(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpxor %xmm6,%xmm7,%xmm4
+ addl 28(%esp),%edx
+ andl %eax,%ebx
+ addl 32(%esp),%edx
+ vpshufd $250,%xmm3,%xmm7
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 12(%esp),%edx
+ vpsrld $11,%xmm6,%xmm6
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %edx,12(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpslld $11,%xmm5,%xmm5
+ andnl 20(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 16(%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ movl %ebx,28(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpsrld $10,%xmm7,%xmm6
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl (%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpsrlq $17,%xmm7,%xmm5
+ addl 24(%esp),%edx
+ andl %ebx,%eax
+ addl 36(%esp),%edx
+ vpaddd %xmm4,%xmm0,%xmm0
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 8(%esp),%edx
+ vpxor %xmm5,%xmm6,%xmm6
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpsrlq $19,%xmm7,%xmm7
+ movl %edx,8(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ andnl 16(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 12(%esp),%edx
+ vpshufd $132,%xmm6,%xmm7
+ movl %eax,24(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrldq $8,%xmm7,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpaddd %xmm7,%xmm0,%xmm0
+ movl 28(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpshufd $80,%xmm0,%xmm7
+ addl 20(%esp),%edx
+ andl %eax,%ebx
+ addl 40(%esp),%edx
+ vpsrld $10,%xmm7,%xmm6
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 4(%esp),%edx
+ vpsrlq $17,%xmm7,%xmm5
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %edx,4(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpsrlq $19,%xmm7,%xmm7
+ andnl 12(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 8(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %ebx,20(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpshufd $232,%xmm6,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpslldq $8,%xmm7,%xmm7
+ movl 24(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpaddd %xmm7,%xmm0,%xmm0
+ addl 16(%esp),%edx
+ andl %ebx,%eax
+ addl 44(%esp),%edx
+ vpaddd (%ebp),%xmm0,%xmm6
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl (%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ vmovdqa %xmm6,32(%esp)
+ vpalignr $4,%xmm1,%xmm2,%xmm4
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,(%esp)
+ vpalignr $4,%xmm3,%xmm0,%xmm7
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 8(%esp),%edx,%esi
+ vpsrld $7,%xmm4,%xmm6
+ xorl %edi,%ecx
+ andl 4(%esp),%edx
+ movl %eax,16(%esp)
+ vpaddd %xmm7,%xmm1,%xmm1
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrld $3,%xmm4,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpslld $14,%xmm4,%xmm5
+ movl 20(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpxor %xmm6,%xmm7,%xmm4
+ addl 12(%esp),%edx
+ andl %eax,%ebx
+ addl 48(%esp),%edx
+ vpshufd $250,%xmm0,%xmm7
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 28(%esp),%edx
+ vpsrld $11,%xmm6,%xmm6
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %edx,28(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpslld $11,%xmm5,%xmm5
+ andnl 4(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl (%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ movl %ebx,12(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpsrld $10,%xmm7,%xmm6
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl 16(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpsrlq $17,%xmm7,%xmm5
+ addl 8(%esp),%edx
+ andl %ebx,%eax
+ addl 52(%esp),%edx
+ vpaddd %xmm4,%xmm1,%xmm1
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 24(%esp),%edx
+ vpxor %xmm5,%xmm6,%xmm6
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpsrlq $19,%xmm7,%xmm7
+ movl %edx,24(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ andnl (%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 28(%esp),%edx
+ vpshufd $132,%xmm6,%xmm7
+ movl %eax,8(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrldq $8,%xmm7,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpaddd %xmm7,%xmm1,%xmm1
+ movl 12(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpshufd $80,%xmm1,%xmm7
+ addl 4(%esp),%edx
+ andl %eax,%ebx
+ addl 56(%esp),%edx
+ vpsrld $10,%xmm7,%xmm6
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 20(%esp),%edx
+ vpsrlq $17,%xmm7,%xmm5
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %edx,20(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpsrlq $19,%xmm7,%xmm7
+ andnl 28(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 24(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %ebx,4(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpshufd $232,%xmm6,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpslldq $8,%xmm7,%xmm7
+ movl 8(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpaddd %xmm7,%xmm1,%xmm1
+ addl (%esp),%edx
+ andl %ebx,%eax
+ addl 60(%esp),%edx
+ vpaddd 16(%ebp),%xmm1,%xmm6
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 16(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ vmovdqa %xmm6,48(%esp)
+ vpalignr $4,%xmm2,%xmm3,%xmm4
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,16(%esp)
+ vpalignr $4,%xmm0,%xmm1,%xmm7
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 24(%esp),%edx,%esi
+ vpsrld $7,%xmm4,%xmm6
+ xorl %edi,%ecx
+ andl 20(%esp),%edx
+ movl %eax,(%esp)
+ vpaddd %xmm7,%xmm2,%xmm2
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrld $3,%xmm4,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpslld $14,%xmm4,%xmm5
+ movl 4(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpxor %xmm6,%xmm7,%xmm4
+ addl 28(%esp),%edx
+ andl %eax,%ebx
+ addl 64(%esp),%edx
+ vpshufd $250,%xmm1,%xmm7
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 12(%esp),%edx
+ vpsrld $11,%xmm6,%xmm6
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %edx,12(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpslld $11,%xmm5,%xmm5
+ andnl 20(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 16(%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ movl %ebx,28(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpsrld $10,%xmm7,%xmm6
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl (%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpsrlq $17,%xmm7,%xmm5
+ addl 24(%esp),%edx
+ andl %ebx,%eax
+ addl 68(%esp),%edx
+ vpaddd %xmm4,%xmm2,%xmm2
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 8(%esp),%edx
+ vpxor %xmm5,%xmm6,%xmm6
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpsrlq $19,%xmm7,%xmm7
+ movl %edx,8(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ andnl 16(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 12(%esp),%edx
+ vpshufd $132,%xmm6,%xmm7
+ movl %eax,24(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrldq $8,%xmm7,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpaddd %xmm7,%xmm2,%xmm2
+ movl 28(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpshufd $80,%xmm2,%xmm7
+ addl 20(%esp),%edx
+ andl %eax,%ebx
+ addl 72(%esp),%edx
+ vpsrld $10,%xmm7,%xmm6
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 4(%esp),%edx
+ vpsrlq $17,%xmm7,%xmm5
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %edx,4(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpsrlq $19,%xmm7,%xmm7
+ andnl 12(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 8(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %ebx,20(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpshufd $232,%xmm6,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpslldq $8,%xmm7,%xmm7
+ movl 24(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpaddd %xmm7,%xmm2,%xmm2
+ addl 16(%esp),%edx
+ andl %ebx,%eax
+ addl 76(%esp),%edx
+ vpaddd 32(%ebp),%xmm2,%xmm6
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl (%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ vmovdqa %xmm6,64(%esp)
+ vpalignr $4,%xmm3,%xmm0,%xmm4
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,(%esp)
+ vpalignr $4,%xmm1,%xmm2,%xmm7
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 8(%esp),%edx,%esi
+ vpsrld $7,%xmm4,%xmm6
+ xorl %edi,%ecx
+ andl 4(%esp),%edx
+ movl %eax,16(%esp)
+ vpaddd %xmm7,%xmm3,%xmm3
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrld $3,%xmm4,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpslld $14,%xmm4,%xmm5
+ movl 20(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpxor %xmm6,%xmm7,%xmm4
+ addl 12(%esp),%edx
+ andl %eax,%ebx
+ addl 80(%esp),%edx
+ vpshufd $250,%xmm2,%xmm7
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 28(%esp),%edx
+ vpsrld $11,%xmm6,%xmm6
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %edx,28(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpslld $11,%xmm5,%xmm5
+ andnl 4(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl (%esp),%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ movl %ebx,12(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpsrld $10,%xmm7,%xmm6
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpxor %xmm5,%xmm4,%xmm4
+ movl 16(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpsrlq $17,%xmm7,%xmm5
+ addl 8(%esp),%edx
+ andl %ebx,%eax
+ addl 84(%esp),%edx
+ vpaddd %xmm4,%xmm3,%xmm3
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 24(%esp),%edx
+ vpxor %xmm5,%xmm6,%xmm6
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpsrlq $19,%xmm7,%xmm7
+ movl %edx,24(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ andnl (%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 28(%esp),%edx
+ vpshufd $132,%xmm6,%xmm7
+ movl %eax,8(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ vpsrldq $8,%xmm7,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ vpaddd %xmm7,%xmm3,%xmm3
+ movl 12(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ vpshufd $80,%xmm3,%xmm7
+ addl 4(%esp),%edx
+ andl %eax,%ebx
+ addl 88(%esp),%edx
+ vpsrld $10,%xmm7,%xmm6
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 20(%esp),%edx
+ vpsrlq $17,%xmm7,%xmm5
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ vpxor %xmm5,%xmm6,%xmm6
+ movl %edx,20(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ vpsrlq $19,%xmm7,%xmm7
+ andnl 28(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 24(%esp),%edx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %ebx,4(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ vpshufd $232,%xmm6,%xmm7
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ vpslldq $8,%xmm7,%xmm7
+ movl 8(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ vpaddd %xmm7,%xmm3,%xmm3
+ addl (%esp),%edx
+ andl %ebx,%eax
+ addl 92(%esp),%edx
+ vpaddd 48(%ebp),%xmm3,%xmm6
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 16(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ vmovdqa %xmm6,80(%esp)
+ cmpl $66051,64(%ebp)
+ jne .L018avx_bmi_00_47
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,16(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 24(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 20(%esp),%edx
+ movl %eax,(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 4(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ andl %eax,%ebx
+ addl 32(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 12(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,12(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 20(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 16(%esp),%edx
+ movl %ebx,28(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl (%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ andl %ebx,%eax
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 8(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,8(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 16(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 12(%esp),%edx
+ movl %eax,24(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 28(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ andl %eax,%ebx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 4(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,4(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 12(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 8(%esp),%edx
+ movl %ebx,20(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 24(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ andl %ebx,%eax
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl (%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 8(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 4(%esp),%edx
+ movl %eax,16(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 20(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ andl %eax,%ebx
+ addl 48(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 28(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,28(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 4(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl (%esp),%edx
+ movl %ebx,12(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 16(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ andl %ebx,%eax
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 24(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,24(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl (%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 28(%esp),%edx
+ movl %eax,8(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 12(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ andl %eax,%ebx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 20(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,20(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 28(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 24(%esp),%edx
+ movl %ebx,4(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 8(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ andl %ebx,%eax
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 16(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,16(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 24(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 20(%esp),%edx
+ movl %eax,(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 4(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ andl %eax,%ebx
+ addl 64(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 12(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,12(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 20(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 16(%esp),%edx
+ movl %ebx,28(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl (%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ andl %ebx,%eax
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 8(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,8(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 16(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 12(%esp),%edx
+ movl %eax,24(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 28(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ andl %eax,%ebx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 4(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,4(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 12(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 8(%esp),%edx
+ movl %ebx,20(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 24(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ andl %ebx,%eax
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl (%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 8(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 4(%esp),%edx
+ movl %eax,16(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 20(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ andl %eax,%ebx
+ addl 80(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 28(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,28(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 4(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl (%esp),%edx
+ movl %ebx,12(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 16(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ andl %ebx,%eax
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 24(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,24(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl (%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 28(%esp),%edx
+ movl %eax,8(%esp)
+ orl %esi,%edx
+ rorxl $2,%eax,%edi
+ rorxl $13,%eax,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%eax,%ecx
+ xorl %edi,%esi
+ movl 12(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ andl %eax,%ebx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ addl %edx,%ecx
+ addl 20(%esp),%edx
+ leal (%ebx,%ecx,1),%ebx
+ rorxl $6,%edx,%ecx
+ rorxl $11,%edx,%esi
+ movl %edx,20(%esp)
+ rorxl $25,%edx,%edi
+ xorl %esi,%ecx
+ andnl 28(%esp),%edx,%esi
+ xorl %edi,%ecx
+ andl 24(%esp),%edx
+ movl %ebx,4(%esp)
+ orl %esi,%edx
+ rorxl $2,%ebx,%edi
+ rorxl $13,%ebx,%esi
+ leal (%edx,%ecx,1),%edx
+ rorxl $22,%ebx,%ecx
+ xorl %edi,%esi
+ movl 8(%esp),%edi
+ xorl %esi,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ andl %ebx,%eax
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%ecx
+ addl 16(%esp),%edx
+ leal (%eax,%ecx,1),%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebx
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebx,4(%esp)
+ xorl %edi,%ebx
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %edi,20(%esp)
+ movl 28(%esp),%edi
+ movl %ecx,24(%esi)
+ addl 28(%esi),%edi
+ movl %ecx,24(%esp)
+ movl %edi,28(%esi)
+ movl %edi,28(%esp)
+ movl 100(%esp),%edi
+ vmovdqa 64(%ebp),%xmm7
+ subl $192,%ebp
+ cmpl 104(%esp),%edi
+ jb .L017grand_avx_bmi
+ movl 108(%esp),%esp
+ vzeroall
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
-.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
-.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
-.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
-.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
-.byte 62,0
+.comm OPENSSL_ia32cap_P,16,4
#endif
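
For reference only, and not part of the diff above: a minimal C sketch of the
SHA-256 Sigma/sigma and Ch/Maj functions that the shrd/rorx round code and the
vpsrld/vpslld/vpsrlq message-schedule code in sha256-586.S compute. The rotation
counts match the rorxl $6/$11/$25, rorxl $2/$13/$22 and andnl operands visible
above; all names below are illustrative and do not appear in the generated file.

#include <stdint.h>

/* Rotate a 32-bit word right by r bits (1 <= r <= 31). */
static inline uint32_t ror32(uint32_t x, unsigned r)
{
	return (x >> r) | (x << (32 - r));
}

/* Big Sigma functions applied to the working variables e and a each round. */
static inline uint32_t Sigma1(uint32_t e) { return ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25); }
static inline uint32_t Sigma0(uint32_t a) { return ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22); }

/* Small sigma functions used by the 16..63 message-schedule expansion. */
static inline uint32_t sigma0(uint32_t w) { return ror32(w, 7) ^ ror32(w, 18) ^ (w >> 3); }
static inline uint32_t sigma1(uint32_t w) { return ror32(w, 17) ^ ror32(w, 19) ^ (w >> 10); }

/* Choice and majority; the BMI path uses andnl for the ~e & g term of Ch. */
static inline uint32_t Ch(uint32_t e, uint32_t f, uint32_t g)  { return (e & f) ^ (~e & g); }
static inline uint32_t Maj(uint32_t a, uint32_t b, uint32_t c) { return (a & b) ^ (a & c) ^ (b & c); }
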
Modified: trunk/secure/lib/libcrypto/i386/sha512-586.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/sha512-586.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/sha512-586.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/sha512-586.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from sha512-586.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/sha512-586.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from sha512-586.pl. */
#ifdef PIC
.file "sha512-586.S"
.text
@@ -29,251 +29,2244 @@
movl %edi,4(%esp)
movl %eax,8(%esp)
movl %ebx,12(%esp)
- leal _GLOBAL_OFFSET_TABLE_+[.-.L001K512](%ebp),%edx
- movl OPENSSL_ia32cap_P at GOT(%edx),%edx
- btl $26,(%edx)
- jnc .L002loop_x86
+ leal OPENSSL_ia32cap_P-.L001K512(%ebp),%edx
+ movl (%edx),%ecx
+ testl $67108864,%ecx
+ jz .L002loop_x86
+ movl 4(%edx),%edx
movq (%esi),%mm0
+ andl $16777216,%ecx
movq 8(%esi),%mm1
+ andl $512,%edx
movq 16(%esi),%mm2
+ orl %edx,%ecx
movq 24(%esi),%mm3
movq 32(%esi),%mm4
movq 40(%esi),%mm5
movq 48(%esi),%mm6
movq 56(%esi),%mm7
+ cmpl $16777728,%ecx
+ je .L003SSSE3
subl $80,%esp
+ jmp .L004loop_sse2
.align 16
-.L003loop_sse2:
+.L004loop_sse2:
movq %mm1,8(%esp)
movq %mm2,16(%esp)
movq %mm3,24(%esp)
movq %mm5,40(%esp)
movq %mm6,48(%esp)
+ pxor %mm1,%mm2
movq %mm7,56(%esp)
- movl (%edi),%ecx
- movl 4(%edi),%edx
+ movq %mm0,%mm3
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
addl $8,%edi
- bswap %ecx
- bswap %edx
- movl %ecx,76(%esp)
- movl %edx,72(%esp)
+ movl $15,%edx
+ bswap %eax
+ bswap %ebx
+ jmp .L00500_14_sse2
.align 16
-.L00400_14_sse2:
+.L00500_14_sse2:
+ movd %eax,%mm1
movl (%edi),%eax
+ movd %ebx,%mm7
movl 4(%edi),%ebx
addl $8,%edi
bswap %eax
bswap %ebx
- movl %eax,68(%esp)
- movl %ebx,64(%esp)
+ punpckldq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm3,%mm0
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
movq 40(%esp),%mm5
+ paddq %mm2,%mm3
+ movq %mm0,%mm2
+ addl $8,%ebp
+ paddq %mm6,%mm3
movq 48(%esp),%mm6
- movq 56(%esp),%mm7
+ decl %edx
+ jnz .L00500_14_sse2
+ movd %eax,%mm1
+ movd %ebx,%mm7
+ punpckldq %mm1,%mm7
movq %mm4,%mm1
- movq %mm4,%mm2
+ pxor %mm6,%mm5
psrlq $14,%mm1
movq %mm4,32(%esp)
- psllq $23,%mm2
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm3,%mm0
+ movq %mm7,72(%esp)
movq %mm1,%mm3
psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
+ paddq 56(%esp),%mm7
pxor %mm1,%mm3
+ psllq $4,%mm4
paddq (%ebp),%mm7
- pxor %mm2,%mm3
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm2,%mm3
+ movq %mm0,%mm2
+ addl $8,%ebp
+ paddq %mm6,%mm3
+ pxor %mm0,%mm0
+ movl $32,%edx
+ jmp .L00616_79_sse2
+.align 16
+.L00616_79_sse2:
+ movq 88(%esp),%mm5
+ movq %mm7,%mm1
+ psrlq $1,%mm7
+ movq %mm5,%mm6
+ psrlq $6,%mm5
+ psllq $56,%mm1
+ paddq %mm3,%mm0
+ movq %mm7,%mm3
+ psrlq $6,%mm7
+ pxor %mm1,%mm3
+ psllq $7,%mm1
+ pxor %mm7,%mm3
+ psrlq $1,%mm7
+ pxor %mm1,%mm3
+ movq %mm5,%mm1
+ psrlq $13,%mm5
+ pxor %mm3,%mm7
+ psllq $3,%mm6
+ pxor %mm5,%mm1
+ paddq 200(%esp),%mm7
+ pxor %mm6,%mm1
+ psrlq $42,%mm5
+ paddq 128(%esp),%mm7
+ pxor %mm5,%mm1
+ psllq $42,%mm6
+ movq 40(%esp),%mm5
+ pxor %mm6,%mm1
+ movq 48(%esp),%mm6
+ paddq %mm1,%mm7
+ movq %mm4,%mm1
pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm6,%mm2
+ addl $8,%ebp
+ movq 88(%esp),%mm5
+ movq %mm7,%mm1
+ psrlq $1,%mm7
+ movq %mm5,%mm6
+ psrlq $6,%mm5
+ psllq $56,%mm1
+ paddq %mm3,%mm2
+ movq %mm7,%mm3
+ psrlq $6,%mm7
+ pxor %mm1,%mm3
+ psllq $7,%mm1
+ pxor %mm7,%mm3
+ psrlq $1,%mm7
+ pxor %mm1,%mm3
+ movq %mm5,%mm1
+ psrlq $13,%mm5
+ pxor %mm3,%mm7
+ psllq $3,%mm6
+ pxor %mm5,%mm1
+ paddq 200(%esp),%mm7
+ pxor %mm6,%mm1
+ psrlq $42,%mm5
+ paddq 128(%esp),%mm7
+ pxor %mm5,%mm1
+ psllq $42,%mm6
+ movq 40(%esp),%mm5
+ pxor %mm6,%mm1
+ movq 48(%esp),%mm6
+ paddq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
pand %mm4,%mm5
- movq 16(%esp),%mm2
+ psllq $23,%mm4
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
movq 24(%esp),%mm4
- paddq %mm5,%mm3
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm6,%mm0
+ addl $8,%ebp
+ decl %edx
+ jnz .L00616_79_sse2
+ paddq %mm3,%mm0
+ movq 8(%esp),%mm1
+ movq 24(%esp),%mm3
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ pxor %mm1,%mm2
+ paddq (%esi),%mm0
+ paddq 8(%esi),%mm1
+ paddq 16(%esi),%mm2
+ paddq 24(%esi),%mm3
+ paddq 32(%esi),%mm4
+ paddq 40(%esi),%mm5
+ paddq 48(%esi),%mm6
+ paddq 56(%esi),%mm7
+ movl $640,%eax
+ movq %mm0,(%esi)
+ movq %mm1,8(%esi)
+ movq %mm2,16(%esi)
+ movq %mm3,24(%esi)
+ movq %mm4,32(%esi)
+ movq %mm5,40(%esi)
+ movq %mm6,48(%esi)
+ movq %mm7,56(%esi)
+ leal (%esp,%eax,1),%esp
+ subl %eax,%ebp
+ cmpl 88(%esp),%edi
+ jb .L004loop_sse2
+ movl 92(%esp),%esp
+ emms
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L003SSSE3:
+ leal -64(%esp),%edx
+ subl $256,%esp
+ movdqa 640(%ebp),%xmm1
+ movdqu (%edi),%xmm0
+.byte 102,15,56,0,193
+ movdqa (%ebp),%xmm3
+ movdqa %xmm1,%xmm2
+ movdqu 16(%edi),%xmm1
+ paddq %xmm0,%xmm3
+.byte 102,15,56,0,202
+ movdqa %xmm3,-128(%edx)
+ movdqa 16(%ebp),%xmm4
+ movdqa %xmm2,%xmm3
+ movdqu 32(%edi),%xmm2
+ paddq %xmm1,%xmm4
+.byte 102,15,56,0,211
+ movdqa %xmm4,-112(%edx)
+ movdqa 32(%ebp),%xmm5
+ movdqa %xmm3,%xmm4
+ movdqu 48(%edi),%xmm3
+ paddq %xmm2,%xmm5
+.byte 102,15,56,0,220
+ movdqa %xmm5,-96(%edx)
+ movdqa 48(%ebp),%xmm6
+ movdqa %xmm4,%xmm5
+ movdqu 64(%edi),%xmm4
+ paddq %xmm3,%xmm6
+.byte 102,15,56,0,229
+ movdqa %xmm6,-80(%edx)
+ movdqa 64(%ebp),%xmm7
+ movdqa %xmm5,%xmm6
+ movdqu 80(%edi),%xmm5
+ paddq %xmm4,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm7,-64(%edx)
+ movdqa %xmm0,(%edx)
+ movdqa 80(%ebp),%xmm0
+ movdqa %xmm6,%xmm7
+ movdqu 96(%edi),%xmm6
+ paddq %xmm5,%xmm0
+.byte 102,15,56,0,247
+ movdqa %xmm0,-48(%edx)
+ movdqa %xmm1,16(%edx)
+ movdqa 96(%ebp),%xmm1
+ movdqa %xmm7,%xmm0
+ movdqu 112(%edi),%xmm7
+ paddq %xmm6,%xmm1
+.byte 102,15,56,0,248
+ movdqa %xmm1,-32(%edx)
+ movdqa %xmm2,32(%edx)
+ movdqa 112(%ebp),%xmm2
+ movdqa (%edx),%xmm0
+ paddq %xmm7,%xmm2
+ movdqa %xmm2,-16(%edx)
+ nop
+.align 32
+.L007loop_ssse3:
+ movdqa 16(%edx),%xmm2
+ movdqa %xmm3,48(%edx)
+ leal 128(%ebp),%ebp
+ movq %mm1,8(%esp)
+ movl %edi,%ebx
+ movq %mm2,16(%esp)
+ leal 128(%edi),%edi
+ movq %mm3,24(%esp)
+ cmpl %eax,%edi
+ movq %mm5,40(%esp)
+ cmovbl %edi,%ebx
+ movq %mm6,48(%esp)
+ movl $4,%ecx
+ pxor %mm1,%mm2
+ movq %mm7,56(%esp)
+ pxor %mm3,%mm3
+ jmp .L00800_47_ssse3
+.align 32
+.L00800_47_ssse3:
+ movdqa %xmm5,%xmm3
+ movdqa %xmm2,%xmm1
+.byte 102,15,58,15,208,8
+ movdqa %xmm4,(%edx)
+.byte 102,15,58,15,220,8
+ movdqa %xmm2,%xmm4
+ psrlq $7,%xmm2
+ paddq %xmm3,%xmm0
+ movdqa %xmm4,%xmm3
+ psrlq $1,%xmm4
+ psllq $56,%xmm3
+ pxor %xmm4,%xmm2
+ psrlq $7,%xmm4
+ pxor %xmm3,%xmm2
+ psllq $7,%xmm3
+ pxor %xmm4,%xmm2
+ movdqa %xmm7,%xmm4
+ pxor %xmm3,%xmm2
+ movdqa %xmm7,%xmm3
+ psrlq $6,%xmm4
+ paddq %xmm2,%xmm0
+ movdqa %xmm7,%xmm2
+ psrlq $19,%xmm3
+ psllq $3,%xmm2
+ pxor %xmm3,%xmm4
+ psrlq $42,%xmm3
+ pxor %xmm2,%xmm4
+ psllq $42,%xmm2
+ pxor %xmm3,%xmm4
+ movdqa 32(%edx),%xmm3
+ pxor %xmm2,%xmm4
+ movdqa (%ebp),%xmm2
+ movq %mm4,%mm1
+ paddq %xmm4,%xmm0
+ movq -128(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ paddq %xmm0,%xmm2
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
paddq %mm7,%mm3
movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
movq %mm0,%mm6
- paddq 72(%esp),%mm3
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -120(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
psrlq $28,%mm5
paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm2,-128(%edx)
+ movdqa %xmm6,%xmm4
+ movdqa %xmm3,%xmm2
+.byte 102,15,58,15,217,8
+ movdqa %xmm5,16(%edx)
+.byte 102,15,58,15,229,8
+ movdqa %xmm3,%xmm5
+ psrlq $7,%xmm3
+ paddq %xmm4,%xmm1
+ movdqa %xmm5,%xmm4
+ psrlq $1,%xmm5
+ psllq $56,%xmm4
+ pxor %xmm5,%xmm3
+ psrlq $7,%xmm5
+ pxor %xmm4,%xmm3
+ psllq $7,%xmm4
+ pxor %xmm5,%xmm3
+ movdqa %xmm0,%xmm5
+ pxor %xmm4,%xmm3
+ movdqa %xmm0,%xmm4
+ psrlq $6,%xmm5
+ paddq %xmm3,%xmm1
+ movdqa %xmm0,%xmm3
+ psrlq $19,%xmm4
+ psllq $3,%xmm3
+ pxor %xmm4,%xmm5
+ psrlq $42,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $42,%xmm3
+ pxor %xmm4,%xmm5
+ movdqa 48(%edx),%xmm4
+ pxor %xmm3,%xmm5
+ movdqa 16(%ebp),%xmm3
+ movq %mm4,%mm1
+ paddq %xmm5,%xmm1
+ movq -112(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ paddq %xmm1,%xmm3
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
psrlq $6,%mm5
pxor %mm6,%mm7
psllq $5,%mm6
pxor %mm5,%mm7
+ pxor %mm1,%mm0
psrlq $5,%mm5
pxor %mm6,%mm7
+ pand %mm0,%mm2
psllq $6,%mm6
pxor %mm5,%mm7
- subl $8,%esp
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -104(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm3,-112(%edx)
+ movdqa %xmm7,%xmm5
+ movdqa %xmm4,%xmm3
+.byte 102,15,58,15,226,8
+ movdqa %xmm6,32(%edx)
+.byte 102,15,58,15,238,8
+ movdqa %xmm4,%xmm6
+ psrlq $7,%xmm4
+ paddq %xmm5,%xmm2
+ movdqa %xmm6,%xmm5
+ psrlq $1,%xmm6
+ psllq $56,%xmm5
+ pxor %xmm6,%xmm4
+ psrlq $7,%xmm6
+ pxor %xmm5,%xmm4
+ psllq $7,%xmm5
+ pxor %xmm6,%xmm4
+ movdqa %xmm1,%xmm6
+ pxor %xmm5,%xmm4
+ movdqa %xmm1,%xmm5
+ psrlq $6,%xmm6
+ paddq %xmm4,%xmm2
+ movdqa %xmm1,%xmm4
+ psrlq $19,%xmm5
+ psllq $3,%xmm4
+ pxor %xmm5,%xmm6
+ psrlq $42,%xmm5
+ pxor %xmm4,%xmm6
+ psllq $42,%xmm4
+ pxor %xmm5,%xmm6
+ movdqa (%edx),%xmm5
+ pxor %xmm4,%xmm6
+ movdqa 32(%ebp),%xmm4
+ movq %mm4,%mm1
+ paddq %xmm6,%xmm2
+ movq -96(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ paddq %xmm2,%xmm4
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
- por %mm2,%mm0
- pand %mm2,%mm5
- pand %mm1,%mm0
- por %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -88(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
- cmpb $53,%dl
- jne .L00400_14_sse2
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm4,-96(%edx)
+ movdqa %xmm0,%xmm6
+ movdqa %xmm5,%xmm4
+.byte 102,15,58,15,235,8
+ movdqa %xmm7,48(%edx)
+.byte 102,15,58,15,247,8
+ movdqa %xmm5,%xmm7
+ psrlq $7,%xmm5
+ paddq %xmm6,%xmm3
+ movdqa %xmm7,%xmm6
+ psrlq $1,%xmm7
+ psllq $56,%xmm6
+ pxor %xmm7,%xmm5
+ psrlq $7,%xmm7
+ pxor %xmm6,%xmm5
+ psllq $7,%xmm6
+ pxor %xmm7,%xmm5
+ movdqa %xmm2,%xmm7
+ pxor %xmm6,%xmm5
+ movdqa %xmm2,%xmm6
+ psrlq $6,%xmm7
+ paddq %xmm5,%xmm3
+ movdqa %xmm2,%xmm5
+ psrlq $19,%xmm6
+ psllq $3,%xmm5
+ pxor %xmm6,%xmm7
+ psrlq $42,%xmm6
+ pxor %xmm5,%xmm7
+ psllq $42,%xmm5
+ pxor %xmm6,%xmm7
+ movdqa 16(%edx),%xmm6
+ pxor %xmm5,%xmm7
+ movdqa 48(%ebp),%xmm5
+ movq %mm4,%mm1
+ paddq %xmm7,%xmm3
+ movq -80(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ paddq %xmm3,%xmm5
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -72(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
movq 40(%esp),%mm5
+ paddq %mm6,%mm0
movq 48(%esp),%mm6
- movq 56(%esp),%mm7
+ movdqa %xmm5,-80(%edx)
+ movdqa %xmm1,%xmm7
+ movdqa %xmm6,%xmm5
+.byte 102,15,58,15,244,8
+ movdqa %xmm0,(%edx)
+.byte 102,15,58,15,248,8
+ movdqa %xmm6,%xmm0
+ psrlq $7,%xmm6
+ paddq %xmm7,%xmm4
+ movdqa %xmm0,%xmm7
+ psrlq $1,%xmm0
+ psllq $56,%xmm7
+ pxor %xmm0,%xmm6
+ psrlq $7,%xmm0
+ pxor %xmm7,%xmm6
+ psllq $7,%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm3,%xmm0
+ pxor %xmm7,%xmm6
+ movdqa %xmm3,%xmm7
+ psrlq $6,%xmm0
+ paddq %xmm6,%xmm4
+ movdqa %xmm3,%xmm6
+ psrlq $19,%xmm7
+ psllq $3,%xmm6
+ pxor %xmm7,%xmm0
+ psrlq $42,%xmm7
+ pxor %xmm6,%xmm0
+ psllq $42,%xmm6
+ pxor %xmm7,%xmm0
+ movdqa 32(%edx),%xmm7
+ pxor %xmm6,%xmm0
+ movdqa 64(%ebp),%xmm6
movq %mm4,%mm1
- movq %mm4,%mm2
+ paddq %xmm0,%xmm4
+ movq -64(%edx),%mm7
+ pxor %mm6,%mm5
psrlq $14,%mm1
movq %mm4,32(%esp)
- psllq $23,%mm2
+ paddq %xmm4,%xmm6
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
movq %mm1,%mm3
psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
+ paddq 56(%esp),%mm7
pxor %mm1,%mm3
- paddq (%ebp),%mm7
- pxor %mm2,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -56(%edx),%mm7
pxor %mm6,%mm5
- movq 8(%esp),%mm1
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
pand %mm4,%mm5
- movq 16(%esp),%mm2
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm6,-64(%edx)
+ movdqa %xmm2,%xmm0
+ movdqa %xmm7,%xmm6
+.byte 102,15,58,15,253,8
+ movdqa %xmm1,16(%edx)
+.byte 102,15,58,15,193,8
+ movdqa %xmm7,%xmm1
+ psrlq $7,%xmm7
+ paddq %xmm0,%xmm5
+ movdqa %xmm1,%xmm0
+ psrlq $1,%xmm1
+ psllq $56,%xmm0
+ pxor %xmm1,%xmm7
+ psrlq $7,%xmm1
+ pxor %xmm0,%xmm7
+ psllq $7,%xmm0
+ pxor %xmm1,%xmm7
+ movdqa %xmm4,%xmm1
+ pxor %xmm0,%xmm7
+ movdqa %xmm4,%xmm0
+ psrlq $6,%xmm1
+ paddq %xmm7,%xmm5
+ movdqa %xmm4,%xmm7
+ psrlq $19,%xmm0
+ psllq $3,%xmm7
+ pxor %xmm0,%xmm1
+ psrlq $42,%xmm0
+ pxor %xmm7,%xmm1
+ psllq $42,%xmm7
+ pxor %xmm0,%xmm1
+ movdqa 48(%edx),%xmm0
+ pxor %xmm7,%xmm1
+ movdqa 80(%ebp),%xmm7
+ movq %mm4,%mm1
+ paddq %xmm1,%xmm5
+ movq -48(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ paddq %xmm5,%xmm7
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -40(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm7,-48(%edx)
+ movdqa %xmm3,%xmm1
+ movdqa %xmm0,%xmm7
+.byte 102,15,58,15,198,8
+ movdqa %xmm2,32(%edx)
+.byte 102,15,58,15,202,8
+ movdqa %xmm0,%xmm2
+ psrlq $7,%xmm0
+ paddq %xmm1,%xmm6
+ movdqa %xmm2,%xmm1
+ psrlq $1,%xmm2
+ psllq $56,%xmm1
+ pxor %xmm2,%xmm0
+ psrlq $7,%xmm2
+ pxor %xmm1,%xmm0
+ psllq $7,%xmm1
+ pxor %xmm2,%xmm0
+ movdqa %xmm5,%xmm2
+ pxor %xmm1,%xmm0
+ movdqa %xmm5,%xmm1
+ psrlq $6,%xmm2
+ paddq %xmm0,%xmm6
+ movdqa %xmm5,%xmm0
+ psrlq $19,%xmm1
+ psllq $3,%xmm0
+ pxor %xmm1,%xmm2
+ psrlq $42,%xmm1
+ pxor %xmm0,%xmm2
+ psllq $42,%xmm0
+ pxor %xmm1,%xmm2
+ movdqa (%edx),%xmm1
+ pxor %xmm0,%xmm2
+ movdqa 96(%ebp),%xmm0
+ movq %mm4,%mm1
+ paddq %xmm2,%xmm6
+ movq -32(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ paddq %xmm6,%xmm0
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -24(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm0,-32(%edx)
+ movdqa %xmm4,%xmm2
+ movdqa %xmm1,%xmm0
+.byte 102,15,58,15,207,8
+ movdqa %xmm3,48(%edx)
+.byte 102,15,58,15,211,8
+ movdqa %xmm1,%xmm3
+ psrlq $7,%xmm1
+ paddq %xmm2,%xmm7
+ movdqa %xmm3,%xmm2
+ psrlq $1,%xmm3
+ psllq $56,%xmm2
+ pxor %xmm3,%xmm1
+ psrlq $7,%xmm3
+ pxor %xmm2,%xmm1
+ psllq $7,%xmm2
+ pxor %xmm3,%xmm1
+ movdqa %xmm6,%xmm3
+ pxor %xmm2,%xmm1
+ movdqa %xmm6,%xmm2
+ psrlq $6,%xmm3
+ paddq %xmm1,%xmm7
+ movdqa %xmm6,%xmm1
+ psrlq $19,%xmm2
+ psllq $3,%xmm1
+ pxor %xmm2,%xmm3
+ psrlq $42,%xmm2
+ pxor %xmm1,%xmm3
+ psllq $42,%xmm1
+ pxor %xmm2,%xmm3
+ movdqa 16(%edx),%xmm2
+ pxor %xmm1,%xmm3
+ movdqa 112(%ebp),%xmm1
+ movq %mm4,%mm1
+ paddq %xmm3,%xmm7
+ movq -16(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ paddq %xmm7,%xmm1
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -8(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 40(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 48(%esp),%mm6
+ movdqa %xmm1,-16(%edx)
+ leal 128(%ebp),%ebp
+ decl %ecx
+ jnz .L00800_47_ssse3
+ movdqa (%ebp),%xmm1
+ leal -640(%ebp),%ebp
+ movdqu (%ebx),%xmm0
+.byte 102,15,56,0,193
+ movdqa (%ebp),%xmm3
+ movdqa %xmm1,%xmm2
+ movdqu 16(%ebx),%xmm1
+ paddq %xmm0,%xmm3
+.byte 102,15,56,0,202
+ movq %mm4,%mm1
+ movq -128(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
movq 24(%esp),%mm4
- paddq %mm5,%mm3
- movq %mm0,(%esp)
paddq %mm7,%mm3
movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
movq %mm0,%mm6
- paddq 72(%esp),%mm3
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -120(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
psrlq $28,%mm5
paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm3,-128(%edx)
+ movdqa 16(%ebp),%xmm4
+ movdqa %xmm2,%xmm3
+ movdqu 32(%ebx),%xmm2
+ paddq %xmm1,%xmm4
+.byte 102,15,56,0,211
+ movq %mm4,%mm1
+ movq -112(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
psrlq $6,%mm5
pxor %mm6,%mm7
psllq $5,%mm6
pxor %mm5,%mm7
+ pxor %mm1,%mm0
psrlq $5,%mm5
pxor %mm6,%mm7
+ pand %mm0,%mm2
psllq $6,%mm6
pxor %mm5,%mm7
- subl $8,%esp
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -104(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm4,-112(%edx)
+ movdqa 32(%ebp),%xmm5
+ movdqa %xmm3,%xmm4
+ movdqu 48(%ebx),%xmm3
+ paddq %xmm2,%xmm5
+.byte 102,15,56,0,220
+ movq %mm4,%mm1
+ movq -96(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
- por %mm2,%mm0
- movq 88(%esp),%mm6
- pand %mm2,%mm5
- pand %mm1,%mm0
- movq 192(%esp),%mm2
- por %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -88(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
-.align 16
-.L00516_79_sse2:
- movq %mm2,%mm1
- psrlq $1,%mm2
- movq %mm6,%mm7
- psrlq $6,%mm6
- movq %mm2,%mm3
- psrlq $6,%mm2
- movq %mm6,%mm5
- psrlq $13,%mm6
- pxor %mm2,%mm3
- psrlq $1,%mm2
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm5,-96(%edx)
+ movdqa 48(%ebp),%xmm6
+ movdqa %xmm4,%xmm5
+ movdqu 64(%ebx),%xmm4
+ paddq %xmm3,%xmm6
+.byte 102,15,56,0,229
+ movq %mm4,%mm1
+ movq -80(%edx),%mm7
pxor %mm6,%mm5
- psrlq $42,%mm6
- pxor %mm2,%mm3
- movq 200(%esp),%mm2
- psllq $56,%mm1
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
- psllq $3,%mm7
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
- paddq 128(%esp),%mm2
- psllq $7,%mm1
- pxor %mm7,%mm5
- psllq $42,%mm7
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
pxor %mm1,%mm3
- pxor %mm7,%mm5
- paddq %mm5,%mm3
- paddq %mm2,%mm3
- movq %mm3,72(%esp)
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -72(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
movq 40(%esp),%mm5
+ paddq %mm6,%mm0
movq 48(%esp),%mm6
- movq 56(%esp),%mm7
+ movdqa %xmm6,-80(%edx)
+ movdqa 64(%ebp),%xmm7
+ movdqa %xmm5,%xmm6
+ movdqu 80(%ebx),%xmm5
+ paddq %xmm4,%xmm7
+.byte 102,15,56,0,238
movq %mm4,%mm1
- movq %mm4,%mm2
+ movq -64(%edx),%mm7
+ pxor %mm6,%mm5
psrlq $14,%mm1
movq %mm4,32(%esp)
- psllq $23,%mm2
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
movq %mm1,%mm3
psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
+ paddq 56(%esp),%mm7
pxor %mm1,%mm3
- paddq (%ebp),%mm7
- pxor %mm2,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -56(%edx),%mm7
pxor %mm6,%mm5
- movq 8(%esp),%mm1
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
pand %mm4,%mm5
- movq 16(%esp),%mm2
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
- movq 24(%esp),%mm4
- paddq %mm5,%mm3
- movq %mm0,(%esp)
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm7,-64(%edx)
+ movdqa %xmm0,(%edx)
+ movdqa 80(%ebp),%xmm0
+ movdqa %xmm6,%xmm7
+ movdqu 96(%ebx),%xmm6
+ paddq %xmm5,%xmm0
+.byte 102,15,56,0,247
+ movq %mm4,%mm1
+ movq -48(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
movq %mm0,%mm6
- paddq 72(%esp),%mm3
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -40(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
psrlq $28,%mm5
paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm0,-48(%edx)
+ movdqa %xmm1,16(%edx)
+ movdqa 96(%ebp),%xmm1
+ movdqa %xmm7,%xmm0
+ movdqu 112(%ebx),%xmm7
+ paddq %xmm6,%xmm1
+.byte 102,15,56,0,248
+ movq %mm4,%mm1
+ movq -32(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
psrlq $6,%mm5
pxor %mm6,%mm7
psllq $5,%mm6
pxor %mm5,%mm7
+ pxor %mm1,%mm0
psrlq $5,%mm5
pxor %mm6,%mm7
+ pand %mm0,%mm2
psllq $6,%mm6
pxor %mm5,%mm7
- subl $8,%esp
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -24(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm1,-32(%edx)
+ movdqa %xmm2,32(%edx)
+ movdqa 112(%ebp),%xmm2
+ movdqa (%edx),%xmm0
+ paddq %xmm7,%xmm2
+ movq %mm4,%mm1
+ movq -16(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
- por %mm2,%mm0
- movq 88(%esp),%mm6
- pand %mm2,%mm5
- pand %mm1,%mm0
- movq 192(%esp),%mm2
- por %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -8(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
- cmpb $23,%dl
- jne .L00516_79_sse2
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 40(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 48(%esp),%mm6
+ movdqa %xmm2,-16(%edx)
movq 8(%esp),%mm1
- movq 16(%esp),%mm2
+ paddq %mm3,%mm0
movq 24(%esp),%mm3
- movq 40(%esp),%mm5
- movq 48(%esp),%mm6
movq 56(%esp),%mm7
+ pxor %mm1,%mm2
paddq (%esi),%mm0
paddq 8(%esi),%mm1
paddq 16(%esi),%mm2
@@ -290,12 +2283,10 @@
movq %mm5,40(%esi)
movq %mm6,48(%esi)
movq %mm7,56(%esi)
- addl $640,%esp
- subl $640,%ebp
- cmpl 88(%esp),%edi
- jb .L003loop_sse2
+ cmpl %eax,%edi
+ jb .L007loop_ssse3
+ movl 76(%edx),%esp
emms
- movl 92(%esp),%esp
popl %edi
popl %esi
popl %ebx
@@ -406,7 +2397,7 @@
movl $16,%ecx
.long 2784229001
.align 16
-.L00600_15_x86:
+.L00900_15_x86:
movl 40(%esp),%ecx
movl 44(%esp),%edx
movl %ecx,%esi
@@ -513,9 +2504,9 @@
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $148,%dl
- jne .L00600_15_x86
+ jne .L00900_15_x86
.align 16
-.L00716_79_x86:
+.L01016_79_x86:
movl 312(%esp),%ecx
movl 316(%esp),%edx
movl %ecx,%esi
@@ -688,7 +2679,7 @@
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $23,%dl
- jne .L00716_79_x86
+ jne .L01016_79_x86
movl 840(%esp),%esi
movl 844(%esp),%edi
movl (%esi),%eax
@@ -831,6 +2822,8 @@
.long 4234509866,1501505948
.long 987167468,1607167915
.long 1246189591,1816402316
+.long 67438087,66051
+.long 202182159,134810123
.size sha512_block_data_order,.-.L_sha512_block_data_order_begin
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
@@ -837,7 +2830,7 @@
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#else
.file "sha512-586.S"
.text
@@ -867,249 +2860,2243 @@
movl %eax,8(%esp)
movl %ebx,12(%esp)
leal OPENSSL_ia32cap_P,%edx
- btl $26,(%edx)
- jnc .L002loop_x86
+ movl (%edx),%ecx
+ testl $67108864,%ecx
+ jz .L002loop_x86
+ movl 4(%edx),%edx
movq (%esi),%mm0
+ andl $16777216,%ecx
movq 8(%esi),%mm1
+ andl $512,%edx
movq 16(%esi),%mm2
+ orl %edx,%ecx
movq 24(%esi),%mm3
movq 32(%esi),%mm4
movq 40(%esi),%mm5
movq 48(%esi),%mm6
movq 56(%esi),%mm7
+ cmpl $16777728,%ecx
+ je .L003SSSE3
subl $80,%esp
+ jmp .L004loop_sse2
.align 16
-.L003loop_sse2:
+.L004loop_sse2:
movq %mm1,8(%esp)
movq %mm2,16(%esp)
movq %mm3,24(%esp)
movq %mm5,40(%esp)
movq %mm6,48(%esp)
+ pxor %mm1,%mm2
movq %mm7,56(%esp)
- movl (%edi),%ecx
- movl 4(%edi),%edx
+ movq %mm0,%mm3
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
addl $8,%edi
- bswap %ecx
- bswap %edx
- movl %ecx,76(%esp)
- movl %edx,72(%esp)
+ movl $15,%edx
+ bswap %eax
+ bswap %ebx
+ jmp .L00500_14_sse2
.align 16
-.L00400_14_sse2:
+.L00500_14_sse2:
+ movd %eax,%mm1
movl (%edi),%eax
+ movd %ebx,%mm7
movl 4(%edi),%ebx
addl $8,%edi
bswap %eax
bswap %ebx
- movl %eax,68(%esp)
- movl %ebx,64(%esp)
+ punpckldq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm3,%mm0
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
movq 40(%esp),%mm5
+ paddq %mm2,%mm3
+ movq %mm0,%mm2
+ addl $8,%ebp
+ paddq %mm6,%mm3
movq 48(%esp),%mm6
- movq 56(%esp),%mm7
+ decl %edx
+ jnz .L00500_14_sse2
+ movd %eax,%mm1
+ movd %ebx,%mm7
+ punpckldq %mm1,%mm7
movq %mm4,%mm1
- movq %mm4,%mm2
+ pxor %mm6,%mm5
psrlq $14,%mm1
movq %mm4,32(%esp)
- psllq $23,%mm2
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm3,%mm0
+ movq %mm7,72(%esp)
movq %mm1,%mm3
psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
+ paddq 56(%esp),%mm7
pxor %mm1,%mm3
+ psllq $4,%mm4
paddq (%ebp),%mm7
- pxor %mm2,%mm3
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm2,%mm3
+ movq %mm0,%mm2
+ addl $8,%ebp
+ paddq %mm6,%mm3
+ pxor %mm0,%mm0
+ movl $32,%edx
+ jmp .L00616_79_sse2
+.align 16
+.L00616_79_sse2:
+ movq 88(%esp),%mm5
+ movq %mm7,%mm1
+ psrlq $1,%mm7
+ movq %mm5,%mm6
+ psrlq $6,%mm5
+ psllq $56,%mm1
+ paddq %mm3,%mm0
+ movq %mm7,%mm3
+ psrlq $6,%mm7
+ pxor %mm1,%mm3
+ psllq $7,%mm1
+ pxor %mm7,%mm3
+ psrlq $1,%mm7
+ pxor %mm1,%mm3
+ movq %mm5,%mm1
+ psrlq $13,%mm5
+ pxor %mm3,%mm7
+ psllq $3,%mm6
+ pxor %mm5,%mm1
+ paddq 200(%esp),%mm7
+ pxor %mm6,%mm1
+ psrlq $42,%mm5
+ paddq 128(%esp),%mm7
+ pxor %mm5,%mm1
+ psllq $42,%mm6
+ movq 40(%esp),%mm5
+ pxor %mm6,%mm1
+ movq 48(%esp),%mm6
+ paddq %mm1,%mm7
+ movq %mm4,%mm1
pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm6,%mm2
+ addl $8,%ebp
+ movq 88(%esp),%mm5
+ movq %mm7,%mm1
+ psrlq $1,%mm7
+ movq %mm5,%mm6
+ psrlq $6,%mm5
+ psllq $56,%mm1
+ paddq %mm3,%mm2
+ movq %mm7,%mm3
+ psrlq $6,%mm7
+ pxor %mm1,%mm3
+ psllq $7,%mm1
+ pxor %mm7,%mm3
+ psrlq $1,%mm7
+ pxor %mm1,%mm3
+ movq %mm5,%mm1
+ psrlq $13,%mm5
+ pxor %mm3,%mm7
+ psllq $3,%mm6
+ pxor %mm5,%mm1
+ paddq 200(%esp),%mm7
+ pxor %mm6,%mm1
+ psrlq $42,%mm5
+ paddq 128(%esp),%mm7
+ pxor %mm5,%mm1
+ psllq $42,%mm6
+ movq 40(%esp),%mm5
+ pxor %mm6,%mm1
+ movq 48(%esp),%mm6
+ paddq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
pand %mm4,%mm5
- movq 16(%esp),%mm2
+ psllq $23,%mm4
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
movq 24(%esp),%mm4
- paddq %mm5,%mm3
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm6,%mm0
+ addl $8,%ebp
+ decl %edx
+ jnz .L00616_79_sse2
+ paddq %mm3,%mm0
+ movq 8(%esp),%mm1
+ movq 24(%esp),%mm3
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ pxor %mm1,%mm2
+ paddq (%esi),%mm0
+ paddq 8(%esi),%mm1
+ paddq 16(%esi),%mm2
+ paddq 24(%esi),%mm3
+ paddq 32(%esi),%mm4
+ paddq 40(%esi),%mm5
+ paddq 48(%esi),%mm6
+ paddq 56(%esi),%mm7
+ movl $640,%eax
+ movq %mm0,(%esi)
+ movq %mm1,8(%esi)
+ movq %mm2,16(%esi)
+ movq %mm3,24(%esi)
+ movq %mm4,32(%esi)
+ movq %mm5,40(%esi)
+ movq %mm6,48(%esi)
+ movq %mm7,56(%esi)
+ leal (%esp,%eax,1),%esp
+ subl %eax,%ebp
+ cmpl 88(%esp),%edi
+ jb .L004loop_sse2
+ movl 92(%esp),%esp
+ emms
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L003SSSE3:
+ leal -64(%esp),%edx
+ subl $256,%esp
+ movdqa 640(%ebp),%xmm1
+ movdqu (%edi),%xmm0
+.byte 102,15,56,0,193
+ movdqa (%ebp),%xmm3
+ movdqa %xmm1,%xmm2
+ movdqu 16(%edi),%xmm1
+ paddq %xmm0,%xmm3
+.byte 102,15,56,0,202
+ movdqa %xmm3,-128(%edx)
+ movdqa 16(%ebp),%xmm4
+ movdqa %xmm2,%xmm3
+ movdqu 32(%edi),%xmm2
+ paddq %xmm1,%xmm4
+.byte 102,15,56,0,211
+ movdqa %xmm4,-112(%edx)
+ movdqa 32(%ebp),%xmm5
+ movdqa %xmm3,%xmm4
+ movdqu 48(%edi),%xmm3
+ paddq %xmm2,%xmm5
+.byte 102,15,56,0,220
+ movdqa %xmm5,-96(%edx)
+ movdqa 48(%ebp),%xmm6
+ movdqa %xmm4,%xmm5
+ movdqu 64(%edi),%xmm4
+ paddq %xmm3,%xmm6
+.byte 102,15,56,0,229
+ movdqa %xmm6,-80(%edx)
+ movdqa 64(%ebp),%xmm7
+ movdqa %xmm5,%xmm6
+ movdqu 80(%edi),%xmm5
+ paddq %xmm4,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm7,-64(%edx)
+ movdqa %xmm0,(%edx)
+ movdqa 80(%ebp),%xmm0
+ movdqa %xmm6,%xmm7
+ movdqu 96(%edi),%xmm6
+ paddq %xmm5,%xmm0
+.byte 102,15,56,0,247
+ movdqa %xmm0,-48(%edx)
+ movdqa %xmm1,16(%edx)
+ movdqa 96(%ebp),%xmm1
+ movdqa %xmm7,%xmm0
+ movdqu 112(%edi),%xmm7
+ paddq %xmm6,%xmm1
+.byte 102,15,56,0,248
+ movdqa %xmm1,-32(%edx)
+ movdqa %xmm2,32(%edx)
+ movdqa 112(%ebp),%xmm2
+ movdqa (%edx),%xmm0
+ paddq %xmm7,%xmm2
+ movdqa %xmm2,-16(%edx)
+ nop
+.align 32
+.L007loop_ssse3:
+ movdqa 16(%edx),%xmm2
+ movdqa %xmm3,48(%edx)
+ leal 128(%ebp),%ebp
+ movq %mm1,8(%esp)
+ movl %edi,%ebx
+ movq %mm2,16(%esp)
+ leal 128(%edi),%edi
+ movq %mm3,24(%esp)
+ cmpl %eax,%edi
+ movq %mm5,40(%esp)
+ cmovbl %edi,%ebx
+ movq %mm6,48(%esp)
+ movl $4,%ecx
+ pxor %mm1,%mm2
+ movq %mm7,56(%esp)
+ pxor %mm3,%mm3
+ jmp .L00800_47_ssse3
+.align 32
+.L00800_47_ssse3:
+ movdqa %xmm5,%xmm3
+ movdqa %xmm2,%xmm1
+.byte 102,15,58,15,208,8
+ movdqa %xmm4,(%edx)
+.byte 102,15,58,15,220,8
+ movdqa %xmm2,%xmm4
+ psrlq $7,%xmm2
+ paddq %xmm3,%xmm0
+ movdqa %xmm4,%xmm3
+ psrlq $1,%xmm4
+ psllq $56,%xmm3
+ pxor %xmm4,%xmm2
+ psrlq $7,%xmm4
+ pxor %xmm3,%xmm2
+ psllq $7,%xmm3
+ pxor %xmm4,%xmm2
+ movdqa %xmm7,%xmm4
+ pxor %xmm3,%xmm2
+ movdqa %xmm7,%xmm3
+ psrlq $6,%xmm4
+ paddq %xmm2,%xmm0
+ movdqa %xmm7,%xmm2
+ psrlq $19,%xmm3
+ psllq $3,%xmm2
+ pxor %xmm3,%xmm4
+ psrlq $42,%xmm3
+ pxor %xmm2,%xmm4
+ psllq $42,%xmm2
+ pxor %xmm3,%xmm4
+ movdqa 32(%edx),%xmm3
+ pxor %xmm2,%xmm4
+ movdqa (%ebp),%xmm2
+ movq %mm4,%mm1
+ paddq %xmm4,%xmm0
+ movq -128(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ paddq %xmm0,%xmm2
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
paddq %mm7,%mm3
movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
movq %mm0,%mm6
- paddq 72(%esp),%mm3
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -120(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
psrlq $28,%mm5
paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm2,-128(%edx)
+ movdqa %xmm6,%xmm4
+ movdqa %xmm3,%xmm2
+.byte 102,15,58,15,217,8
+ movdqa %xmm5,16(%edx)
+.byte 102,15,58,15,229,8
+ movdqa %xmm3,%xmm5
+ psrlq $7,%xmm3
+ paddq %xmm4,%xmm1
+ movdqa %xmm5,%xmm4
+ psrlq $1,%xmm5
+ psllq $56,%xmm4
+ pxor %xmm5,%xmm3
+ psrlq $7,%xmm5
+ pxor %xmm4,%xmm3
+ psllq $7,%xmm4
+ pxor %xmm5,%xmm3
+ movdqa %xmm0,%xmm5
+ pxor %xmm4,%xmm3
+ movdqa %xmm0,%xmm4
+ psrlq $6,%xmm5
+ paddq %xmm3,%xmm1
+ movdqa %xmm0,%xmm3
+ psrlq $19,%xmm4
+ psllq $3,%xmm3
+ pxor %xmm4,%xmm5
+ psrlq $42,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $42,%xmm3
+ pxor %xmm4,%xmm5
+ movdqa 48(%edx),%xmm4
+ pxor %xmm3,%xmm5
+ movdqa 16(%ebp),%xmm3
+ movq %mm4,%mm1
+ paddq %xmm5,%xmm1
+ movq -112(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ paddq %xmm1,%xmm3
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
psrlq $6,%mm5
pxor %mm6,%mm7
psllq $5,%mm6
pxor %mm5,%mm7
+ pxor %mm1,%mm0
psrlq $5,%mm5
pxor %mm6,%mm7
+ pand %mm0,%mm2
psllq $6,%mm6
pxor %mm5,%mm7
- subl $8,%esp
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -104(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm3,-112(%edx)
+ movdqa %xmm7,%xmm5
+ movdqa %xmm4,%xmm3
+.byte 102,15,58,15,226,8
+ movdqa %xmm6,32(%edx)
+.byte 102,15,58,15,238,8
+ movdqa %xmm4,%xmm6
+ psrlq $7,%xmm4
+ paddq %xmm5,%xmm2
+ movdqa %xmm6,%xmm5
+ psrlq $1,%xmm6
+ psllq $56,%xmm5
+ pxor %xmm6,%xmm4
+ psrlq $7,%xmm6
+ pxor %xmm5,%xmm4
+ psllq $7,%xmm5
+ pxor %xmm6,%xmm4
+ movdqa %xmm1,%xmm6
+ pxor %xmm5,%xmm4
+ movdqa %xmm1,%xmm5
+ psrlq $6,%xmm6
+ paddq %xmm4,%xmm2
+ movdqa %xmm1,%xmm4
+ psrlq $19,%xmm5
+ psllq $3,%xmm4
+ pxor %xmm5,%xmm6
+ psrlq $42,%xmm5
+ pxor %xmm4,%xmm6
+ psllq $42,%xmm4
+ pxor %xmm5,%xmm6
+ movdqa (%edx),%xmm5
+ pxor %xmm4,%xmm6
+ movdqa 32(%ebp),%xmm4
+ movq %mm4,%mm1
+ paddq %xmm6,%xmm2
+ movq -96(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ paddq %xmm2,%xmm4
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
- por %mm2,%mm0
- pand %mm2,%mm5
- pand %mm1,%mm0
- por %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -88(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
- cmpb $53,%dl
- jne .L00400_14_sse2
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm4,-96(%edx)
+ movdqa %xmm0,%xmm6
+ movdqa %xmm5,%xmm4
+.byte 102,15,58,15,235,8
+ movdqa %xmm7,48(%edx)
+.byte 102,15,58,15,247,8
+ movdqa %xmm5,%xmm7
+ psrlq $7,%xmm5
+ paddq %xmm6,%xmm3
+ movdqa %xmm7,%xmm6
+ psrlq $1,%xmm7
+ psllq $56,%xmm6
+ pxor %xmm7,%xmm5
+ psrlq $7,%xmm7
+ pxor %xmm6,%xmm5
+ psllq $7,%xmm6
+ pxor %xmm7,%xmm5
+ movdqa %xmm2,%xmm7
+ pxor %xmm6,%xmm5
+ movdqa %xmm2,%xmm6
+ psrlq $6,%xmm7
+ paddq %xmm5,%xmm3
+ movdqa %xmm2,%xmm5
+ psrlq $19,%xmm6
+ psllq $3,%xmm5
+ pxor %xmm6,%xmm7
+ psrlq $42,%xmm6
+ pxor %xmm5,%xmm7
+ psllq $42,%xmm5
+ pxor %xmm6,%xmm7
+ movdqa 16(%edx),%xmm6
+ pxor %xmm5,%xmm7
+ movdqa 48(%ebp),%xmm5
+ movq %mm4,%mm1
+ paddq %xmm7,%xmm3
+ movq -80(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ paddq %xmm3,%xmm5
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -72(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
movq 40(%esp),%mm5
+ paddq %mm6,%mm0
movq 48(%esp),%mm6
- movq 56(%esp),%mm7
+ movdqa %xmm5,-80(%edx)
+ movdqa %xmm1,%xmm7
+ movdqa %xmm6,%xmm5
+.byte 102,15,58,15,244,8
+ movdqa %xmm0,(%edx)
+.byte 102,15,58,15,248,8
+ movdqa %xmm6,%xmm0
+ psrlq $7,%xmm6
+ paddq %xmm7,%xmm4
+ movdqa %xmm0,%xmm7
+ psrlq $1,%xmm0
+ psllq $56,%xmm7
+ pxor %xmm0,%xmm6
+ psrlq $7,%xmm0
+ pxor %xmm7,%xmm6
+ psllq $7,%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm3,%xmm0
+ pxor %xmm7,%xmm6
+ movdqa %xmm3,%xmm7
+ psrlq $6,%xmm0
+ paddq %xmm6,%xmm4
+ movdqa %xmm3,%xmm6
+ psrlq $19,%xmm7
+ psllq $3,%xmm6
+ pxor %xmm7,%xmm0
+ psrlq $42,%xmm7
+ pxor %xmm6,%xmm0
+ psllq $42,%xmm6
+ pxor %xmm7,%xmm0
+ movdqa 32(%edx),%xmm7
+ pxor %xmm6,%xmm0
+ movdqa 64(%ebp),%xmm6
movq %mm4,%mm1
- movq %mm4,%mm2
+ paddq %xmm0,%xmm4
+ movq -64(%edx),%mm7
+ pxor %mm6,%mm5
psrlq $14,%mm1
movq %mm4,32(%esp)
- psllq $23,%mm2
+ paddq %xmm4,%xmm6
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
movq %mm1,%mm3
psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
+ paddq 56(%esp),%mm7
pxor %mm1,%mm3
- paddq (%ebp),%mm7
- pxor %mm2,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -56(%edx),%mm7
pxor %mm6,%mm5
- movq 8(%esp),%mm1
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
pand %mm4,%mm5
- movq 16(%esp),%mm2
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm6,-64(%edx)
+ movdqa %xmm2,%xmm0
+ movdqa %xmm7,%xmm6
+.byte 102,15,58,15,253,8
+ movdqa %xmm1,16(%edx)
+.byte 102,15,58,15,193,8
+ movdqa %xmm7,%xmm1
+ psrlq $7,%xmm7
+ paddq %xmm0,%xmm5
+ movdqa %xmm1,%xmm0
+ psrlq $1,%xmm1
+ psllq $56,%xmm0
+ pxor %xmm1,%xmm7
+ psrlq $7,%xmm1
+ pxor %xmm0,%xmm7
+ psllq $7,%xmm0
+ pxor %xmm1,%xmm7
+ movdqa %xmm4,%xmm1
+ pxor %xmm0,%xmm7
+ movdqa %xmm4,%xmm0
+ psrlq $6,%xmm1
+ paddq %xmm7,%xmm5
+ movdqa %xmm4,%xmm7
+ psrlq $19,%xmm0
+ psllq $3,%xmm7
+ pxor %xmm0,%xmm1
+ psrlq $42,%xmm0
+ pxor %xmm7,%xmm1
+ psllq $42,%xmm7
+ pxor %xmm0,%xmm1
+ movdqa 48(%edx),%xmm0
+ pxor %xmm7,%xmm1
+ movdqa 80(%ebp),%xmm7
+ movq %mm4,%mm1
+ paddq %xmm1,%xmm5
+ movq -48(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ paddq %xmm5,%xmm7
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -40(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm7,-48(%edx)
+ movdqa %xmm3,%xmm1
+ movdqa %xmm0,%xmm7
+.byte 102,15,58,15,198,8
+ movdqa %xmm2,32(%edx)
+.byte 102,15,58,15,202,8
+ movdqa %xmm0,%xmm2
+ psrlq $7,%xmm0
+ paddq %xmm1,%xmm6
+ movdqa %xmm2,%xmm1
+ psrlq $1,%xmm2
+ psllq $56,%xmm1
+ pxor %xmm2,%xmm0
+ psrlq $7,%xmm2
+ pxor %xmm1,%xmm0
+ psllq $7,%xmm1
+ pxor %xmm2,%xmm0
+ movdqa %xmm5,%xmm2
+ pxor %xmm1,%xmm0
+ movdqa %xmm5,%xmm1
+ psrlq $6,%xmm2
+ paddq %xmm0,%xmm6
+ movdqa %xmm5,%xmm0
+ psrlq $19,%xmm1
+ psllq $3,%xmm0
+ pxor %xmm1,%xmm2
+ psrlq $42,%xmm1
+ pxor %xmm0,%xmm2
+ psllq $42,%xmm0
+ pxor %xmm1,%xmm2
+ movdqa (%edx),%xmm1
+ pxor %xmm0,%xmm2
+ movdqa 96(%ebp),%xmm0
+ movq %mm4,%mm1
+ paddq %xmm2,%xmm6
+ movq -32(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ paddq %xmm6,%xmm0
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -24(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm0,-32(%edx)
+ movdqa %xmm4,%xmm2
+ movdqa %xmm1,%xmm0
+.byte 102,15,58,15,207,8
+ movdqa %xmm3,48(%edx)
+.byte 102,15,58,15,211,8
+ movdqa %xmm1,%xmm3
+ psrlq $7,%xmm1
+ paddq %xmm2,%xmm7
+ movdqa %xmm3,%xmm2
+ psrlq $1,%xmm3
+ psllq $56,%xmm2
+ pxor %xmm3,%xmm1
+ psrlq $7,%xmm3
+ pxor %xmm2,%xmm1
+ psllq $7,%xmm2
+ pxor %xmm3,%xmm1
+ movdqa %xmm6,%xmm3
+ pxor %xmm2,%xmm1
+ movdqa %xmm6,%xmm2
+ psrlq $6,%xmm3
+ paddq %xmm1,%xmm7
+ movdqa %xmm6,%xmm1
+ psrlq $19,%xmm2
+ psllq $3,%xmm1
+ pxor %xmm2,%xmm3
+ psrlq $42,%xmm2
+ pxor %xmm1,%xmm3
+ psllq $42,%xmm1
+ pxor %xmm2,%xmm3
+ movdqa 16(%edx),%xmm2
+ pxor %xmm1,%xmm3
+ movdqa 112(%ebp),%xmm1
+ movq %mm4,%mm1
+ paddq %xmm3,%xmm7
+ movq -16(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ paddq %xmm7,%xmm1
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -8(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 40(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 48(%esp),%mm6
+ movdqa %xmm1,-16(%edx)
+ leal 128(%ebp),%ebp
+ decl %ecx
+ jnz .L00800_47_ssse3
+ movdqa (%ebp),%xmm1
+ leal -640(%ebp),%ebp
+ movdqu (%ebx),%xmm0
+.byte 102,15,56,0,193
+ movdqa (%ebp),%xmm3
+ movdqa %xmm1,%xmm2
+ movdqu 16(%ebx),%xmm1
+ paddq %xmm0,%xmm3
+.byte 102,15,56,0,202
+ movq %mm4,%mm1
+ movq -128(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
movq 24(%esp),%mm4
- paddq %mm5,%mm3
- movq %mm0,(%esp)
paddq %mm7,%mm3
movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
movq %mm0,%mm6
- paddq 72(%esp),%mm3
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -120(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
psrlq $28,%mm5
paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm3,-128(%edx)
+ movdqa 16(%ebp),%xmm4
+ movdqa %xmm2,%xmm3
+ movdqu 32(%ebx),%xmm2
+ paddq %xmm1,%xmm4
+.byte 102,15,56,0,211
+ movq %mm4,%mm1
+ movq -112(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
psrlq $6,%mm5
pxor %mm6,%mm7
psllq $5,%mm6
pxor %mm5,%mm7
+ pxor %mm1,%mm0
psrlq $5,%mm5
pxor %mm6,%mm7
+ pand %mm0,%mm2
psllq $6,%mm6
pxor %mm5,%mm7
- subl $8,%esp
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -104(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm4,-112(%edx)
+ movdqa 32(%ebp),%xmm5
+ movdqa %xmm3,%xmm4
+ movdqu 48(%ebx),%xmm3
+ paddq %xmm2,%xmm5
+.byte 102,15,56,0,220
+ movq %mm4,%mm1
+ movq -96(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
- por %mm2,%mm0
- movq 88(%esp),%mm6
- pand %mm2,%mm5
- pand %mm1,%mm0
- movq 192(%esp),%mm2
- por %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -88(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
-.align 16
-.L00516_79_sse2:
- movq %mm2,%mm1
- psrlq $1,%mm2
- movq %mm6,%mm7
- psrlq $6,%mm6
- movq %mm2,%mm3
- psrlq $6,%mm2
- movq %mm6,%mm5
- psrlq $13,%mm6
- pxor %mm2,%mm3
- psrlq $1,%mm2
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm5,-96(%edx)
+ movdqa 48(%ebp),%xmm6
+ movdqa %xmm4,%xmm5
+ movdqu 64(%ebx),%xmm4
+ paddq %xmm3,%xmm6
+.byte 102,15,56,0,229
+ movq %mm4,%mm1
+ movq -80(%edx),%mm7
pxor %mm6,%mm5
- psrlq $42,%mm6
- pxor %mm2,%mm3
- movq 200(%esp),%mm2
- psllq $56,%mm1
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
- psllq $3,%mm7
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
- paddq 128(%esp),%mm2
- psllq $7,%mm1
- pxor %mm7,%mm5
- psllq $42,%mm7
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
pxor %mm1,%mm3
- pxor %mm7,%mm5
- paddq %mm5,%mm3
- paddq %mm2,%mm3
- movq %mm3,72(%esp)
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -72(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
movq 40(%esp),%mm5
+ paddq %mm6,%mm0
movq 48(%esp),%mm6
- movq 56(%esp),%mm7
+ movdqa %xmm6,-80(%edx)
+ movdqa 64(%ebp),%xmm7
+ movdqa %xmm5,%xmm6
+ movdqu 80(%ebx),%xmm5
+ paddq %xmm4,%xmm7
+.byte 102,15,56,0,238
movq %mm4,%mm1
- movq %mm4,%mm2
+ movq -64(%edx),%mm7
+ pxor %mm6,%mm5
psrlq $14,%mm1
movq %mm4,32(%esp)
- psllq $23,%mm2
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
movq %mm1,%mm3
psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
+ paddq 56(%esp),%mm7
pxor %mm1,%mm3
- paddq (%ebp),%mm7
- pxor %mm2,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -56(%edx),%mm7
pxor %mm6,%mm5
- movq 8(%esp),%mm1
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
pand %mm4,%mm5
- movq 16(%esp),%mm2
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
- movq 24(%esp),%mm4
- paddq %mm5,%mm3
- movq %mm0,(%esp)
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm7,-64(%edx)
+ movdqa %xmm0,(%edx)
+ movdqa 80(%ebp),%xmm0
+ movdqa %xmm6,%xmm7
+ movdqu 96(%ebx),%xmm6
+ paddq %xmm5,%xmm0
+.byte 102,15,56,0,247
+ movq %mm4,%mm1
+ movq -48(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
movq %mm0,%mm6
- paddq 72(%esp),%mm3
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -40(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
psrlq $28,%mm5
paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm0,-48(%edx)
+ movdqa %xmm1,16(%edx)
+ movdqa 96(%ebp),%xmm1
+ movdqa %xmm7,%xmm0
+ movdqu 112(%ebx),%xmm7
+ paddq %xmm6,%xmm1
+.byte 102,15,56,0,248
+ movq %mm4,%mm1
+ movq -32(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
psrlq $6,%mm5
pxor %mm6,%mm7
psllq $5,%mm6
pxor %mm5,%mm7
+ pxor %mm1,%mm0
psrlq $5,%mm5
pxor %mm6,%mm7
+ pand %mm0,%mm2
psllq $6,%mm6
pxor %mm5,%mm7
- subl $8,%esp
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -24(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm1,-32(%edx)
+ movdqa %xmm2,32(%edx)
+ movdqa 112(%ebp),%xmm2
+ movdqa (%edx),%xmm0
+ paddq %xmm7,%xmm2
+ movq %mm4,%mm1
+ movq -16(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
- por %mm2,%mm0
- movq 88(%esp),%mm6
- pand %mm2,%mm5
- pand %mm1,%mm0
- movq 192(%esp),%mm2
- por %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -8(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
- cmpb $23,%dl
- jne .L00516_79_sse2
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 40(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 48(%esp),%mm6
+ movdqa %xmm2,-16(%edx)
movq 8(%esp),%mm1
- movq 16(%esp),%mm2
+ paddq %mm3,%mm0
movq 24(%esp),%mm3
- movq 40(%esp),%mm5
- movq 48(%esp),%mm6
movq 56(%esp),%mm7
+ pxor %mm1,%mm2
paddq (%esi),%mm0
paddq 8(%esi),%mm1
paddq 16(%esi),%mm2
@@ -1126,12 +5113,10 @@
movq %mm5,40(%esi)
movq %mm6,48(%esi)
movq %mm7,56(%esi)
- addl $640,%esp
- subl $640,%ebp
- cmpl 88(%esp),%edi
- jb .L003loop_sse2
+ cmpl %eax,%edi
+ jb .L007loop_ssse3
+ movl 76(%edx),%esp
emms
- movl 92(%esp),%esp
popl %edi
popl %esi
popl %ebx
@@ -1242,7 +5227,7 @@
movl $16,%ecx
.long 2784229001
.align 16
-.L00600_15_x86:
+.L00900_15_x86:
movl 40(%esp),%ecx
movl 44(%esp),%edx
movl %ecx,%esi
@@ -1349,9 +5334,9 @@
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $148,%dl
- jne .L00600_15_x86
+ jne .L00900_15_x86
.align 16
-.L00716_79_x86:
+.L01016_79_x86:
movl 312(%esp),%ecx
movl 316(%esp),%edx
movl %ecx,%esi
@@ -1524,7 +5509,7 @@
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $23,%dl
- jne .L00716_79_x86
+ jne .L01016_79_x86
movl 840(%esp),%esi
movl 844(%esp),%edi
movl (%esi),%eax
@@ -1667,6 +5652,8 @@
.long 4234509866,1501505948
.long 987167468,1607167915
.long 1246189591,1816402316
+.long 67438087,66051
+.long 202182159,134810123
.size sha512_block_data_order,.-.L_sha512_block_data_order_begin
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
@@ -1673,5 +5660,5 @@
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#endif
Modified: trunk/secure/lib/libcrypto/i386/vpaes-x86.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/vpaes-x86.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/vpaes-x86.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/vpaes-x86.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from vpaes-x86.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/vpaes-x86.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from vpaes-x86.pl. */
#ifdef PIC
.file "vpaes-x86.S"
.text
@@ -77,33 +77,33 @@
movdqa %xmm6,%xmm1
movdqa (%ebp),%xmm2
pandn %xmm0,%xmm1
+ pand %xmm6,%xmm0
movdqu (%edx),%xmm5
- psrld $4,%xmm1
- pand %xmm6,%xmm0
.byte 102,15,56,0,208
movdqa 16(%ebp),%xmm0
-.byte 102,15,56,0,193
pxor %xmm5,%xmm2
- pxor %xmm2,%xmm0
+ psrld $4,%xmm1
addl $16,%edx
+.byte 102,15,56,0,193
leal 192(%ebp),%ebx
+ pxor %xmm2,%xmm0
jmp .L000enc_entry
.align 16
.L001enc_loop:
movdqa 32(%ebp),%xmm4
+ movdqa 48(%ebp),%xmm0
.byte 102,15,56,0,226
+.byte 102,15,56,0,195
pxor %xmm5,%xmm4
- movdqa 48(%ebp),%xmm0
-.byte 102,15,56,0,195
+ movdqa 64(%ebp),%xmm5
pxor %xmm4,%xmm0
- movdqa 64(%ebp),%xmm5
+ movdqa -64(%ebx,%ecx,1),%xmm1
.byte 102,15,56,0,234
- movdqa -64(%ebx,%ecx,1),%xmm1
movdqa 80(%ebp),%xmm2
+ movdqa (%ebx,%ecx,1),%xmm4
.byte 102,15,56,0,211
+ movdqa %xmm0,%xmm3
pxor %xmm5,%xmm2
- movdqa (%ebx,%ecx,1),%xmm4
- movdqa %xmm0,%xmm3
.byte 102,15,56,0,193
addl $16,%edx
pxor %xmm2,%xmm0
@@ -112,28 +112,28 @@
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andl $48,%ecx
+ subl $1,%eax
pxor %xmm3,%xmm0
- subl $1,%eax
.L000enc_entry:
movdqa %xmm6,%xmm1
+ movdqa -32(%ebp),%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm6,%xmm0
- movdqa -32(%ebp),%xmm5
.byte 102,15,56,0,232
+ movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
- movdqa %xmm7,%xmm3
.byte 102,15,56,0,217
+ movdqa %xmm7,%xmm4
pxor %xmm5,%xmm3
- movdqa %xmm7,%xmm4
.byte 102,15,56,0,224
+ movdqa %xmm7,%xmm2
pxor %xmm5,%xmm4
- movdqa %xmm7,%xmm2
.byte 102,15,56,0,211
+ movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
- movdqa %xmm7,%xmm3
+.byte 102,15,56,0,220
movdqu (%edx),%xmm5
-.byte 102,15,56,0,220
pxor %xmm1,%xmm3
jnz .L001enc_loop
movdqa 96(%ebp),%xmm4
@@ -149,8 +149,8 @@
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
+ leal 608(%ebp),%ebx
movl 240(%edx),%eax
- leal 608(%ebp),%ebx
movdqa %xmm6,%xmm1
movdqa -64(%ebx),%xmm2
pandn %xmm0,%xmm1
@@ -173,56 +173,56 @@
.align 16
.L003dec_loop:
movdqa -32(%ebx),%xmm4
+ movdqa -16(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa -16(%ebx),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
- addl $16,%edx
-.byte 102,15,56,0,197
movdqa (%ebx),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 16(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 16(%ebx),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,197
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
- subl $1,%eax
-.byte 102,15,56,0,197
movdqa 32(%ebx),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 48(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 48(%ebx),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,197
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
-.byte 102,15,56,0,197
movdqa 64(%ebx),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 80(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 80(%ebx),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,197
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
+ addl $16,%edx
.byte 102,15,58,15,237,12
+ pxor %xmm1,%xmm0
+ subl $1,%eax
.L002dec_entry:
movdqa %xmm6,%xmm1
+ movdqa -32(%ebp),%xmm2
pandn %xmm0,%xmm1
+ pand %xmm6,%xmm0
psrld $4,%xmm1
- pand %xmm6,%xmm0
- movdqa -32(%ebp),%xmm2
.byte 102,15,56,0,208
+ movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
- movdqa %xmm7,%xmm3
.byte 102,15,56,0,217
+ movdqa %xmm7,%xmm4
pxor %xmm2,%xmm3
- movdqa %xmm7,%xmm4
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm7,%xmm2
.byte 102,15,56,0,211
+ movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
- movdqa %xmm7,%xmm3
.byte 102,15,56,0,220
+ movdqu (%edx),%xmm0
pxor %xmm1,%xmm3
- movdqu (%edx),%xmm0
jnz .L003dec_loop
movdqa 96(%ebx),%xmm4
.byte 102,15,56,0,226
@@ -331,12 +331,12 @@
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
- pshufd $128,%xmm6,%xmm0
- pxor %xmm0,%xmm6
+ pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
+ pxor %xmm1,%xmm6
+ pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
- pxor %xmm1,%xmm1
movhlps %xmm1,%xmm6
ret
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
@@ -739,33 +739,33 @@
movdqa %xmm6,%xmm1
movdqa (%ebp),%xmm2
pandn %xmm0,%xmm1
+ pand %xmm6,%xmm0
movdqu (%edx),%xmm5
- psrld $4,%xmm1
- pand %xmm6,%xmm0
.byte 102,15,56,0,208
movdqa 16(%ebp),%xmm0
-.byte 102,15,56,0,193
pxor %xmm5,%xmm2
- pxor %xmm2,%xmm0
+ psrld $4,%xmm1
addl $16,%edx
+.byte 102,15,56,0,193
leal 192(%ebp),%ebx
+ pxor %xmm2,%xmm0
jmp .L000enc_entry
.align 16
.L001enc_loop:
movdqa 32(%ebp),%xmm4
+ movdqa 48(%ebp),%xmm0
.byte 102,15,56,0,226
+.byte 102,15,56,0,195
pxor %xmm5,%xmm4
- movdqa 48(%ebp),%xmm0
-.byte 102,15,56,0,195
+ movdqa 64(%ebp),%xmm5
pxor %xmm4,%xmm0
- movdqa 64(%ebp),%xmm5
+ movdqa -64(%ebx,%ecx,1),%xmm1
.byte 102,15,56,0,234
- movdqa -64(%ebx,%ecx,1),%xmm1
movdqa 80(%ebp),%xmm2
+ movdqa (%ebx,%ecx,1),%xmm4
.byte 102,15,56,0,211
+ movdqa %xmm0,%xmm3
pxor %xmm5,%xmm2
- movdqa (%ebx,%ecx,1),%xmm4
- movdqa %xmm0,%xmm3
.byte 102,15,56,0,193
addl $16,%edx
pxor %xmm2,%xmm0
@@ -774,28 +774,28 @@
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andl $48,%ecx
+ subl $1,%eax
pxor %xmm3,%xmm0
- subl $1,%eax
.L000enc_entry:
movdqa %xmm6,%xmm1
+ movdqa -32(%ebp),%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm6,%xmm0
- movdqa -32(%ebp),%xmm5
.byte 102,15,56,0,232
+ movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
- movdqa %xmm7,%xmm3
.byte 102,15,56,0,217
+ movdqa %xmm7,%xmm4
pxor %xmm5,%xmm3
- movdqa %xmm7,%xmm4
.byte 102,15,56,0,224
+ movdqa %xmm7,%xmm2
pxor %xmm5,%xmm4
- movdqa %xmm7,%xmm2
.byte 102,15,56,0,211
+ movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
- movdqa %xmm7,%xmm3
+.byte 102,15,56,0,220
movdqu (%edx),%xmm5
-.byte 102,15,56,0,220
pxor %xmm1,%xmm3
jnz .L001enc_loop
movdqa 96(%ebp),%xmm4
@@ -811,8 +811,8 @@
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
+ leal 608(%ebp),%ebx
movl 240(%edx),%eax
- leal 608(%ebp),%ebx
movdqa %xmm6,%xmm1
movdqa -64(%ebx),%xmm2
pandn %xmm0,%xmm1
@@ -835,56 +835,56 @@
.align 16
.L003dec_loop:
movdqa -32(%ebx),%xmm4
+ movdqa -16(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa -16(%ebx),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
- addl $16,%edx
-.byte 102,15,56,0,197
movdqa (%ebx),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 16(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 16(%ebx),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,197
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
- subl $1,%eax
-.byte 102,15,56,0,197
movdqa 32(%ebx),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 48(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 48(%ebx),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,197
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
-.byte 102,15,56,0,197
movdqa 64(%ebx),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 80(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 80(%ebx),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,197
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
+ addl $16,%edx
.byte 102,15,58,15,237,12
+ pxor %xmm1,%xmm0
+ subl $1,%eax
.L002dec_entry:
movdqa %xmm6,%xmm1
+ movdqa -32(%ebp),%xmm2
pandn %xmm0,%xmm1
+ pand %xmm6,%xmm0
psrld $4,%xmm1
- pand %xmm6,%xmm0
- movdqa -32(%ebp),%xmm2
.byte 102,15,56,0,208
+ movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
- movdqa %xmm7,%xmm3
.byte 102,15,56,0,217
+ movdqa %xmm7,%xmm4
pxor %xmm2,%xmm3
- movdqa %xmm7,%xmm4
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm7,%xmm2
.byte 102,15,56,0,211
+ movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
- movdqa %xmm7,%xmm3
.byte 102,15,56,0,220
+ movdqu (%edx),%xmm0
pxor %xmm1,%xmm3
- movdqu (%edx),%xmm0
jnz .L003dec_loop
movdqa 96(%ebx),%xmm4
.byte 102,15,56,0,226
@@ -993,12 +993,12 @@
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
- pshufd $128,%xmm6,%xmm0
- pxor %xmm0,%xmm6
+ pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
+ pxor %xmm1,%xmm6
+ pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
- pxor %xmm1,%xmm1
movhlps %xmm1,%xmm6
ret
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
Modified: trunk/secure/lib/libcrypto/i386/wp-mmx.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/wp-mmx.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/wp-mmx.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/wp-mmx.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from wp-mmx.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/wp-mmx.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from wp-mmx.pl. */
#ifdef PIC
.file "wp-mmx.S"
.text
@@ -70,228 +70,230 @@
movq 4096(%ebp,%esi,8),%mm0
movl (%esp),%eax
movl 4(%esp),%ebx
- movb %al,%cl
- movb %ah,%dl
+ movzbl %al,%ecx
+ movzbl %ah,%edx
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm0
movq 7(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
movl 8(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
movq 6(%ebp,%esi,8),%mm2
movq 5(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
movq 4(%ebp,%esi,8),%mm4
movq 3(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
movl 12(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
movq 2(%ebp,%esi,8),%mm6
movq 1(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm1
pxor 7(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
movl 16(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm3
pxor 5(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm5
pxor 3(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
movl 20(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm7
pxor 1(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm2
pxor 7(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
movl 24(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm4
pxor 5(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm6
pxor 3(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
movl 28(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm0
pxor 1(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm3
pxor 7(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
movl 32(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm5
pxor 5(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm7
pxor 3(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
movl 36(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm1
pxor 1(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm4
pxor 7(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
movl 40(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm6
pxor 5(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm0
pxor 3(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
movl 44(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm2
pxor 1(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm5
pxor 7(%ebp,%edi,8),%mm6
- movb %al,%cl
- movb %ah,%dl
movl 48(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm7
pxor 5(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm1
pxor 3(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
movl 52(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm3
pxor 1(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm6
pxor 7(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
movl 56(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm0
pxor 5(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm2
pxor 3(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
movl 60(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm4
pxor 1(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm7
pxor 7(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
movl 64(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm1
pxor 5(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm3
pxor 3(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
movl 68(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm5
pxor 1(%ebp,%edi,8),%mm6
movq %mm0,(%esp)
@@ -302,226 +304,226 @@
movq %mm5,40(%esp)
movq %mm6,48(%esp)
movq %mm7,56(%esp)
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm0
pxor 7(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
movl 72(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm2
pxor 5(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm4
pxor 3(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
movl 76(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm6
pxor 1(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm1
pxor 7(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
movl 80(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm3
pxor 5(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm5
pxor 3(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
movl 84(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm7
pxor 1(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm2
pxor 7(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
movl 88(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm4
pxor 5(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm6
pxor 3(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
movl 92(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm0
pxor 1(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm3
pxor 7(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
movl 96(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm5
pxor 5(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm7
pxor 3(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
movl 100(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm1
pxor 1(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm4
pxor 7(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
movl 104(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm6
pxor 5(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm0
pxor 3(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
movl 108(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm2
pxor 1(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm5
pxor 7(%ebp,%edi,8),%mm6
- movb %al,%cl
- movb %ah,%dl
movl 112(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm7
pxor 5(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm1
pxor 3(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
movl 116(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm3
pxor 1(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm6
pxor 7(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
movl 120(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm0
pxor 5(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm2
pxor 3(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
movl 124(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm4
pxor 1(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm7
pxor 7(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm1
pxor 5(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm3
pxor 3(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm5
pxor 1(%ebp,%edi,8),%mm6
leal 128(%esp),%ebx
@@ -1176,228 +1178,230 @@
movq 4096(%ebp,%esi,8),%mm0
movl (%esp),%eax
movl 4(%esp),%ebx
- movb %al,%cl
- movb %ah,%dl
+ movzbl %al,%ecx
+ movzbl %ah,%edx
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm0
movq 7(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
movl 8(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
movq 6(%ebp,%esi,8),%mm2
movq 5(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
movq 4(%ebp,%esi,8),%mm4
movq 3(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
movl 12(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
movq 2(%ebp,%esi,8),%mm6
movq 1(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm1
pxor 7(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
movl 16(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm3
pxor 5(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm5
pxor 3(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
movl 20(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm7
pxor 1(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm2
pxor 7(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
movl 24(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm4
pxor 5(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm6
pxor 3(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
movl 28(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm0
pxor 1(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm3
pxor 7(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
movl 32(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm5
pxor 5(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm7
pxor 3(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
movl 36(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm1
pxor 1(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm4
pxor 7(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
movl 40(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm6
pxor 5(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm0
pxor 3(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
movl 44(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm2
pxor 1(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm5
pxor 7(%ebp,%edi,8),%mm6
- movb %al,%cl
- movb %ah,%dl
movl 48(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm7
pxor 5(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm1
pxor 3(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
movl 52(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm3
pxor 1(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm6
pxor 7(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
movl 56(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm0
pxor 5(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm2
pxor 3(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
movl 60(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm4
pxor 1(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm7
pxor 7(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
movl 64(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm1
pxor 5(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm3
pxor 3(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
movl 68(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm5
pxor 1(%ebp,%edi,8),%mm6
movq %mm0,(%esp)
@@ -1408,226 +1412,226 @@
movq %mm5,40(%esp)
movq %mm6,48(%esp)
movq %mm7,56(%esp)
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm0
pxor 7(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
movl 72(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm2
pxor 5(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm4
pxor 3(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
movl 76(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm6
pxor 1(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm1
pxor 7(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
movl 80(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm3
pxor 5(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm5
pxor 3(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
movl 84(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm7
pxor 1(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm2
pxor 7(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
movl 88(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm4
pxor 5(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm6
pxor 3(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
movl 92(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm0
pxor 1(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm3
pxor 7(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
movl 96(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm5
pxor 5(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm7
pxor 3(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
movl 100(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm1
pxor 1(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm4
pxor 7(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
movl 104(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm6
pxor 5(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm0
pxor 3(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
movl 108(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm2
pxor 1(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm5
pxor 7(%ebp,%edi,8),%mm6
- movb %al,%cl
- movb %ah,%dl
movl 112(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm7
pxor 5(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm1
pxor 3(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
movl 116(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm3
pxor 1(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm6
pxor 7(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
movl 120(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm0
pxor 5(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm2
pxor 3(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
movl 124(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm4
pxor 1(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm7
pxor 7(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm1
pxor 5(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm3
pxor 3(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm5
pxor 1(%ebp,%edi,8),%mm6
leal 128(%esp),%ebx
Modified: trunk/secure/lib/libcrypto/i386/x86-gf2m.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/x86-gf2m.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/x86-gf2m.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/x86-gf2m.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from x86-gf2m.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/x86-gf2m.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from x86-gf2m.pl. */
#ifdef PIC
.file "x86-gf2m.S"
.text
@@ -247,8 +247,7 @@
call .L000PIC_me_up
.L000PIC_me_up:
popl %edx
- leal _GLOBAL_OFFSET_TABLE_+[.-.L000PIC_me_up](%edx),%edx
- movl OPENSSL_ia32cap_P@GOT(%edx),%edx
+ leal OPENSSL_ia32cap_P-.L000PIC_me_up(%edx),%edx
movl (%edx),%eax
movl 4(%edx),%edx
testl $8388608,%eax
@@ -348,7 +347,7 @@
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#else
.file "x86-gf2m.S"
.text
@@ -692,5 +691,5 @@
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#endif
Modified: trunk/secure/lib/libcrypto/i386/x86-mont.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/x86-mont.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/x86-mont.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/x86-mont.S 299966 2016-05-16 19:30:27Z jkim $
-# Do not modify. This file is auto-generated from x86-mont.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/x86-mont.S 337982 2018-08-17 18:32:53Z jkim $ */
+/* Do not modify. This file is auto-generated from x86-mont.pl. */
#ifdef PIC
.file "x86-mont.S"
.text
@@ -19,48 +19,54 @@
jl .L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
- movl %esp,%ebp
addl $2,%edi
negl %edi
- leal -32(%esp,%edi,4),%esp
+ leal -32(%esp,%edi,4),%ebp
negl %edi
- movl %esp,%eax
+ movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
- subl %eax,%esp
- xorl %esp,%edx
+ subl %eax,%ebp
+ xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
- subl %edx,%esp
- andl $-64,%esp
- movl %ebp,%eax
- subl %esp,%eax
+ subl %edx,%ebp
+ andl $-64,%ebp
+ movl %esp,%eax
+ subl %ebp,%eax
andl $-4096,%eax
+ movl %esp,%edx
+ leal (%ebp,%eax,1),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja .L001page_walk
+ jmp .L002page_walk_done
+.align 16
.L001page_walk:
- movl (%esp,%eax,1),%edx
- subl $4096,%eax
-.byte 46
- jnc .L001page_walk
+ leal -4096(%esp),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja .L001page_walk
+.L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
- movl 12(%esi),%edx
+ movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
- movl %edx,16(%esp)
+ movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
- movl %ebp,24(%esp)
- call .L002PIC_me_up
-.L002PIC_me_up:
+ movl %edx,24(%esp)
+ call .L003PIC_me_up
+.L003PIC_me_up:
popl %eax
- leal _GLOBAL_OFFSET_TABLE_+[.-.L002PIC_me_up](%eax),%eax
- movl OPENSSL_ia32cap_P@GOT(%eax),%eax
+ leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
btl $26,(%eax)
- jnc .L003non_sse2
+ jnc .L004non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@@ -84,7 +90,7 @@
psrlq $32,%mm3
incl %ecx
.align 16
-.L0041st:
+.L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -99,7 +105,7 @@
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
- jl .L0041st
+ jl .L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -113,7 +119,7 @@
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
-.L005outer:
+.L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@@ -135,7 +141,7 @@
paddq %mm6,%mm2
incl %ecx
decl %ebx
-.L006inner:
+.L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -152,7 +158,7 @@
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
- jnz .L006inner
+ jnz .L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@@ -170,11 +176,11 @@
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
- jle .L005outer
+ jle .L006outer
emms
- jmp .L007common_tail
+ jmp .L008common_tail
.align 16
-.L003non_sse2:
+.L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@@ -185,12 +191,12 @@
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
- jz .L008bn_sqr_mont
+ jz .L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
-.L009mull:
+.L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@@ -199,7 +205,7 @@
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl .L009mull
+ jl .L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@@ -217,9 +223,9 @@
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
- jmp .L0102ndmadd
+ jmp .L0112ndmadd
.align 16
-.L0111stmadd:
+.L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -230,7 +236,7 @@
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl .L0111stmadd
+ jl .L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@@ -253,7 +259,7 @@
adcl $0,%edx
movl $1,%ecx
.align 16
-.L0102ndmadd:
+.L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -264,7 +270,7 @@
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl .L0102ndmadd
+ jl .L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -280,7 +286,7 @@
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
- je .L007common_tail
+ je .L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
@@ -287,9 +293,9 @@
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
- jmp .L0111stmadd
+ jmp .L0121stmadd
.align 16
-.L008bn_sqr_mont:
+.L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@@ -300,7 +306,7 @@
andl $1,%ebx
incl %ecx
.align 16
-.L012sqr:
+.L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -312,7 +318,7 @@
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
- jl .L012sqr
+ jl .L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -336,7 +342,7 @@
movl 4(%esi),%eax
movl $1,%ecx
.align 16
-.L0133rdmadd:
+.L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -355,7 +361,7 @@
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl .L0133rdmadd
+ jl .L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -371,7 +377,7 @@
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
- je .L007common_tail
+ je .L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@@ -383,12 +389,12 @@
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
- je .L014sqrlast
+ je .L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
-.L015sqradd:
+.L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -404,13 +410,13 @@
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
- jle .L015sqradd
+ jle .L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
-.L014sqrlast:
+.L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@@ -425,9 +431,9 @@
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
- jmp .L0133rdmadd
+ jmp .L0143rdmadd
.align 16
-.L007common_tail:
+.L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@@ -435,26 +441,28 @@
movl %ebx,%ecx
xorl %edx,%edx
.align 16
-.L016sub:
+.L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
- jge .L016sub
+ jge .L017sub
sbbl $0,%eax
- andl %eax,%esi
- notl %eax
- movl %edi,%ebp
- andl %eax,%ebp
- orl %ebp,%esi
+ movl $-1,%edx
+ xorl %eax,%edx
+ jmp .L018copy
.align 16
-.L017copy:
- movl (%esi,%ebx,4),%eax
- movl %eax,(%edi,%ebx,4)
+.L018copy:
+ movl 32(%esp,%ebx,4),%esi
+ movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
+ andl %eax,%esi
+ andl %edx,%ebp
+ orl %esi,%ebp
+ movl %ebp,(%edi,%ebx,4)
decl %ebx
- jge .L017copy
+ jge .L018copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
@@ -469,7 +477,7 @@
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#else
.file "x86-mont.S"
.text
@@ -488,44 +496,51 @@
jl .L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
- movl %esp,%ebp
addl $2,%edi
negl %edi
- leal -32(%esp,%edi,4),%esp
+ leal -32(%esp,%edi,4),%ebp
negl %edi
- movl %esp,%eax
+ movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
- subl %eax,%esp
- xorl %esp,%edx
+ subl %eax,%ebp
+ xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
- subl %edx,%esp
- andl $-64,%esp
- movl %ebp,%eax
- subl %esp,%eax
+ subl %edx,%ebp
+ andl $-64,%ebp
+ movl %esp,%eax
+ subl %ebp,%eax
andl $-4096,%eax
+ movl %esp,%edx
+ leal (%ebp,%eax,1),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja .L001page_walk
+ jmp .L002page_walk_done
+.align 16
.L001page_walk:
- movl (%esp,%eax,1),%edx
- subl $4096,%eax
-.byte 46
- jnc .L001page_walk
+ leal -4096(%esp),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja .L001page_walk
+.L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
- movl 12(%esi),%edx
+ movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
- movl %edx,16(%esp)
+ movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
- movl %ebp,24(%esp)
+ movl %edx,24(%esp)
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
- jnc .L002non_sse2
+ jnc .L003non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@@ -549,7 +564,7 @@
psrlq $32,%mm3
incl %ecx
.align 16
-.L0031st:
+.L0041st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -564,7 +579,7 @@
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
- jl .L0031st
+ jl .L0041st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -578,7 +593,7 @@
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
-.L004outer:
+.L005outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@@ -600,7 +615,7 @@
paddq %mm6,%mm2
incl %ecx
decl %ebx
-.L005inner:
+.L006inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -617,7 +632,7 @@
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
- jnz .L005inner
+ jnz .L006inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@@ -635,11 +650,11 @@
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
- jle .L004outer
+ jle .L005outer
emms
- jmp .L006common_tail
+ jmp .L007common_tail
.align 16
-.L002non_sse2:
+.L003non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@@ -650,12 +665,12 @@
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
- jz .L007bn_sqr_mont
+ jz .L008bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
-.L008mull:
+.L009mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@@ -664,7 +679,7 @@
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl .L008mull
+ jl .L009mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@@ -682,9 +697,9 @@
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
- jmp .L0092ndmadd
+ jmp .L0102ndmadd
.align 16
-.L0101stmadd:
+.L0111stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -695,7 +710,7 @@
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl .L0101stmadd
+ jl .L0111stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@@ -718,7 +733,7 @@
adcl $0,%edx
movl $1,%ecx
.align 16
-.L0092ndmadd:
+.L0102ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -729,7 +744,7 @@
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl .L0092ndmadd
+ jl .L0102ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -745,7 +760,7 @@
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
- je .L006common_tail
+ je .L007common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
@@ -752,9 +767,9 @@
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
- jmp .L0101stmadd
+ jmp .L0111stmadd
.align 16
-.L007bn_sqr_mont:
+.L008bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@@ -765,7 +780,7 @@
andl $1,%ebx
incl %ecx
.align 16
-.L011sqr:
+.L012sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -777,7 +792,7 @@
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
- jl .L011sqr
+ jl .L012sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -801,7 +816,7 @@
movl 4(%esi),%eax
movl $1,%ecx
.align 16
-.L0123rdmadd:
+.L0133rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -820,7 +835,7 @@
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl .L0123rdmadd
+ jl .L0133rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -836,7 +851,7 @@
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
- je .L006common_tail
+ je .L007common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@@ -848,12 +863,12 @@
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
- je .L013sqrlast
+ je .L014sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
-.L014sqradd:
+.L015sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -869,13 +884,13 @@
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
- jle .L014sqradd
+ jle .L015sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
-.L013sqrlast:
+.L014sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@@ -890,9 +905,9 @@
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
- jmp .L0123rdmadd
+ jmp .L0133rdmadd
.align 16
-.L006common_tail:
+.L007common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@@ -900,26 +915,28 @@
movl %ebx,%ecx
xorl %edx,%edx
.align 16
-.L015sub:
+.L016sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
- jge .L015sub
+ jge .L016sub
sbbl $0,%eax
- andl %eax,%esi
- notl %eax
- movl %edi,%ebp
- andl %eax,%ebp
- orl %ebp,%esi
+ movl $-1,%edx
+ xorl %eax,%edx
+ jmp .L017copy
.align 16
-.L016copy:
- movl (%esi,%ebx,4),%eax
- movl %eax,(%edi,%ebx,4)
+.L017copy:
+ movl 32(%esp,%ebx,4),%esi
+ movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
+ andl %eax,%esi
+ andl %edx,%ebp
+ orl %esi,%ebp
+ movl %ebp,(%edi,%ebx,4)
decl %ebx
- jge .L016copy
+ jge .L017copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
@@ -934,5 +951,5 @@
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
#endif
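
[Editor's sketch] The reworked tail of bn_mul_mont above (the new .L016sub/.L017copy sequence) drops the old pointer-select copy in favour of a word-by-word masked merge: after the conditional subtraction of the modulus, %eax ends up as a 0 or all-ones borrow mask and %edx as its complement, so each output word is chosen from either the unreduced temporary or the already-subtracted value without a data-dependent branch or pointer. A minimal C sketch of that constant-time select pattern follows; ct_copy_select and its parameters are illustrative names, not OpenSSL API:

    #include <stdint.h>
    #include <stddef.h>

    /* Branchless select in the spirit of the new .L017copy tail: pick either
     * the unreduced words (tmp) or the already-subtracted words sitting in
     * out, driven by a 0/-1 mask derived from the final borrow.  Hypothetical
     * helper, shown only to illustrate the masking technique. */
    void ct_copy_select(uint32_t *out, const uint32_t *tmp,
                        size_t num, uint32_t borrow /* 0 or 1 */)
    {
        uint32_t keep_tmp = 0U - borrow;   /* all-ones if the subtraction borrowed */
        uint32_t keep_sub = ~keep_tmp;     /* complement, like %edx in the assembly */
        for (size_t i = 0; i < num; i++)
            out[i] = (tmp[i] & keep_tmp) | (out[i] & keep_sub);
    }
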
Modified: trunk/secure/lib/libcrypto/i386/x86cpuid.S
===================================================================
--- trunk/secure/lib/libcrypto/i386/x86cpuid.S 2019-01-20 05:38:15 UTC (rev 12153)
+++ trunk/secure/lib/libcrypto/i386/x86cpuid.S 2019-01-20 05:38:27 UTC (rev 12154)
@@ -1,6 +1,6 @@
/* $MidnightBSD$ */
-# $FreeBSD: stable/10/secure/lib/libcrypto/i386/x86cpuid.S 299983 2016-05-16 22:42:09Z jkim $
-# Do not modify. This file is auto-generated from x86cpuid.pl.
+/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/x86cpuid.S 325335 2017-11-02 18:22:53Z jkim $ */
+/* Do not modify. This file is auto-generated from x86cpuid.pl. */
#ifdef PIC
.file "x86cpuid.S"
.text
@@ -24,6 +24,8 @@
popl %eax
xorl %eax,%ecx
xorl %eax,%eax
+ movl 20(%esp),%esi
+ movl %eax,8(%esi)
btl $21,%ecx
jnc .L000nocpuid
.byte 0x0f,0xa2
@@ -77,14 +79,14 @@
jmp .L002generic
.L001intel:
cmpl $4,%edi
- movl $-1,%edi
+ movl $-1,%esi
jb .L003nocacheinfo
movl $4,%eax
movl $0,%ecx
.byte 0x0f,0xa2
- movl %eax,%edi
- shrl $14,%edi
- andl $4095,%edi
+ movl %eax,%esi
+ shrl $14,%esi
+ andl $4095,%esi
.L003nocacheinfo:
movl $1,%eax
xorl %ecx,%ecx
@@ -101,7 +103,7 @@
btl $28,%edx
jnc .L002generic
andl $4026531839,%edx
- cmpl $0,%edi
+ cmpl $0,%esi
je .L002generic
orl $268435456,%edx
shrl $16,%ebx
@@ -113,21 +115,30 @@
andl $4294965247,%ecx
movl %edx,%esi
orl %ecx,%ebp
- btl $27,%ecx
- jnc .L005clear_avx
+ cmpl $7,%edi
+ movl 20(%esp),%edi
+ jb .L005no_extended_info
+ movl $7,%eax
xorl %ecx,%ecx
+ .byte 0x0f,0xa2
+ movl %ebx,8(%edi)
+.L005no_extended_info:
+ btl $27,%ebp
+ jnc .L006clear_avx
+ xorl %ecx,%ecx
.byte 15,1,208
andl $6,%eax
cmpl $6,%eax
- je .L006done
+ je .L007done
cmpl $2,%eax
- je .L005clear_avx
-.L007clear_xmm:
+ je .L006clear_avx
+.L008clear_xmm:
andl $4261412861,%ebp
andl $4278190079,%esi
-.L005clear_avx:
+.L006clear_avx:
andl $4026525695,%ebp
-.L006done:
+ andl $4294967263,8(%edi)
+.L007done:
movl %esi,%eax
movl %ebp,%edx
.L000nocpuid:
@@ -144,15 +155,14 @@
.L_OPENSSL_rdtsc_begin:
xorl %eax,%eax
xorl %edx,%edx
- call .L008PIC_me_up
-.L008PIC_me_up:
+ call .L009PIC_me_up
+.L009PIC_me_up:
popl %ecx
- leal _GLOBAL_OFFSET_TABLE_+[.-.L008PIC_me_up](%ecx),%ecx
-	movl	OPENSSL_ia32cap_P@GOT(%ecx),%ecx
+ leal OPENSSL_ia32cap_P-.L009PIC_me_up(%ecx),%ecx
btl $4,(%ecx)
- jnc .L009notsc
+ jnc .L010notsc
.byte 0x0f,0x31
-.L009notsc:
+.L010notsc:
ret
.size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin
.globl OPENSSL_instrument_halt
@@ -160,20 +170,19 @@
.align 16
OPENSSL_instrument_halt:
.L_OPENSSL_instrument_halt_begin:
- call .L010PIC_me_up
-.L010PIC_me_up:
+ call .L011PIC_me_up
+.L011PIC_me_up:
popl %ecx
- leal _GLOBAL_OFFSET_TABLE_+[.-.L010PIC_me_up](%ecx),%ecx
-	movl	OPENSSL_ia32cap_P@GOT(%ecx),%ecx
+ leal OPENSSL_ia32cap_P-.L011PIC_me_up(%ecx),%ecx
btl $4,(%ecx)
- jnc .L011nohalt
+ jnc .L012nohalt
.long 2421723150
andl $3,%eax
- jnz .L011nohalt
+ jnz .L012nohalt
pushfl
popl %eax
btl $9,%eax
- jnc .L011nohalt
+ jnc .L012nohalt
.byte 0x0f,0x31
pushl %edx
pushl %eax
@@ -183,7 +192,7 @@
sbbl 4(%esp),%edx
addl $8,%esp
ret
-.L011nohalt:
+.L012nohalt:
xorl %eax,%eax
xorl %edx,%edx
ret
@@ -196,21 +205,21 @@
pushfl
popl %eax
btl $9,%eax
- jnc .L012nospin
+ jnc .L013nospin
movl 4(%esp),%eax
movl 8(%esp),%ecx
.long 2430111262
xorl %eax,%eax
movl (%ecx),%edx
- jmp .L013spin
+ jmp .L014spin
.align 16
-.L013spin:
+.L014spin:
incl %eax
cmpl (%ecx),%edx
- je .L013spin
+ je .L014spin
.long 529567888
ret
-.L012nospin:
+.L013nospin:
xorl %eax,%eax
xorl %edx,%edx
ret
@@ -222,17 +231,16 @@
.L_OPENSSL_wipe_cpu_begin:
xorl %eax,%eax
xorl %edx,%edx
- call .L014PIC_me_up
-.L014PIC_me_up:
+ call .L015PIC_me_up
+.L015PIC_me_up:
popl %ecx
- leal _GLOBAL_OFFSET_TABLE_+[.-.L014PIC_me_up](%ecx),%ecx
-	movl	OPENSSL_ia32cap_P@GOT(%ecx),%ecx
+ leal OPENSSL_ia32cap_P-.L015PIC_me_up(%ecx),%ecx
movl (%ecx),%ecx
btl $1,(%ecx)
- jnc .L015no_x87
+ jnc .L016no_x87
andl $83886080,%ecx
cmpl $83886080,%ecx
- jne .L016no_sse2
+ jne .L017no_sse2
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
@@ -241,9 +249,9 @@
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
-.L016no_sse2:
+.L017no_sse2:
.long 4007259865,4007259865,4007259865,4007259865,2430851995
-.L015no_x87:
+.L016no_x87:
leal 4(%esp),%eax
ret
.size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin
@@ -257,11 +265,11 @@
pushl %ebx
nop
movl (%edx),%eax
-.L017spin:
+.L018spin:
leal (%eax,%ecx,1),%ebx
nop
.long 447811568
- jne .L017spin
+ jne .L018spin
movl %ebx,%eax
popl %ebx
ret
@@ -302,32 +310,32 @@
movl 8(%esp),%ecx
xorl %eax,%eax
cmpl $7,%ecx
- jae .L018lot
+ jae .L019lot
cmpl $0,%ecx
- je .L019ret
-.L020little:
+ je .L020ret
+.L021little:
movb %al,(%edx)
subl $1,%ecx
leal 1(%edx),%edx
- jnz .L020little
-.L019ret:
+ jnz .L021little
+.L020ret:
ret
.align 16
-.L018lot:
+.L019lot:
testl $3,%edx
- jz .L021aligned
+ jz .L022aligned
movb %al,(%edx)
leal -1(%ecx),%ecx
leal 1(%edx),%edx
- jmp .L018lot
-.L021aligned:
+ jmp .L019lot
+.L022aligned:
movl %eax,(%edx)
leal -4(%ecx),%ecx
testl $-4,%ecx
leal 4(%edx),%edx
- jnz .L021aligned
+ jnz .L022aligned
cmpl $0,%ecx
- jne .L020little
+ jne .L021little
ret
.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin
.globl OPENSSL_ia32_rdrand
@@ -336,18 +344,33 @@
OPENSSL_ia32_rdrand:
.L_OPENSSL_ia32_rdrand_begin:
movl $8,%ecx
-.L022loop:
+.L023loop:
.byte 15,199,240
- jc .L023break
- loop .L022loop
-.L023break:
+ jc .L024break
+ loop .L023loop
+.L024break:
cmpl $0,%eax
cmovel %ecx,%eax
ret
.size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin
+.globl OPENSSL_ia32_rdseed
+.type	OPENSSL_ia32_rdseed,@function
+.align 16
+OPENSSL_ia32_rdseed:
+.L_OPENSSL_ia32_rdseed_begin:
+ movl $8,%ecx
+.L025loop:
+.byte 15,199,248
+ jc .L026break
+ loop .L025loop
+.L026break:
+ cmpl $0,%eax
+ cmovel %ecx,%eax
+ ret
+.size OPENSSL_ia32_rdseed,.-.L_OPENSSL_ia32_rdseed_begin
.hidden OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
.section .init
call OPENSSL_cpuid_setup
#else
@@ -373,6 +396,8 @@
popl %eax
xorl %eax,%ecx
xorl %eax,%eax
+ movl 20(%esp),%esi
+ movl %eax,8(%esi)
btl $21,%ecx
jnc .L000nocpuid
.byte 0x0f,0xa2
@@ -426,14 +451,14 @@
jmp .L002generic
.L001intel:
cmpl $4,%edi
- movl $-1,%edi
+ movl $-1,%esi
jb .L003nocacheinfo
movl $4,%eax
movl $0,%ecx
.byte 0x0f,0xa2
- movl %eax,%edi
- shrl $14,%edi
- andl $4095,%edi
+ movl %eax,%esi
+ shrl $14,%esi
+ andl $4095,%esi
.L003nocacheinfo:
movl $1,%eax
xorl %ecx,%ecx
@@ -450,7 +475,7 @@
btl $28,%edx
jnc .L002generic
andl $4026531839,%edx
- cmpl $0,%edi
+ cmpl $0,%esi
je .L002generic
orl $268435456,%edx
shrl $16,%ebx
@@ -462,21 +487,30 @@
andl $4294965247,%ecx
movl %edx,%esi
orl %ecx,%ebp
- btl $27,%ecx
- jnc .L005clear_avx
+ cmpl $7,%edi
+ movl 20(%esp),%edi
+ jb .L005no_extended_info
+ movl $7,%eax
xorl %ecx,%ecx
+ .byte 0x0f,0xa2
+ movl %ebx,8(%edi)
+.L005no_extended_info:
+ btl $27,%ebp
+ jnc .L006clear_avx
+ xorl %ecx,%ecx
.byte 15,1,208
andl $6,%eax
cmpl $6,%eax
- je .L006done
+ je .L007done
cmpl $2,%eax
- je .L005clear_avx
-.L007clear_xmm:
+ je .L006clear_avx
+.L008clear_xmm:
andl $4261412861,%ebp
andl $4278190079,%esi
-.L005clear_avx:
+.L006clear_avx:
andl $4026525695,%ebp
-.L006done:
+ andl $4294967263,8(%edi)
+.L007done:
movl %esi,%eax
movl %ebp,%edx
.L000nocpuid:
@@ -495,9 +529,9 @@
xorl %edx,%edx
leal OPENSSL_ia32cap_P,%ecx
btl $4,(%ecx)
- jnc .L008notsc
+ jnc .L009notsc
.byte 0x0f,0x31
-.L008notsc:
+.L009notsc:
ret
.size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin
.globl OPENSSL_instrument_halt
@@ -507,14 +541,14 @@
.L_OPENSSL_instrument_halt_begin:
leal OPENSSL_ia32cap_P,%ecx
btl $4,(%ecx)
- jnc .L009nohalt
+ jnc .L010nohalt
.long 2421723150
andl $3,%eax
- jnz .L009nohalt
+ jnz .L010nohalt
pushfl
popl %eax
btl $9,%eax
- jnc .L009nohalt
+ jnc .L010nohalt
.byte 0x0f,0x31
pushl %edx
pushl %eax
@@ -524,7 +558,7 @@
sbbl 4(%esp),%edx
addl $8,%esp
ret
-.L009nohalt:
+.L010nohalt:
xorl %eax,%eax
xorl %edx,%edx
ret
@@ -537,21 +571,21 @@
pushfl
popl %eax
btl $9,%eax
- jnc .L010nospin
+ jnc .L011nospin
movl 4(%esp),%eax
movl 8(%esp),%ecx
.long 2430111262
xorl %eax,%eax
movl (%ecx),%edx
- jmp .L011spin
+ jmp .L012spin
.align 16
-.L011spin:
+.L012spin:
incl %eax
cmpl (%ecx),%edx
- je .L011spin
+ je .L012spin
.long 529567888
ret
-.L010nospin:
+.L011nospin:
xorl %eax,%eax
xorl %edx,%edx
ret
@@ -566,10 +600,10 @@
leal OPENSSL_ia32cap_P,%ecx
movl (%ecx),%ecx
btl $1,(%ecx)
- jnc .L012no_x87
+ jnc .L013no_x87
andl $83886080,%ecx
cmpl $83886080,%ecx
- jne .L013no_sse2
+ jne .L014no_sse2
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
@@ -578,9 +612,9 @@
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
-.L013no_sse2:
+.L014no_sse2:
.long 4007259865,4007259865,4007259865,4007259865,2430851995
-.L012no_x87:
+.L013no_x87:
leal 4(%esp),%eax
ret
.size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin
@@ -594,11 +628,11 @@
pushl %ebx
nop
movl (%edx),%eax
-.L014spin:
+.L015spin:
leal (%eax,%ecx,1),%ebx
nop
.long 447811568
- jne .L014spin
+ jne .L015spin
movl %ebx,%eax
popl %ebx
ret
@@ -639,32 +673,32 @@
movl 8(%esp),%ecx
xorl %eax,%eax
cmpl $7,%ecx
- jae .L015lot
+ jae .L016lot
cmpl $0,%ecx
- je .L016ret
-.L017little:
+ je .L017ret
+.L018little:
movb %al,(%edx)
subl $1,%ecx
leal 1(%edx),%edx
- jnz .L017little
-.L016ret:
+ jnz .L018little
+.L017ret:
ret
.align 16
-.L015lot:
+.L016lot:
testl $3,%edx
- jz .L018aligned
+ jz .L019aligned
movb %al,(%edx)
leal -1(%ecx),%ecx
leal 1(%edx),%edx
- jmp .L015lot
-.L018aligned:
+ jmp .L016lot
+.L019aligned:
movl %eax,(%edx)
leal -4(%ecx),%ecx
testl $-4,%ecx
leal 4(%edx),%edx
- jnz .L018aligned
+ jnz .L019aligned
cmpl $0,%ecx
- jne .L017little
+ jne .L018little
ret
.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin
.globl OPENSSL_ia32_rdrand
@@ -673,18 +707,33 @@
OPENSSL_ia32_rdrand:
.L_OPENSSL_ia32_rdrand_begin:
movl $8,%ecx
-.L019loop:
+.L020loop:
.byte 15,199,240
- jc .L020break
- loop .L019loop
-.L020break:
+ jc .L021break
+ loop .L020loop
+.L021break:
cmpl $0,%eax
cmovel %ecx,%eax
ret
.size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin
+.globl OPENSSL_ia32_rdseed
+.type	OPENSSL_ia32_rdseed,@function
+.align 16
+OPENSSL_ia32_rdseed:
+.L_OPENSSL_ia32_rdseed_begin:
+ movl $8,%ecx
+.L022loop:
+.byte 15,199,248
+ jc .L023break
+ loop .L022loop
+.L023break:
+ cmpl $0,%eax
+ cmovel %ecx,%eax
+ ret
+.size OPENSSL_ia32_rdseed,.-.L_OPENSSL_ia32_rdseed_begin
.hidden OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
.section .init
call OPENSSL_cpuid_setup
#endif
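
[Editor's sketch] The x86cpuid.S changes widen OPENSSL_ia32cap_P from 8 to 16 bytes so the third word can hold the CPUID leaf-7 EBX feature bits, and add OPENSSL_ia32_rdseed, which issues RDSEED and retries up to 8 times because the instruction may fail while the entropy conditioner is not ready. A rough C analogue of that bounded retry loop, using the compiler intrinsic (build with -mrdseed; try_rdseed32 is an illustrative name, not part of OpenSSL):

    #include <immintrin.h>

    /* Bounded RDSEED retry, mirroring the 8-iteration loop in the new
     * OPENSSL_ia32_rdseed routine.  Returns 1 on success, 0 if the
     * instruction failed on every attempt. */
    int try_rdseed32(unsigned int *out)
    {
        for (int i = 0; i < 8; i++)
            if (_rdseed32_step(out))   /* 1 when CF=1, i.e. a seed was returned */
                return 1;
        return 0;                      /* caller must handle exhaustion */
    }
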