[Midnightbsd-cvs] src [12156] trunk/secure/lib/libcrypto: add missing files
laffer1 at midnightbsd.org
Sun Jan 20 00:40:36 EST 2019
Revision: 12156
http://svnweb.midnightbsd.org/src/?rev=12156
Author: laffer1
Date: 2019-01-20 00:40:35 -0500 (Sun, 20 Jan 2019)
Log Message:
-----------
add missing files
Added Paths:
-----------
trunk/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S
trunk/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S
trunk/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S
trunk/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
trunk/secure/lib/libcrypto/amd64/rsaz-avx2.S
trunk/secure/lib/libcrypto/amd64/rsaz-x86_64.S
trunk/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S
trunk/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S
trunk/secure/lib/libcrypto/engines/lib4758cca/Makefile.depend
trunk/secure/lib/libcrypto/engines/libaep/Makefile.depend
trunk/secure/lib/libcrypto/engines/libatalla/Makefile.depend
trunk/secure/lib/libcrypto/engines/libcapi/
trunk/secure/lib/libcrypto/engines/libcapi/Makefile
trunk/secure/lib/libcrypto/engines/libcapi/Makefile.depend
trunk/secure/lib/libcrypto/engines/libchil/Makefile.depend
trunk/secure/lib/libcrypto/engines/libcswift/Makefile.depend
trunk/secure/lib/libcrypto/engines/libgost/Makefile.depend
trunk/secure/lib/libcrypto/engines/libnuron/Makefile.depend
trunk/secure/lib/libcrypto/engines/libsureware/Makefile.depend
trunk/secure/lib/libcrypto/engines/libubsec/Makefile.depend
trunk/secure/lib/libcrypto/man/ASN1_TIME_set.3
trunk/secure/lib/libcrypto/man/EC_GFp_simple_method.3
trunk/secure/lib/libcrypto/man/EC_GROUP_copy.3
trunk/secure/lib/libcrypto/man/EC_GROUP_new.3
trunk/secure/lib/libcrypto/man/EC_KEY_new.3
trunk/secure/lib/libcrypto/man/EC_POINT_add.3
trunk/secure/lib/libcrypto/man/EC_POINT_new.3
trunk/secure/lib/libcrypto/man/EVP_PKEY_meth_new.3
trunk/secure/lib/libcrypto/man/OPENSSL_instrument_bus.3
trunk/secure/lib/libcrypto/man/X509_check_host.3
trunk/secure/lib/libcrypto/man/X509_check_private_key.3
trunk/secure/lib/libcrypto/man/X509_cmp_time.3
trunk/secure/lib/libcrypto/man/d2i_ECPKParameters.3
trunk/secure/lib/libcrypto/man/ec.3
Added: trunk/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S
===================================================================
--- trunk/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S (rev 0)
+++ trunk/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,756 @@
+/* $MidnightBSD$ */
+/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from aesni-gcm-x86_64.pl. */
+.text
+
+.type	_aesni_ctr32_ghash_6x,@function
+.align 32
+_aesni_ctr32_ghash_6x:
+ vmovdqu 32(%r11),%xmm2
+ subq $6,%rdx
+ vpxor %xmm4,%xmm4,%xmm4
+ vmovdqu 0-128(%rcx),%xmm15
+ vpaddb %xmm2,%xmm1,%xmm10
+ vpaddb %xmm2,%xmm10,%xmm11
+ vpaddb %xmm2,%xmm11,%xmm12
+ vpaddb %xmm2,%xmm12,%xmm13
+ vpaddb %xmm2,%xmm13,%xmm14
+ vpxor %xmm15,%xmm1,%xmm9
+ vmovdqu %xmm4,16+8(%rsp)
+ jmp .Loop6x
+
+.align 32
+.Loop6x:
+ addl $100663296,%ebx
+ jc .Lhandle_ctr32
+ vmovdqu 0-32(%r9),%xmm3
+ vpaddb %xmm2,%xmm14,%xmm1
+ vpxor %xmm15,%xmm10,%xmm10
+ vpxor %xmm15,%xmm11,%xmm11
+
+.Lresume_ctr32:
+ vmovdqu %xmm1,(%r8)
+ vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
+ vpxor %xmm15,%xmm12,%xmm12
+ vmovups 16-128(%rcx),%xmm2
+ vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
+ xorq %r12,%r12
+ cmpq %r14,%r15
+
+ vaesenc %xmm2,%xmm9,%xmm9
+ vmovdqu 48+8(%rsp),%xmm0
+ vpxor %xmm15,%xmm13,%xmm13
+ vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
+ vaesenc %xmm2,%xmm10,%xmm10
+ vpxor %xmm15,%xmm14,%xmm14
+ setnc %r12b
+ vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
+ vaesenc %xmm2,%xmm11,%xmm11
+ vmovdqu 16-32(%r9),%xmm3
+ negq %r12
+ vaesenc %xmm2,%xmm12,%xmm12
+ vpxor %xmm5,%xmm6,%xmm6
+ vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
+ vpxor %xmm4,%xmm8,%xmm8
+ vaesenc %xmm2,%xmm13,%xmm13
+ vpxor %xmm5,%xmm1,%xmm4
+ andq $0x60,%r12
+ vmovups 32-128(%rcx),%xmm15
+ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
+ vaesenc %xmm2,%xmm14,%xmm14
+
+ vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
+ leaq (%r14,%r12,1),%r14
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor 16+8(%rsp),%xmm8,%xmm8
+ vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
+ vmovdqu 64+8(%rsp),%xmm0
+ vaesenc %xmm15,%xmm10,%xmm10
+ movbeq 88(%r14),%r13
+ vaesenc %xmm15,%xmm11,%xmm11
+ movbeq 80(%r14),%r12
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r13,32+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ movq %r12,40+8(%rsp)
+ vmovdqu 48-32(%r9),%xmm5
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vmovups 48-128(%rcx),%xmm15
+ vpxor %xmm1,%xmm6,%xmm6
+ vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm2,%xmm6,%xmm6
+ vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
+ vaesenc %xmm15,%xmm10,%xmm10
+ vpxor %xmm3,%xmm7,%xmm7
+ vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
+ vaesenc %xmm15,%xmm11,%xmm11
+ vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
+ vmovdqu 80+8(%rsp),%xmm0
+ vaesenc %xmm15,%xmm12,%xmm12
+ vaesenc %xmm15,%xmm13,%xmm13
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqu 64-32(%r9),%xmm1
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vmovups 64-128(%rcx),%xmm15
+ vpxor %xmm2,%xmm6,%xmm6
+ vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm3,%xmm6,%xmm6
+ vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
+ vaesenc %xmm15,%xmm10,%xmm10
+ movbeq 72(%r14),%r13
+ vpxor %xmm5,%xmm7,%xmm7
+ vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
+ vaesenc %xmm15,%xmm11,%xmm11
+ movbeq 64(%r14),%r12
+ vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
+ vmovdqu 96+8(%rsp),%xmm0
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r13,48+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ movq %r12,56+8(%rsp)
+ vpxor %xmm2,%xmm4,%xmm4
+ vmovdqu 96-32(%r9),%xmm2
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vmovups 80-128(%rcx),%xmm15
+ vpxor %xmm3,%xmm6,%xmm6
+ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm5,%xmm6,%xmm6
+ vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
+ vaesenc %xmm15,%xmm10,%xmm10
+ movbeq 56(%r14),%r13
+ vpxor %xmm1,%xmm7,%xmm7
+ vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
+ vpxor 112+8(%rsp),%xmm8,%xmm8
+ vaesenc %xmm15,%xmm11,%xmm11
+ movbeq 48(%r14),%r12
+ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r13,64+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ movq %r12,72+8(%rsp)
+ vpxor %xmm3,%xmm4,%xmm4
+ vmovdqu 112-32(%r9),%xmm3
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vmovups 96-128(%rcx),%xmm15
+ vpxor %xmm5,%xmm6,%xmm6
+ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm1,%xmm6,%xmm6
+ vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
+ vaesenc %xmm15,%xmm10,%xmm10
+ movbeq 40(%r14),%r13
+ vpxor %xmm2,%xmm7,%xmm7
+ vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
+ vaesenc %xmm15,%xmm11,%xmm11
+ movbeq 32(%r14),%r12
+ vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r13,80+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ movq %r12,88+8(%rsp)
+ vpxor %xmm5,%xmm6,%xmm6
+ vaesenc %xmm15,%xmm14,%xmm14
+ vpxor %xmm1,%xmm6,%xmm6
+
+ vmovups 112-128(%rcx),%xmm15
+ vpslldq $8,%xmm6,%xmm5
+ vpxor %xmm2,%xmm4,%xmm4
+ vmovdqu 16(%r11),%xmm3
+
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor %xmm8,%xmm7,%xmm7
+ vaesenc %xmm15,%xmm10,%xmm10
+ vpxor %xmm5,%xmm4,%xmm4
+ movbeq 24(%r14),%r13
+ vaesenc %xmm15,%xmm11,%xmm11
+ movbeq 16(%r14),%r12
+ vpalignr $8,%xmm4,%xmm4,%xmm0
+ vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
+ movq %r13,96+8(%rsp)
+ vaesenc %xmm15,%xmm12,%xmm12
+ movq %r12,104+8(%rsp)
+ vaesenc %xmm15,%xmm13,%xmm13
+ vmovups 128-128(%rcx),%xmm1
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vaesenc %xmm1,%xmm9,%xmm9
+ vmovups 144-128(%rcx),%xmm15
+ vaesenc %xmm1,%xmm10,%xmm10
+ vpsrldq $8,%xmm6,%xmm6
+ vaesenc %xmm1,%xmm11,%xmm11
+ vpxor %xmm6,%xmm7,%xmm7
+ vaesenc %xmm1,%xmm12,%xmm12
+ vpxor %xmm0,%xmm4,%xmm4
+ movbeq 8(%r14),%r13
+ vaesenc %xmm1,%xmm13,%xmm13
+ movbeq 0(%r14),%r12
+ vaesenc %xmm1,%xmm14,%xmm14
+ vmovups 160-128(%rcx),%xmm1
+ cmpl $11,%ebp
+ jb .Lenc_tail
+
+ vaesenc %xmm15,%xmm9,%xmm9
+ vaesenc %xmm15,%xmm10,%xmm10
+ vaesenc %xmm15,%xmm11,%xmm11
+ vaesenc %xmm15,%xmm12,%xmm12
+ vaesenc %xmm15,%xmm13,%xmm13
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vaesenc %xmm1,%xmm9,%xmm9
+ vaesenc %xmm1,%xmm10,%xmm10
+ vaesenc %xmm1,%xmm11,%xmm11
+ vaesenc %xmm1,%xmm12,%xmm12
+ vaesenc %xmm1,%xmm13,%xmm13
+ vmovups 176-128(%rcx),%xmm15
+ vaesenc %xmm1,%xmm14,%xmm14
+ vmovups 192-128(%rcx),%xmm1
+ je .Lenc_tail
+
+ vaesenc %xmm15,%xmm9,%xmm9
+ vaesenc %xmm15,%xmm10,%xmm10
+ vaesenc %xmm15,%xmm11,%xmm11
+ vaesenc %xmm15,%xmm12,%xmm12
+ vaesenc %xmm15,%xmm13,%xmm13
+ vaesenc %xmm15,%xmm14,%xmm14
+
+ vaesenc %xmm1,%xmm9,%xmm9
+ vaesenc %xmm1,%xmm10,%xmm10
+ vaesenc %xmm1,%xmm11,%xmm11
+ vaesenc %xmm1,%xmm12,%xmm12
+ vaesenc %xmm1,%xmm13,%xmm13
+ vmovups 208-128(%rcx),%xmm15
+ vaesenc %xmm1,%xmm14,%xmm14
+ vmovups 224-128(%rcx),%xmm1
+ jmp .Lenc_tail
+
+.align 32
+.Lhandle_ctr32:
+ vmovdqu (%r11),%xmm0
+ vpshufb %xmm0,%xmm1,%xmm6
+ vmovdqu 48(%r11),%xmm5
+ vpaddd 64(%r11),%xmm6,%xmm10
+ vpaddd %xmm5,%xmm6,%xmm11
+ vmovdqu 0-32(%r9),%xmm3
+ vpaddd %xmm5,%xmm10,%xmm12
+ vpshufb %xmm0,%xmm10,%xmm10
+ vpaddd %xmm5,%xmm11,%xmm13
+ vpshufb %xmm0,%xmm11,%xmm11
+ vpxor %xmm15,%xmm10,%xmm10
+ vpaddd %xmm5,%xmm12,%xmm14
+ vpshufb %xmm0,%xmm12,%xmm12
+ vpxor %xmm15,%xmm11,%xmm11
+ vpaddd %xmm5,%xmm13,%xmm1
+ vpshufb %xmm0,%xmm13,%xmm13
+ vpshufb %xmm0,%xmm14,%xmm14
+ vpshufb %xmm0,%xmm1,%xmm1
+ jmp .Lresume_ctr32
+
+.align 32
+.Lenc_tail:
+ vaesenc %xmm15,%xmm9,%xmm9
+ vmovdqu %xmm7,16+8(%rsp)
+ vpalignr $8,%xmm4,%xmm4,%xmm8
+ vaesenc %xmm15,%xmm10,%xmm10
+ vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
+ vpxor 0(%rdi),%xmm1,%xmm2
+ vaesenc %xmm15,%xmm11,%xmm11
+ vpxor 16(%rdi),%xmm1,%xmm0
+ vaesenc %xmm15,%xmm12,%xmm12
+ vpxor 32(%rdi),%xmm1,%xmm5
+ vaesenc %xmm15,%xmm13,%xmm13
+ vpxor 48(%rdi),%xmm1,%xmm6
+ vaesenc %xmm15,%xmm14,%xmm14
+ vpxor 64(%rdi),%xmm1,%xmm7
+ vpxor 80(%rdi),%xmm1,%xmm3
+ vmovdqu (%r8),%xmm1
+
+ vaesenclast %xmm2,%xmm9,%xmm9
+ vmovdqu 32(%r11),%xmm2
+ vaesenclast %xmm0,%xmm10,%xmm10
+ vpaddb %xmm2,%xmm1,%xmm0
+ movq %r13,112+8(%rsp)
+ leaq 96(%rdi),%rdi
+ vaesenclast %xmm5,%xmm11,%xmm11
+ vpaddb %xmm2,%xmm0,%xmm5
+ movq %r12,120+8(%rsp)
+ leaq 96(%rsi),%rsi
+ vmovdqu 0-128(%rcx),%xmm15
+ vaesenclast %xmm6,%xmm12,%xmm12
+ vpaddb %xmm2,%xmm5,%xmm6
+ vaesenclast %xmm7,%xmm13,%xmm13
+ vpaddb %xmm2,%xmm6,%xmm7
+ vaesenclast %xmm3,%xmm14,%xmm14
+ vpaddb %xmm2,%xmm7,%xmm3
+
+ addq $0x60,%r10
+ subq $0x6,%rdx
+ jc .L6x_done
+
+ vmovups %xmm9,-96(%rsi)
+ vpxor %xmm15,%xmm1,%xmm9
+ vmovups %xmm10,-80(%rsi)
+ vmovdqa %xmm0,%xmm10
+ vmovups %xmm11,-64(%rsi)
+ vmovdqa %xmm5,%xmm11
+ vmovups %xmm12,-48(%rsi)
+ vmovdqa %xmm6,%xmm12
+ vmovups %xmm13,-32(%rsi)
+ vmovdqa %xmm7,%xmm13
+ vmovups %xmm14,-16(%rsi)
+ vmovdqa %xmm3,%xmm14
+ vmovdqu 32+8(%rsp),%xmm7
+ jmp .Loop6x
+
+.L6x_done:
+ vpxor 16+8(%rsp),%xmm8,%xmm8
+ vpxor %xmm4,%xmm8,%xmm8
+
+ .byte 0xf3,0xc3
+.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
+.globl aesni_gcm_decrypt
+.type	aesni_gcm_decrypt,@function
+.align 32
+aesni_gcm_decrypt:
+ xorq %r10,%r10
+ cmpq $0x60,%rdx
+ jb .Lgcm_dec_abort
+
+ leaq (%rsp),%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ vzeroupper
+
+ vmovdqu (%r8),%xmm1
+ addq $-128,%rsp
+ movl 12(%r8),%ebx
+ leaq .Lbswap_mask(%rip),%r11
+ leaq -128(%rcx),%r14
+ movq $0xf80,%r15
+ vmovdqu (%r9),%xmm8
+ andq $-128,%rsp
+ vmovdqu (%r11),%xmm0
+ leaq 128(%rcx),%rcx
+ leaq 32+32(%r9),%r9
+ movl 240-128(%rcx),%ebp
+ vpshufb %xmm0,%xmm8,%xmm8
+
+ andq %r15,%r14
+ andq %rsp,%r15
+ subq %r14,%r15
+ jc .Ldec_no_key_aliasing
+ cmpq $768,%r15
+ jnc .Ldec_no_key_aliasing
+ subq %r15,%rsp
+.Ldec_no_key_aliasing:
+
+ vmovdqu 80(%rdi),%xmm7
+ leaq (%rdi),%r14
+ vmovdqu 64(%rdi),%xmm4
+ leaq -192(%rdi,%rdx,1),%r15
+ vmovdqu 48(%rdi),%xmm5
+ shrq $4,%rdx
+ xorq %r10,%r10
+ vmovdqu 32(%rdi),%xmm6
+ vpshufb %xmm0,%xmm7,%xmm7
+ vmovdqu 16(%rdi),%xmm2
+ vpshufb %xmm0,%xmm4,%xmm4
+ vmovdqu (%rdi),%xmm3
+ vpshufb %xmm0,%xmm5,%xmm5
+ vmovdqu %xmm4,48(%rsp)
+ vpshufb %xmm0,%xmm6,%xmm6
+ vmovdqu %xmm5,64(%rsp)
+ vpshufb %xmm0,%xmm2,%xmm2
+ vmovdqu %xmm6,80(%rsp)
+ vpshufb %xmm0,%xmm3,%xmm3
+ vmovdqu %xmm2,96(%rsp)
+ vmovdqu %xmm3,112(%rsp)
+
+ call _aesni_ctr32_ghash_6x
+
+ vmovups %xmm9,-96(%rsi)
+ vmovups %xmm10,-80(%rsi)
+ vmovups %xmm11,-64(%rsi)
+ vmovups %xmm12,-48(%rsi)
+ vmovups %xmm13,-32(%rsi)
+ vmovups %xmm14,-16(%rsi)
+
+ vpshufb (%r11),%xmm8,%xmm8
+ vmovdqu %xmm8,-64(%r9)
+
+ vzeroupper
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lgcm_dec_abort:
+ movq %r10,%rax
+ .byte 0xf3,0xc3
+.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
+.type	_aesni_ctr32_6x,@function
+.align 32
+_aesni_ctr32_6x:
+ vmovdqu 0-128(%rcx),%xmm4
+ vmovdqu 32(%r11),%xmm2
+ leaq -1(%rbp),%r13
+ vmovups 16-128(%rcx),%xmm15
+ leaq 32-128(%rcx),%r12
+ vpxor %xmm4,%xmm1,%xmm9
+ addl $100663296,%ebx
+ jc .Lhandle_ctr32_2
+ vpaddb %xmm2,%xmm1,%xmm10
+ vpaddb %xmm2,%xmm10,%xmm11
+ vpxor %xmm4,%xmm10,%xmm10
+ vpaddb %xmm2,%xmm11,%xmm12
+ vpxor %xmm4,%xmm11,%xmm11
+ vpaddb %xmm2,%xmm12,%xmm13
+ vpxor %xmm4,%xmm12,%xmm12
+ vpaddb %xmm2,%xmm13,%xmm14
+ vpxor %xmm4,%xmm13,%xmm13
+ vpaddb %xmm2,%xmm14,%xmm1
+ vpxor %xmm4,%xmm14,%xmm14
+ jmp .Loop_ctr32
+
+.align 16
+.Loop_ctr32:
+ vaesenc %xmm15,%xmm9,%xmm9
+ vaesenc %xmm15,%xmm10,%xmm10
+ vaesenc %xmm15,%xmm11,%xmm11
+ vaesenc %xmm15,%xmm12,%xmm12
+ vaesenc %xmm15,%xmm13,%xmm13
+ vaesenc %xmm15,%xmm14,%xmm14
+ vmovups (%r12),%xmm15
+ leaq 16(%r12),%r12
+ decl %r13d
+ jnz .Loop_ctr32
+
+ vmovdqu (%r12),%xmm3
+ vaesenc %xmm15,%xmm9,%xmm9
+ vpxor 0(%rdi),%xmm3,%xmm4
+ vaesenc %xmm15,%xmm10,%xmm10
+ vpxor 16(%rdi),%xmm3,%xmm5
+ vaesenc %xmm15,%xmm11,%xmm11
+ vpxor 32(%rdi),%xmm3,%xmm6
+ vaesenc %xmm15,%xmm12,%xmm12
+ vpxor 48(%rdi),%xmm3,%xmm8
+ vaesenc %xmm15,%xmm13,%xmm13
+ vpxor 64(%rdi),%xmm3,%xmm2
+ vaesenc %xmm15,%xmm14,%xmm14
+ vpxor 80(%rdi),%xmm3,%xmm3
+ leaq 96(%rdi),%rdi
+
+ vaesenclast %xmm4,%xmm9,%xmm9
+ vaesenclast %xmm5,%xmm10,%xmm10
+ vaesenclast %xmm6,%xmm11,%xmm11
+ vaesenclast %xmm8,%xmm12,%xmm12
+ vaesenclast %xmm2,%xmm13,%xmm13
+ vaesenclast %xmm3,%xmm14,%xmm14
+ vmovups %xmm9,0(%rsi)
+ vmovups %xmm10,16(%rsi)
+ vmovups %xmm11,32(%rsi)
+ vmovups %xmm12,48(%rsi)
+ vmovups %xmm13,64(%rsi)
+ vmovups %xmm14,80(%rsi)
+ leaq 96(%rsi),%rsi
+
+ .byte 0xf3,0xc3
+.align 32
+.Lhandle_ctr32_2:
+ vpshufb %xmm0,%xmm1,%xmm6
+ vmovdqu 48(%r11),%xmm5
+ vpaddd 64(%r11),%xmm6,%xmm10
+ vpaddd %xmm5,%xmm6,%xmm11
+ vpaddd %xmm5,%xmm10,%xmm12
+ vpshufb %xmm0,%xmm10,%xmm10
+ vpaddd %xmm5,%xmm11,%xmm13
+ vpshufb %xmm0,%xmm11,%xmm11
+ vpxor %xmm4,%xmm10,%xmm10
+ vpaddd %xmm5,%xmm12,%xmm14
+ vpshufb %xmm0,%xmm12,%xmm12
+ vpxor %xmm4,%xmm11,%xmm11
+ vpaddd %xmm5,%xmm13,%xmm1
+ vpshufb %xmm0,%xmm13,%xmm13
+ vpxor %xmm4,%xmm12,%xmm12
+ vpshufb %xmm0,%xmm14,%xmm14
+ vpxor %xmm4,%xmm13,%xmm13
+ vpshufb %xmm0,%xmm1,%xmm1
+ vpxor %xmm4,%xmm14,%xmm14
+ jmp .Loop_ctr32
+.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
+
+.globl aesni_gcm_encrypt
+.type	aesni_gcm_encrypt,@function
+.align 32
+aesni_gcm_encrypt:
+ xorq %r10,%r10
+ cmpq $288,%rdx
+ jb .Lgcm_enc_abort
+
+ leaq (%rsp),%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ vzeroupper
+
+ vmovdqu (%r8),%xmm1
+ addq $-128,%rsp
+ movl 12(%r8),%ebx
+ leaq .Lbswap_mask(%rip),%r11
+ leaq -128(%rcx),%r14
+ movq $0xf80,%r15
+ leaq 128(%rcx),%rcx
+ vmovdqu (%r11),%xmm0
+ andq $-128,%rsp
+ movl 240-128(%rcx),%ebp
+
+ andq %r15,%r14
+ andq %rsp,%r15
+ subq %r14,%r15
+ jc .Lenc_no_key_aliasing
+ cmpq $768,%r15
+ jnc .Lenc_no_key_aliasing
+ subq %r15,%rsp
+.Lenc_no_key_aliasing:
+
+ leaq (%rsi),%r14
+ leaq -192(%rsi,%rdx,1),%r15
+ shrq $4,%rdx
+
+ call _aesni_ctr32_6x
+ vpshufb %xmm0,%xmm9,%xmm8
+ vpshufb %xmm0,%xmm10,%xmm2
+ vmovdqu %xmm8,112(%rsp)
+ vpshufb %xmm0,%xmm11,%xmm4
+ vmovdqu %xmm2,96(%rsp)
+ vpshufb %xmm0,%xmm12,%xmm5
+ vmovdqu %xmm4,80(%rsp)
+ vpshufb %xmm0,%xmm13,%xmm6
+ vmovdqu %xmm5,64(%rsp)
+ vpshufb %xmm0,%xmm14,%xmm7
+ vmovdqu %xmm6,48(%rsp)
+
+ call _aesni_ctr32_6x
+
+ vmovdqu (%r9),%xmm8
+ leaq 32+32(%r9),%r9
+ subq $12,%rdx
+ movq $192,%r10
+ vpshufb %xmm0,%xmm8,%xmm8
+
+ call _aesni_ctr32_ghash_6x
+ vmovdqu 32(%rsp),%xmm7
+ vmovdqu (%r11),%xmm0
+ vmovdqu 0-32(%r9),%xmm3
+ vpunpckhqdq %xmm7,%xmm7,%xmm1
+ vmovdqu 32-32(%r9),%xmm15
+ vmovups %xmm9,-96(%rsi)
+ vpshufb %xmm0,%xmm9,%xmm9
+ vpxor %xmm7,%xmm1,%xmm1
+ vmovups %xmm10,-80(%rsi)
+ vpshufb %xmm0,%xmm10,%xmm10
+ vmovups %xmm11,-64(%rsi)
+ vpshufb %xmm0,%xmm11,%xmm11
+ vmovups %xmm12,-48(%rsi)
+ vpshufb %xmm0,%xmm12,%xmm12
+ vmovups %xmm13,-32(%rsi)
+ vpshufb %xmm0,%xmm13,%xmm13
+ vmovups %xmm14,-16(%rsi)
+ vpshufb %xmm0,%xmm14,%xmm14
+ vmovdqu %xmm9,16(%rsp)
+ vmovdqu 48(%rsp),%xmm6
+ vmovdqu 16-32(%r9),%xmm0
+ vpunpckhqdq %xmm6,%xmm6,%xmm2
+ vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
+ vpxor %xmm6,%xmm2,%xmm2
+ vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
+ vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
+
+ vmovdqu 64(%rsp),%xmm9
+ vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
+ vmovdqu 48-32(%r9),%xmm3
+ vpxor %xmm5,%xmm4,%xmm4
+ vpunpckhqdq %xmm9,%xmm9,%xmm5
+ vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
+ vpxor %xmm9,%xmm5,%xmm5
+ vpxor %xmm7,%xmm6,%xmm6
+ vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
+ vmovdqu 80-32(%r9),%xmm15
+ vpxor %xmm1,%xmm2,%xmm2
+
+ vmovdqu 80(%rsp),%xmm1
+ vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
+ vmovdqu 64-32(%r9),%xmm0
+ vpxor %xmm4,%xmm7,%xmm7
+ vpunpckhqdq %xmm1,%xmm1,%xmm4
+ vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
+ vpxor %xmm1,%xmm4,%xmm4
+ vpxor %xmm6,%xmm9,%xmm9
+ vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
+ vpxor %xmm2,%xmm5,%xmm5
+
+ vmovdqu 96(%rsp),%xmm2
+ vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
+ vmovdqu 96-32(%r9),%xmm3
+ vpxor %xmm7,%xmm6,%xmm6
+ vpunpckhqdq %xmm2,%xmm2,%xmm7
+ vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpxor %xmm9,%xmm1,%xmm1
+ vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
+ vmovdqu 128-32(%r9),%xmm15
+ vpxor %xmm5,%xmm4,%xmm4
+
+ vpxor 112(%rsp),%xmm8,%xmm8
+ vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
+ vmovdqu 112-32(%r9),%xmm0
+ vpunpckhqdq %xmm8,%xmm8,%xmm9
+ vpxor %xmm6,%xmm5,%xmm5
+ vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
+ vpxor %xmm8,%xmm9,%xmm9
+ vpxor %xmm1,%xmm2,%xmm2
+ vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
+ vpxor %xmm4,%xmm7,%xmm4
+
+ vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
+ vmovdqu 0-32(%r9),%xmm3
+ vpunpckhqdq %xmm14,%xmm14,%xmm1
+ vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
+ vpxor %xmm14,%xmm1,%xmm1
+ vpxor %xmm5,%xmm6,%xmm5
+ vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
+ vmovdqu 32-32(%r9),%xmm15
+ vpxor %xmm2,%xmm8,%xmm7
+ vpxor %xmm4,%xmm9,%xmm6
+
+ vmovdqu 16-32(%r9),%xmm0
+ vpxor %xmm5,%xmm7,%xmm9
+ vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
+ vpxor %xmm9,%xmm6,%xmm6
+ vpunpckhqdq %xmm13,%xmm13,%xmm2
+ vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
+ vpxor %xmm13,%xmm2,%xmm2
+ vpslldq $8,%xmm6,%xmm9
+ vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
+ vpxor %xmm9,%xmm5,%xmm8
+ vpsrldq $8,%xmm6,%xmm6
+ vpxor %xmm6,%xmm7,%xmm7
+
+ vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
+ vmovdqu 48-32(%r9),%xmm3
+ vpxor %xmm4,%xmm5,%xmm5
+ vpunpckhqdq %xmm12,%xmm12,%xmm9
+ vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
+ vpxor %xmm12,%xmm9,%xmm9
+ vpxor %xmm14,%xmm13,%xmm13
+ vpalignr $8,%xmm8,%xmm8,%xmm14
+ vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
+ vmovdqu 80-32(%r9),%xmm15
+ vpxor %xmm1,%xmm2,%xmm2
+
+ vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
+ vmovdqu 64-32(%r9),%xmm0
+ vpxor %xmm5,%xmm4,%xmm4
+ vpunpckhqdq %xmm11,%xmm11,%xmm1
+ vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
+ vpxor %xmm11,%xmm1,%xmm1
+ vpxor %xmm13,%xmm12,%xmm12
+ vxorps 16(%rsp),%xmm7,%xmm7
+ vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
+ vpxor %xmm2,%xmm9,%xmm9
+
+ vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
+ vxorps %xmm14,%xmm8,%xmm8
+
+ vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
+ vmovdqu 96-32(%r9),%xmm3
+ vpxor %xmm4,%xmm5,%xmm5
+ vpunpckhqdq %xmm10,%xmm10,%xmm2
+ vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
+ vpxor %xmm10,%xmm2,%xmm2
+ vpalignr $8,%xmm8,%xmm8,%xmm14
+ vpxor %xmm12,%xmm11,%xmm11
+ vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
+ vmovdqu 128-32(%r9),%xmm15
+ vpxor %xmm9,%xmm1,%xmm1
+
+ vxorps %xmm7,%xmm14,%xmm14
+ vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
+ vxorps %xmm14,%xmm8,%xmm8
+
+ vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
+ vmovdqu 112-32(%r9),%xmm0
+ vpxor %xmm5,%xmm4,%xmm4
+ vpunpckhqdq %xmm8,%xmm8,%xmm9
+ vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
+ vpxor %xmm8,%xmm9,%xmm9
+ vpxor %xmm11,%xmm10,%xmm10
+ vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
+ vpxor %xmm1,%xmm2,%xmm2
+
+ vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
+ vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
+ vpxor %xmm4,%xmm5,%xmm5
+ vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
+ vpxor %xmm10,%xmm7,%xmm7
+ vpxor %xmm2,%xmm6,%xmm6
+
+ vpxor %xmm5,%xmm7,%xmm4
+ vpxor %xmm4,%xmm6,%xmm6
+ vpslldq $8,%xmm6,%xmm1
+ vmovdqu 16(%r11),%xmm3
+ vpsrldq $8,%xmm6,%xmm6
+ vpxor %xmm1,%xmm5,%xmm8
+ vpxor %xmm6,%xmm7,%xmm7
+
+ vpalignr $8,%xmm8,%xmm8,%xmm2
+ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
+ vpxor %xmm2,%xmm8,%xmm8
+
+ vpalignr $8,%xmm8,%xmm8,%xmm2
+ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
+ vpxor %xmm7,%xmm2,%xmm2
+ vpxor %xmm2,%xmm8,%xmm8
+ vpshufb (%r11),%xmm8,%xmm8
+ vmovdqu %xmm8,-64(%r9)
+
+ vzeroupper
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lgcm_enc_abort:
+ movq %r10,%rax
+ .byte 0xf3,0xc3
+.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
+.align 64
+.Lbswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lpoly:
+.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+.Lone_msb:
+.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+.Ltwo_lsb:
+.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+.Lone_lsb:
+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
Property changes on: trunk/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S
===================================================================
--- trunk/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S (rev 0)
+++ trunk/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,1438 @@
+/* $MidnightBSD$ */
+/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from aesni-mb-x86_64.pl. */
+.text
+
+
+
+.globl aesni_multi_cbc_encrypt
+.type	aesni_multi_cbc_encrypt,@function
+.align 32
+aesni_multi_cbc_encrypt:
+ cmpl $2,%edx
+ jb .Lenc_non_avx
+ movl OPENSSL_ia32cap_P+4(%rip),%ecx
+ testl $268435456,%ecx
+ jnz _avx_cbc_enc_shortcut
+ jmp .Lenc_non_avx
+.align 16
+.Lenc_non_avx:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+
+
+
+
+ subq $48,%rsp
+ andq $-64,%rsp
+ movq %rax,16(%rsp)
+
+.Lenc4x_body:
+ movdqu (%rsi),%xmm12
+ leaq 120(%rsi),%rsi
+ leaq 80(%rdi),%rdi
+
+.Lenc4x_loop_grande:
+ movl %edx,24(%rsp)
+ xorl %edx,%edx
+ movl -64(%rdi),%ecx
+ movq -80(%rdi),%r8
+ cmpl %edx,%ecx
+ movq -72(%rdi),%r12
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu -56(%rdi),%xmm2
+ movl %ecx,32(%rsp)
+ cmovleq %rsp,%r8
+ movl -24(%rdi),%ecx
+ movq -40(%rdi),%r9
+ cmpl %edx,%ecx
+ movq -32(%rdi),%r13
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu -16(%rdi),%xmm3
+ movl %ecx,36(%rsp)
+ cmovleq %rsp,%r9
+ movl 16(%rdi),%ecx
+ movq 0(%rdi),%r10
+ cmpl %edx,%ecx
+ movq 8(%rdi),%r14
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu 24(%rdi),%xmm4
+ movl %ecx,40(%rsp)
+ cmovleq %rsp,%r10
+ movl 56(%rdi),%ecx
+ movq 40(%rdi),%r11
+ cmpl %edx,%ecx
+ movq 48(%rdi),%r15
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu 64(%rdi),%xmm5
+ movl %ecx,44(%rsp)
+ cmovleq %rsp,%r11
+ testl %edx,%edx
+ jz .Lenc4x_done
+
+ movups 16-120(%rsi),%xmm1
+ pxor %xmm12,%xmm2
+ movups 32-120(%rsi),%xmm0
+ pxor %xmm12,%xmm3
+ movl 240-120(%rsi),%eax
+ pxor %xmm12,%xmm4
+ movdqu (%r8),%xmm6
+ pxor %xmm12,%xmm5
+ movdqu (%r9),%xmm7
+ pxor %xmm6,%xmm2
+ movdqu (%r10),%xmm8
+ pxor %xmm7,%xmm3
+ movdqu (%r11),%xmm9
+ pxor %xmm8,%xmm4
+ pxor %xmm9,%xmm5
+ movdqa 32(%rsp),%xmm10
+ xorq %rbx,%rbx
+ jmp .Loop_enc4x
+
+.align 32
+.Loop_enc4x:
+ addq $16,%rbx
+ leaq 16(%rsp),%rbp
+ movl $1,%ecx
+ subq %rbx,%rbp
+
+.byte 102,15,56,220,209
+ prefetcht0 31(%r8,%rbx,1)
+ prefetcht0 31(%r9,%rbx,1)
+.byte 102,15,56,220,217
+ prefetcht0 31(%r10,%rbx,1)
+ prefetcht0 31(%r10,%rbx,1)
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 48-120(%rsi),%xmm1
+ cmpl 32(%rsp),%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+ cmovgeq %rbp,%r8
+ cmovgq %rbp,%r12
+.byte 102,15,56,220,232
+ movups -56(%rsi),%xmm0
+ cmpl 36(%rsp),%ecx
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+ cmovgeq %rbp,%r9
+ cmovgq %rbp,%r13
+.byte 102,15,56,220,233
+ movups -40(%rsi),%xmm1
+ cmpl 40(%rsp),%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+ cmovgeq %rbp,%r10
+ cmovgq %rbp,%r14
+.byte 102,15,56,220,232
+ movups -24(%rsi),%xmm0
+ cmpl 44(%rsp),%ecx
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+ cmovgeq %rbp,%r11
+ cmovgq %rbp,%r15
+.byte 102,15,56,220,233
+ movups -8(%rsi),%xmm1
+ movdqa %xmm10,%xmm11
+.byte 102,15,56,220,208
+ prefetcht0 15(%r12,%rbx,1)
+ prefetcht0 15(%r13,%rbx,1)
+.byte 102,15,56,220,216
+ prefetcht0 15(%r14,%rbx,1)
+ prefetcht0 15(%r15,%rbx,1)
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups 128-120(%rsi),%xmm0
+ pxor %xmm12,%xmm12
+
+.byte 102,15,56,220,209
+ pcmpgtd %xmm12,%xmm11
+ movdqu -120(%rsi),%xmm12
+.byte 102,15,56,220,217
+ paddd %xmm11,%xmm10
+ movdqa %xmm10,32(%rsp)
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 144-120(%rsi),%xmm1
+
+ cmpl $11,%eax
+
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups 160-120(%rsi),%xmm0
+
+ jb .Lenc4x_tail
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 176-120(%rsi),%xmm1
+
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups 192-120(%rsi),%xmm0
+
+ je .Lenc4x_tail
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 208-120(%rsi),%xmm1
+
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups 224-120(%rsi),%xmm0
+ jmp .Lenc4x_tail
+
+.align 32
+.Lenc4x_tail:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movdqu (%r8,%rbx,1),%xmm6
+ movdqu 16-120(%rsi),%xmm1
+
+.byte 102,15,56,221,208
+ movdqu (%r9,%rbx,1),%xmm7
+ pxor %xmm12,%xmm6
+.byte 102,15,56,221,216
+ movdqu (%r10,%rbx,1),%xmm8
+ pxor %xmm12,%xmm7
+.byte 102,15,56,221,224
+ movdqu (%r11,%rbx,1),%xmm9
+ pxor %xmm12,%xmm8
+.byte 102,15,56,221,232
+ movdqu 32-120(%rsi),%xmm0
+ pxor %xmm12,%xmm9
+
+ movups %xmm2,-16(%r12,%rbx,1)
+ pxor %xmm6,%xmm2
+ movups %xmm3,-16(%r13,%rbx,1)
+ pxor %xmm7,%xmm3
+ movups %xmm4,-16(%r14,%rbx,1)
+ pxor %xmm8,%xmm4
+ movups %xmm5,-16(%r15,%rbx,1)
+ pxor %xmm9,%xmm5
+
+ decl %edx
+ jnz .Loop_enc4x
+
+ movq 16(%rsp),%rax
+ movl 24(%rsp),%edx
+
+
+
+
+
+
+
+
+
+
+ leaq 160(%rdi),%rdi
+ decl %edx
+ jnz .Lenc4x_loop_grande
+
+.Lenc4x_done:
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lenc4x_epilogue:
+ .byte 0xf3,0xc3
+.size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt
+
+.globl aesni_multi_cbc_decrypt
+.type	aesni_multi_cbc_decrypt,@function
+.align 32
+aesni_multi_cbc_decrypt:
+ cmpl $2,%edx
+ jb .Ldec_non_avx
+ movl OPENSSL_ia32cap_P+4(%rip),%ecx
+ testl $268435456,%ecx
+ jnz _avx_cbc_dec_shortcut
+ jmp .Ldec_non_avx
+.align 16
+.Ldec_non_avx:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+
+
+
+
+ subq $48,%rsp
+ andq $-64,%rsp
+ movq %rax,16(%rsp)
+
+.Ldec4x_body:
+ movdqu (%rsi),%xmm12
+ leaq 120(%rsi),%rsi
+ leaq 80(%rdi),%rdi
+
+.Ldec4x_loop_grande:
+ movl %edx,24(%rsp)
+ xorl %edx,%edx
+ movl -64(%rdi),%ecx
+ movq -80(%rdi),%r8
+ cmpl %edx,%ecx
+ movq -72(%rdi),%r12
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu -56(%rdi),%xmm6
+ movl %ecx,32(%rsp)
+ cmovleq %rsp,%r8
+ movl -24(%rdi),%ecx
+ movq -40(%rdi),%r9
+ cmpl %edx,%ecx
+ movq -32(%rdi),%r13
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu -16(%rdi),%xmm7
+ movl %ecx,36(%rsp)
+ cmovleq %rsp,%r9
+ movl 16(%rdi),%ecx
+ movq 0(%rdi),%r10
+ cmpl %edx,%ecx
+ movq 8(%rdi),%r14
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu 24(%rdi),%xmm8
+ movl %ecx,40(%rsp)
+ cmovleq %rsp,%r10
+ movl 56(%rdi),%ecx
+ movq 40(%rdi),%r11
+ cmpl %edx,%ecx
+ movq 48(%rdi),%r15
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movdqu 64(%rdi),%xmm9
+ movl %ecx,44(%rsp)
+ cmovleq %rsp,%r11
+ testl %edx,%edx
+ jz .Ldec4x_done
+
+ movups 16-120(%rsi),%xmm1
+ movups 32-120(%rsi),%xmm0
+ movl 240-120(%rsi),%eax
+ movdqu (%r8),%xmm2
+ movdqu (%r9),%xmm3
+ pxor %xmm12,%xmm2
+ movdqu (%r10),%xmm4
+ pxor %xmm12,%xmm3
+ movdqu (%r11),%xmm5
+ pxor %xmm12,%xmm4
+ pxor %xmm12,%xmm5
+ movdqa 32(%rsp),%xmm10
+ xorq %rbx,%rbx
+ jmp .Loop_dec4x
+
+.align 32
+.Loop_dec4x:
+ addq $16,%rbx
+ leaq 16(%rsp),%rbp
+ movl $1,%ecx
+ subq %rbx,%rbp
+
+.byte 102,15,56,222,209
+ prefetcht0 31(%r8,%rbx,1)
+ prefetcht0 31(%r9,%rbx,1)
+.byte 102,15,56,222,217
+ prefetcht0 31(%r10,%rbx,1)
+ prefetcht0 31(%r11,%rbx,1)
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+ movups 48-120(%rsi),%xmm1
+ cmpl 32(%rsp),%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+ cmovgeq %rbp,%r8
+ cmovgq %rbp,%r12
+.byte 102,15,56,222,232
+ movups -56(%rsi),%xmm0
+ cmpl 36(%rsp),%ecx
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+ cmovgeq %rbp,%r9
+ cmovgq %rbp,%r13
+.byte 102,15,56,222,233
+ movups -40(%rsi),%xmm1
+ cmpl 40(%rsp),%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+ cmovgeq %rbp,%r10
+ cmovgq %rbp,%r14
+.byte 102,15,56,222,232
+ movups -24(%rsi),%xmm0
+ cmpl 44(%rsp),%ecx
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+ cmovgeq %rbp,%r11
+ cmovgq %rbp,%r15
+.byte 102,15,56,222,233
+ movups -8(%rsi),%xmm1
+ movdqa %xmm10,%xmm11
+.byte 102,15,56,222,208
+ prefetcht0 15(%r12,%rbx,1)
+ prefetcht0 15(%r13,%rbx,1)
+.byte 102,15,56,222,216
+ prefetcht0 15(%r14,%rbx,1)
+ prefetcht0 15(%r15,%rbx,1)
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+ movups 128-120(%rsi),%xmm0
+ pxor %xmm12,%xmm12
+
+.byte 102,15,56,222,209
+ pcmpgtd %xmm12,%xmm11
+ movdqu -120(%rsi),%xmm12
+.byte 102,15,56,222,217
+ paddd %xmm11,%xmm10
+ movdqa %xmm10,32(%rsp)
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+ movups 144-120(%rsi),%xmm1
+
+ cmpl $11,%eax
+
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+ movups 160-120(%rsi),%xmm0
+
+ jb .Ldec4x_tail
+
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+ movups 176-120(%rsi),%xmm1
+
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+ movups 192-120(%rsi),%xmm0
+
+ je .Ldec4x_tail
+
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+ movups 208-120(%rsi),%xmm1
+
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+ movups 224-120(%rsi),%xmm0
+ jmp .Ldec4x_tail
+
+.align 32
+.Ldec4x_tail:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm6
+ pxor %xmm0,%xmm7
+.byte 102,15,56,222,233
+ movdqu 16-120(%rsi),%xmm1
+ pxor %xmm0,%xmm8
+ pxor %xmm0,%xmm9
+ movdqu 32-120(%rsi),%xmm0
+
+.byte 102,15,56,223,214
+.byte 102,15,56,223,223
+ movdqu -16(%r8,%rbx,1),%xmm6
+ movdqu -16(%r9,%rbx,1),%xmm7
+.byte 102,65,15,56,223,224
+.byte 102,65,15,56,223,233
+ movdqu -16(%r10,%rbx,1),%xmm8
+ movdqu -16(%r11,%rbx,1),%xmm9
+
+ movups %xmm2,-16(%r12,%rbx,1)
+ movdqu (%r8,%rbx,1),%xmm2
+ movups %xmm3,-16(%r13,%rbx,1)
+ movdqu (%r9,%rbx,1),%xmm3
+ pxor %xmm12,%xmm2
+ movups %xmm4,-16(%r14,%rbx,1)
+ movdqu (%r10,%rbx,1),%xmm4
+ pxor %xmm12,%xmm3
+ movups %xmm5,-16(%r15,%rbx,1)
+ movdqu (%r11,%rbx,1),%xmm5
+ pxor %xmm12,%xmm4
+ pxor %xmm12,%xmm5
+
+ decl %edx
+ jnz .Loop_dec4x
+
+ movq 16(%rsp),%rax
+ movl 24(%rsp),%edx
+
+ leaq 160(%rdi),%rdi
+ decl %edx
+ jnz .Ldec4x_loop_grande
+
+.Ldec4x_done:
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Ldec4x_epilogue:
+ .byte 0xf3,0xc3
+.size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
+.type	aesni_multi_cbc_encrypt_avx,@function
+.align 32
+aesni_multi_cbc_encrypt_avx:
+_avx_cbc_enc_shortcut:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+
+
+
+
+
+
+ subq $192,%rsp
+ andq $-128,%rsp
+ movq %rax,16(%rsp)
+
+.Lenc8x_body:
+ vzeroupper
+ vmovdqu (%rsi),%xmm15
+ leaq 120(%rsi),%rsi
+ leaq 160(%rdi),%rdi
+ shrl $1,%edx
+
+.Lenc8x_loop_grande:
+
+ xorl %edx,%edx
+ movl -144(%rdi),%ecx
+ movq -160(%rdi),%r8
+ cmpl %edx,%ecx
+ movq -152(%rdi),%rbx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu -136(%rdi),%xmm2
+ movl %ecx,32(%rsp)
+ cmovleq %rsp,%r8
+ subq %r8,%rbx
+ movq %rbx,64(%rsp)
+ movl -104(%rdi),%ecx
+ movq -120(%rdi),%r9
+ cmpl %edx,%ecx
+ movq -112(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu -96(%rdi),%xmm3
+ movl %ecx,36(%rsp)
+ cmovleq %rsp,%r9
+ subq %r9,%rbp
+ movq %rbp,72(%rsp)
+ movl -64(%rdi),%ecx
+ movq -80(%rdi),%r10
+ cmpl %edx,%ecx
+ movq -72(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu -56(%rdi),%xmm4
+ movl %ecx,40(%rsp)
+ cmovleq %rsp,%r10
+ subq %r10,%rbp
+ movq %rbp,80(%rsp)
+ movl -24(%rdi),%ecx
+ movq -40(%rdi),%r11
+ cmpl %edx,%ecx
+ movq -32(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu -16(%rdi),%xmm5
+ movl %ecx,44(%rsp)
+ cmovleq %rsp,%r11
+ subq %r11,%rbp
+ movq %rbp,88(%rsp)
+ movl 16(%rdi),%ecx
+ movq 0(%rdi),%r12
+ cmpl %edx,%ecx
+ movq 8(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu 24(%rdi),%xmm6
+ movl %ecx,48(%rsp)
+ cmovleq %rsp,%r12
+ subq %r12,%rbp
+ movq %rbp,96(%rsp)
+ movl 56(%rdi),%ecx
+ movq 40(%rdi),%r13
+ cmpl %edx,%ecx
+ movq 48(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu 64(%rdi),%xmm7
+ movl %ecx,52(%rsp)
+ cmovleq %rsp,%r13
+ subq %r13,%rbp
+ movq %rbp,104(%rsp)
+ movl 96(%rdi),%ecx
+ movq 80(%rdi),%r14
+ cmpl %edx,%ecx
+ movq 88(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu 104(%rdi),%xmm8
+ movl %ecx,56(%rsp)
+ cmovleq %rsp,%r14
+ subq %r14,%rbp
+ movq %rbp,112(%rsp)
+ movl 136(%rdi),%ecx
+ movq 120(%rdi),%r15
+ cmpl %edx,%ecx
+ movq 128(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu 144(%rdi),%xmm9
+ movl %ecx,60(%rsp)
+ cmovleq %rsp,%r15
+ subq %r15,%rbp
+ movq %rbp,120(%rsp)
+ testl %edx,%edx
+ jz .Lenc8x_done
+
+ vmovups 16-120(%rsi),%xmm1
+ vmovups 32-120(%rsi),%xmm0
+ movl 240-120(%rsi),%eax
+
+ vpxor (%r8),%xmm15,%xmm10
+ leaq 128(%rsp),%rbp
+ vpxor (%r9),%xmm15,%xmm11
+ vpxor (%r10),%xmm15,%xmm12
+ vpxor (%r11),%xmm15,%xmm13
+ vpxor %xmm10,%xmm2,%xmm2
+ vpxor (%r12),%xmm15,%xmm10
+ vpxor %xmm11,%xmm3,%xmm3
+ vpxor (%r13),%xmm15,%xmm11
+ vpxor %xmm12,%xmm4,%xmm4
+ vpxor (%r14),%xmm15,%xmm12
+ vpxor %xmm13,%xmm5,%xmm5
+ vpxor (%r15),%xmm15,%xmm13
+ vpxor %xmm10,%xmm6,%xmm6
+ movl $1,%ecx
+ vpxor %xmm11,%xmm7,%xmm7
+ vpxor %xmm12,%xmm8,%xmm8
+ vpxor %xmm13,%xmm9,%xmm9
+ jmp .Loop_enc8x
+
+.align 32
+.Loop_enc8x:
+ vaesenc %xmm1,%xmm2,%xmm2
+ cmpl 32+0(%rsp),%ecx
+ vaesenc %xmm1,%xmm3,%xmm3
+ prefetcht0 31(%r8)
+ vaesenc %xmm1,%xmm4,%xmm4
+ vaesenc %xmm1,%xmm5,%xmm5
+ leaq (%r8,%rbx,1),%rbx
+ cmovgeq %rsp,%r8
+ vaesenc %xmm1,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesenc %xmm1,%xmm7,%xmm7
+ subq %r8,%rbx
+ vaesenc %xmm1,%xmm8,%xmm8
+ vpxor 16(%r8),%xmm15,%xmm10
+ movq %rbx,64+0(%rsp)
+ vaesenc %xmm1,%xmm9,%xmm9
+ vmovups -72(%rsi),%xmm1
+ leaq 16(%r8,%rbx,1),%r8
+ vmovdqu %xmm10,0(%rbp)
+ vaesenc %xmm0,%xmm2,%xmm2
+ cmpl 32+4(%rsp),%ecx
+ movq 64+8(%rsp),%rbx
+ vaesenc %xmm0,%xmm3,%xmm3
+ prefetcht0 31(%r9)
+ vaesenc %xmm0,%xmm4,%xmm4
+ vaesenc %xmm0,%xmm5,%xmm5
+ leaq (%r9,%rbx,1),%rbx
+ cmovgeq %rsp,%r9
+ vaesenc %xmm0,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesenc %xmm0,%xmm7,%xmm7
+ subq %r9,%rbx
+ vaesenc %xmm0,%xmm8,%xmm8
+ vpxor 16(%r9),%xmm15,%xmm11
+ movq %rbx,64+8(%rsp)
+ vaesenc %xmm0,%xmm9,%xmm9
+ vmovups -56(%rsi),%xmm0
+ leaq 16(%r9,%rbx,1),%r9
+ vmovdqu %xmm11,16(%rbp)
+ vaesenc %xmm1,%xmm2,%xmm2
+ cmpl 32+8(%rsp),%ecx
+ movq 64+16(%rsp),%rbx
+ vaesenc %xmm1,%xmm3,%xmm3
+ prefetcht0 31(%r10)
+ vaesenc %xmm1,%xmm4,%xmm4
+ prefetcht0 15(%r8)
+ vaesenc %xmm1,%xmm5,%xmm5
+ leaq (%r10,%rbx,1),%rbx
+ cmovgeq %rsp,%r10
+ vaesenc %xmm1,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesenc %xmm1,%xmm7,%xmm7
+ subq %r10,%rbx
+ vaesenc %xmm1,%xmm8,%xmm8
+ vpxor 16(%r10),%xmm15,%xmm12
+ movq %rbx,64+16(%rsp)
+ vaesenc %xmm1,%xmm9,%xmm9
+ vmovups -40(%rsi),%xmm1
+ leaq 16(%r10,%rbx,1),%r10
+ vmovdqu %xmm12,32(%rbp)
+ vaesenc %xmm0,%xmm2,%xmm2
+ cmpl 32+12(%rsp),%ecx
+ movq 64+24(%rsp),%rbx
+ vaesenc %xmm0,%xmm3,%xmm3
+ prefetcht0 31(%r11)
+ vaesenc %xmm0,%xmm4,%xmm4
+ prefetcht0 15(%r9)
+ vaesenc %xmm0,%xmm5,%xmm5
+ leaq (%r11,%rbx,1),%rbx
+ cmovgeq %rsp,%r11
+ vaesenc %xmm0,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesenc %xmm0,%xmm7,%xmm7
+ subq %r11,%rbx
+ vaesenc %xmm0,%xmm8,%xmm8
+ vpxor 16(%r11),%xmm15,%xmm13
+ movq %rbx,64+24(%rsp)
+ vaesenc %xmm0,%xmm9,%xmm9
+ vmovups -24(%rsi),%xmm0
+ leaq 16(%r11,%rbx,1),%r11
+ vmovdqu %xmm13,48(%rbp)
+ vaesenc %xmm1,%xmm2,%xmm2
+ cmpl 32+16(%rsp),%ecx
+ movq 64+32(%rsp),%rbx
+ vaesenc %xmm1,%xmm3,%xmm3
+ prefetcht0 31(%r12)
+ vaesenc %xmm1,%xmm4,%xmm4
+ prefetcht0 15(%r10)
+ vaesenc %xmm1,%xmm5,%xmm5
+ leaq (%r12,%rbx,1),%rbx
+ cmovgeq %rsp,%r12
+ vaesenc %xmm1,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesenc %xmm1,%xmm7,%xmm7
+ subq %r12,%rbx
+ vaesenc %xmm1,%xmm8,%xmm8
+ vpxor 16(%r12),%xmm15,%xmm10
+ movq %rbx,64+32(%rsp)
+ vaesenc %xmm1,%xmm9,%xmm9
+ vmovups -8(%rsi),%xmm1
+ leaq 16(%r12,%rbx,1),%r12
+ vaesenc %xmm0,%xmm2,%xmm2
+ cmpl 32+20(%rsp),%ecx
+ movq 64+40(%rsp),%rbx
+ vaesenc %xmm0,%xmm3,%xmm3
+ prefetcht0 31(%r13)
+ vaesenc %xmm0,%xmm4,%xmm4
+ prefetcht0 15(%r11)
+ vaesenc %xmm0,%xmm5,%xmm5
+ leaq (%rbx,%r13,1),%rbx
+ cmovgeq %rsp,%r13
+ vaesenc %xmm0,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesenc %xmm0,%xmm7,%xmm7
+ subq %r13,%rbx
+ vaesenc %xmm0,%xmm8,%xmm8
+ vpxor 16(%r13),%xmm15,%xmm11
+ movq %rbx,64+40(%rsp)
+ vaesenc %xmm0,%xmm9,%xmm9
+ vmovups 8(%rsi),%xmm0
+ leaq 16(%r13,%rbx,1),%r13
+ vaesenc %xmm1,%xmm2,%xmm2
+ cmpl 32+24(%rsp),%ecx
+ movq 64+48(%rsp),%rbx
+ vaesenc %xmm1,%xmm3,%xmm3
+ prefetcht0 31(%r14)
+ vaesenc %xmm1,%xmm4,%xmm4
+ prefetcht0 15(%r12)
+ vaesenc %xmm1,%xmm5,%xmm5
+ leaq (%r14,%rbx,1),%rbx
+ cmovgeq %rsp,%r14
+ vaesenc %xmm1,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesenc %xmm1,%xmm7,%xmm7
+ subq %r14,%rbx
+ vaesenc %xmm1,%xmm8,%xmm8
+ vpxor 16(%r14),%xmm15,%xmm12
+ movq %rbx,64+48(%rsp)
+ vaesenc %xmm1,%xmm9,%xmm9
+ vmovups 24(%rsi),%xmm1
+ leaq 16(%r14,%rbx,1),%r14
+ vaesenc %xmm0,%xmm2,%xmm2
+ cmpl 32+28(%rsp),%ecx
+ movq 64+56(%rsp),%rbx
+ vaesenc %xmm0,%xmm3,%xmm3
+ prefetcht0 31(%r15)
+ vaesenc %xmm0,%xmm4,%xmm4
+ prefetcht0 15(%r13)
+ vaesenc %xmm0,%xmm5,%xmm5
+ leaq (%r15,%rbx,1),%rbx
+ cmovgeq %rsp,%r15
+ vaesenc %xmm0,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesenc %xmm0,%xmm7,%xmm7
+ subq %r15,%rbx
+ vaesenc %xmm0,%xmm8,%xmm8
+ vpxor 16(%r15),%xmm15,%xmm13
+ movq %rbx,64+56(%rsp)
+ vaesenc %xmm0,%xmm9,%xmm9
+ vmovups 40(%rsi),%xmm0
+ leaq 16(%r15,%rbx,1),%r15
+ vmovdqu 32(%rsp),%xmm14
+ prefetcht0 15(%r14)
+ prefetcht0 15(%r15)
+ cmpl $11,%eax
+ jb .Lenc8x_tail
+
+ vaesenc %xmm1,%xmm2,%xmm2
+ vaesenc %xmm1,%xmm3,%xmm3
+ vaesenc %xmm1,%xmm4,%xmm4
+ vaesenc %xmm1,%xmm5,%xmm5
+ vaesenc %xmm1,%xmm6,%xmm6
+ vaesenc %xmm1,%xmm7,%xmm7
+ vaesenc %xmm1,%xmm8,%xmm8
+ vaesenc %xmm1,%xmm9,%xmm9
+ vmovups 176-120(%rsi),%xmm1
+
+ vaesenc %xmm0,%xmm2,%xmm2
+ vaesenc %xmm0,%xmm3,%xmm3
+ vaesenc %xmm0,%xmm4,%xmm4
+ vaesenc %xmm0,%xmm5,%xmm5
+ vaesenc %xmm0,%xmm6,%xmm6
+ vaesenc %xmm0,%xmm7,%xmm7
+ vaesenc %xmm0,%xmm8,%xmm8
+ vaesenc %xmm0,%xmm9,%xmm9
+ vmovups 192-120(%rsi),%xmm0
+ je .Lenc8x_tail
+
+ vaesenc %xmm1,%xmm2,%xmm2
+ vaesenc %xmm1,%xmm3,%xmm3
+ vaesenc %xmm1,%xmm4,%xmm4
+ vaesenc %xmm1,%xmm5,%xmm5
+ vaesenc %xmm1,%xmm6,%xmm6
+ vaesenc %xmm1,%xmm7,%xmm7
+ vaesenc %xmm1,%xmm8,%xmm8
+ vaesenc %xmm1,%xmm9,%xmm9
+ vmovups 208-120(%rsi),%xmm1
+
+ vaesenc %xmm0,%xmm2,%xmm2
+ vaesenc %xmm0,%xmm3,%xmm3
+ vaesenc %xmm0,%xmm4,%xmm4
+ vaesenc %xmm0,%xmm5,%xmm5
+ vaesenc %xmm0,%xmm6,%xmm6
+ vaesenc %xmm0,%xmm7,%xmm7
+ vaesenc %xmm0,%xmm8,%xmm8
+ vaesenc %xmm0,%xmm9,%xmm9
+ vmovups 224-120(%rsi),%xmm0
+
+.Lenc8x_tail:
+ vaesenc %xmm1,%xmm2,%xmm2
+ vpxor %xmm15,%xmm15,%xmm15
+ vaesenc %xmm1,%xmm3,%xmm3
+ vaesenc %xmm1,%xmm4,%xmm4
+ vpcmpgtd %xmm15,%xmm14,%xmm15
+ vaesenc %xmm1,%xmm5,%xmm5
+ vaesenc %xmm1,%xmm6,%xmm6
+ vpaddd %xmm14,%xmm15,%xmm15
+ vmovdqu 48(%rsp),%xmm14
+ vaesenc %xmm1,%xmm7,%xmm7
+ movq 64(%rsp),%rbx
+ vaesenc %xmm1,%xmm8,%xmm8
+ vaesenc %xmm1,%xmm9,%xmm9
+ vmovups 16-120(%rsi),%xmm1
+
+ vaesenclast %xmm0,%xmm2,%xmm2
+ vmovdqa %xmm15,32(%rsp)
+ vpxor %xmm15,%xmm15,%xmm15
+ vaesenclast %xmm0,%xmm3,%xmm3
+ vaesenclast %xmm0,%xmm4,%xmm4
+ vpcmpgtd %xmm15,%xmm14,%xmm15
+ vaesenclast %xmm0,%xmm5,%xmm5
+ vaesenclast %xmm0,%xmm6,%xmm6
+ vpaddd %xmm15,%xmm14,%xmm14
+ vmovdqu -120(%rsi),%xmm15
+ vaesenclast %xmm0,%xmm7,%xmm7
+ vaesenclast %xmm0,%xmm8,%xmm8
+ vmovdqa %xmm14,48(%rsp)
+ vaesenclast %xmm0,%xmm9,%xmm9
+ vmovups 32-120(%rsi),%xmm0
+
+ vmovups %xmm2,-16(%r8)
+ subq %rbx,%r8
+ vpxor 0(%rbp),%xmm2,%xmm2
+ vmovups %xmm3,-16(%r9)
+ subq 72(%rsp),%r9
+ vpxor 16(%rbp),%xmm3,%xmm3
+ vmovups %xmm4,-16(%r10)
+ subq 80(%rsp),%r10
+ vpxor 32(%rbp),%xmm4,%xmm4
+ vmovups %xmm5,-16(%r11)
+ subq 88(%rsp),%r11
+ vpxor 48(%rbp),%xmm5,%xmm5
+ vmovups %xmm6,-16(%r12)
+ subq 96(%rsp),%r12
+ vpxor %xmm10,%xmm6,%xmm6
+ vmovups %xmm7,-16(%r13)
+ subq 104(%rsp),%r13
+ vpxor %xmm11,%xmm7,%xmm7
+ vmovups %xmm8,-16(%r14)
+ subq 112(%rsp),%r14
+ vpxor %xmm12,%xmm8,%xmm8
+ vmovups %xmm9,-16(%r15)
+ subq 120(%rsp),%r15
+ vpxor %xmm13,%xmm9,%xmm9
+
+ decl %edx
+ jnz .Loop_enc8x
+
+ movq 16(%rsp),%rax
+
+
+
+
+
+.Lenc8x_done:
+ vzeroupper
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lenc8x_epilogue:
+ .byte 0xf3,0xc3
+.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx
+
+.type	aesni_multi_cbc_decrypt_avx,@function
+.align 32
+aesni_multi_cbc_decrypt_avx:
+_avx_cbc_dec_shortcut:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+
+
+
+
+
+
+
+ subq $256,%rsp
+ andq $-256,%rsp
+ subq $192,%rsp
+ movq %rax,16(%rsp)
+
+.Ldec8x_body:
+ vzeroupper
+ vmovdqu (%rsi),%xmm15
+ leaq 120(%rsi),%rsi
+ leaq 160(%rdi),%rdi
+ shrl $1,%edx
+
+.Ldec8x_loop_grande:
+
+ xorl %edx,%edx
+ movl -144(%rdi),%ecx
+ movq -160(%rdi),%r8
+ cmpl %edx,%ecx
+ movq -152(%rdi),%rbx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu -136(%rdi),%xmm2
+ movl %ecx,32(%rsp)
+ cmovleq %rsp,%r8
+ subq %r8,%rbx
+ movq %rbx,64(%rsp)
+ vmovdqu %xmm2,192(%rsp)
+ movl -104(%rdi),%ecx
+ movq -120(%rdi),%r9
+ cmpl %edx,%ecx
+ movq -112(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu -96(%rdi),%xmm3
+ movl %ecx,36(%rsp)
+ cmovleq %rsp,%r9
+ subq %r9,%rbp
+ movq %rbp,72(%rsp)
+ vmovdqu %xmm3,208(%rsp)
+ movl -64(%rdi),%ecx
+ movq -80(%rdi),%r10
+ cmpl %edx,%ecx
+ movq -72(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu -56(%rdi),%xmm4
+ movl %ecx,40(%rsp)
+ cmovleq %rsp,%r10
+ subq %r10,%rbp
+ movq %rbp,80(%rsp)
+ vmovdqu %xmm4,224(%rsp)
+ movl -24(%rdi),%ecx
+ movq -40(%rdi),%r11
+ cmpl %edx,%ecx
+ movq -32(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu -16(%rdi),%xmm5
+ movl %ecx,44(%rsp)
+ cmovleq %rsp,%r11
+ subq %r11,%rbp
+ movq %rbp,88(%rsp)
+ vmovdqu %xmm5,240(%rsp)
+ movl 16(%rdi),%ecx
+ movq 0(%rdi),%r12
+ cmpl %edx,%ecx
+ movq 8(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu 24(%rdi),%xmm6
+ movl %ecx,48(%rsp)
+ cmovleq %rsp,%r12
+ subq %r12,%rbp
+ movq %rbp,96(%rsp)
+ vmovdqu %xmm6,256(%rsp)
+ movl 56(%rdi),%ecx
+ movq 40(%rdi),%r13
+ cmpl %edx,%ecx
+ movq 48(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu 64(%rdi),%xmm7
+ movl %ecx,52(%rsp)
+ cmovleq %rsp,%r13
+ subq %r13,%rbp
+ movq %rbp,104(%rsp)
+ vmovdqu %xmm7,272(%rsp)
+ movl 96(%rdi),%ecx
+ movq 80(%rdi),%r14
+ cmpl %edx,%ecx
+ movq 88(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu 104(%rdi),%xmm8
+ movl %ecx,56(%rsp)
+ cmovleq %rsp,%r14
+ subq %r14,%rbp
+ movq %rbp,112(%rsp)
+ vmovdqu %xmm8,288(%rsp)
+ movl 136(%rdi),%ecx
+ movq 120(%rdi),%r15
+ cmpl %edx,%ecx
+ movq 128(%rdi),%rbp
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ vmovdqu 144(%rdi),%xmm9
+ movl %ecx,60(%rsp)
+ cmovleq %rsp,%r15
+ subq %r15,%rbp
+ movq %rbp,120(%rsp)
+ vmovdqu %xmm9,304(%rsp)
+ testl %edx,%edx
+ jz .Ldec8x_done
+
+ vmovups 16-120(%rsi),%xmm1
+ vmovups 32-120(%rsi),%xmm0
+ movl 240-120(%rsi),%eax
+ leaq 192+128(%rsp),%rbp
+
+ vmovdqu (%r8),%xmm2
+ vmovdqu (%r9),%xmm3
+ vmovdqu (%r10),%xmm4
+ vmovdqu (%r11),%xmm5
+ vmovdqu (%r12),%xmm6
+ vmovdqu (%r13),%xmm7
+ vmovdqu (%r14),%xmm8
+ vmovdqu (%r15),%xmm9
+ vmovdqu %xmm2,0(%rbp)
+ vpxor %xmm15,%xmm2,%xmm2
+ vmovdqu %xmm3,16(%rbp)
+ vpxor %xmm15,%xmm3,%xmm3
+ vmovdqu %xmm4,32(%rbp)
+ vpxor %xmm15,%xmm4,%xmm4
+ vmovdqu %xmm5,48(%rbp)
+ vpxor %xmm15,%xmm5,%xmm5
+ vmovdqu %xmm6,64(%rbp)
+ vpxor %xmm15,%xmm6,%xmm6
+ vmovdqu %xmm7,80(%rbp)
+ vpxor %xmm15,%xmm7,%xmm7
+ vmovdqu %xmm8,96(%rbp)
+ vpxor %xmm15,%xmm8,%xmm8
+ vmovdqu %xmm9,112(%rbp)
+ vpxor %xmm15,%xmm9,%xmm9
+ xorq $0x80,%rbp
+ movl $1,%ecx
+ jmp .Loop_dec8x
+
+.align 32
+.Loop_dec8x:
+ vaesdec %xmm1,%xmm2,%xmm2
+ cmpl 32+0(%rsp),%ecx
+ vaesdec %xmm1,%xmm3,%xmm3
+ prefetcht0 31(%r8)
+ vaesdec %xmm1,%xmm4,%xmm4
+ vaesdec %xmm1,%xmm5,%xmm5
+ leaq (%r8,%rbx,1),%rbx
+ cmovgeq %rsp,%r8
+ vaesdec %xmm1,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesdec %xmm1,%xmm7,%xmm7
+ subq %r8,%rbx
+ vaesdec %xmm1,%xmm8,%xmm8
+ vmovdqu 16(%r8),%xmm10
+ movq %rbx,64+0(%rsp)
+ vaesdec %xmm1,%xmm9,%xmm9
+ vmovups -72(%rsi),%xmm1
+ leaq 16(%r8,%rbx,1),%r8
+ vmovdqu %xmm10,128(%rsp)
+ vaesdec %xmm0,%xmm2,%xmm2
+ cmpl 32+4(%rsp),%ecx
+ movq 64+8(%rsp),%rbx
+ vaesdec %xmm0,%xmm3,%xmm3
+ prefetcht0 31(%r9)
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ leaq (%r9,%rbx,1),%rbx
+ cmovgeq %rsp,%r9
+ vaesdec %xmm0,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesdec %xmm0,%xmm7,%xmm7
+ subq %r9,%rbx
+ vaesdec %xmm0,%xmm8,%xmm8
+ vmovdqu 16(%r9),%xmm11
+ movq %rbx,64+8(%rsp)
+ vaesdec %xmm0,%xmm9,%xmm9
+ vmovups -56(%rsi),%xmm0
+ leaq 16(%r9,%rbx,1),%r9
+ vmovdqu %xmm11,144(%rsp)
+ vaesdec %xmm1,%xmm2,%xmm2
+ cmpl 32+8(%rsp),%ecx
+ movq 64+16(%rsp),%rbx
+ vaesdec %xmm1,%xmm3,%xmm3
+ prefetcht0 31(%r10)
+ vaesdec %xmm1,%xmm4,%xmm4
+ prefetcht0 15(%r8)
+ vaesdec %xmm1,%xmm5,%xmm5
+ leaq (%r10,%rbx,1),%rbx
+ cmovgeq %rsp,%r10
+ vaesdec %xmm1,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesdec %xmm1,%xmm7,%xmm7
+ subq %r10,%rbx
+ vaesdec %xmm1,%xmm8,%xmm8
+ vmovdqu 16(%r10),%xmm12
+ movq %rbx,64+16(%rsp)
+ vaesdec %xmm1,%xmm9,%xmm9
+ vmovups -40(%rsi),%xmm1
+ leaq 16(%r10,%rbx,1),%r10
+ vmovdqu %xmm12,160(%rsp)
+ vaesdec %xmm0,%xmm2,%xmm2
+ cmpl 32+12(%rsp),%ecx
+ movq 64+24(%rsp),%rbx
+ vaesdec %xmm0,%xmm3,%xmm3
+ prefetcht0 31(%r11)
+ vaesdec %xmm0,%xmm4,%xmm4
+ prefetcht0 15(%r9)
+ vaesdec %xmm0,%xmm5,%xmm5
+ leaq (%r11,%rbx,1),%rbx
+ cmovgeq %rsp,%r11
+ vaesdec %xmm0,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesdec %xmm0,%xmm7,%xmm7
+ subq %r11,%rbx
+ vaesdec %xmm0,%xmm8,%xmm8
+ vmovdqu 16(%r11),%xmm13
+ movq %rbx,64+24(%rsp)
+ vaesdec %xmm0,%xmm9,%xmm9
+ vmovups -24(%rsi),%xmm0
+ leaq 16(%r11,%rbx,1),%r11
+ vmovdqu %xmm13,176(%rsp)
+ vaesdec %xmm1,%xmm2,%xmm2
+ cmpl 32+16(%rsp),%ecx
+ movq 64+32(%rsp),%rbx
+ vaesdec %xmm1,%xmm3,%xmm3
+ prefetcht0 31(%r12)
+ vaesdec %xmm1,%xmm4,%xmm4
+ prefetcht0 15(%r10)
+ vaesdec %xmm1,%xmm5,%xmm5
+ leaq (%r12,%rbx,1),%rbx
+ cmovgeq %rsp,%r12
+ vaesdec %xmm1,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesdec %xmm1,%xmm7,%xmm7
+ subq %r12,%rbx
+ vaesdec %xmm1,%xmm8,%xmm8
+ vmovdqu 16(%r12),%xmm10
+ movq %rbx,64+32(%rsp)
+ vaesdec %xmm1,%xmm9,%xmm9
+ vmovups -8(%rsi),%xmm1
+ leaq 16(%r12,%rbx,1),%r12
+ vaesdec %xmm0,%xmm2,%xmm2
+ cmpl 32+20(%rsp),%ecx
+ movq 64+40(%rsp),%rbx
+ vaesdec %xmm0,%xmm3,%xmm3
+ prefetcht0 31(%r13)
+ vaesdec %xmm0,%xmm4,%xmm4
+ prefetcht0 15(%r11)
+ vaesdec %xmm0,%xmm5,%xmm5
+ leaq (%rbx,%r13,1),%rbx
+ cmovgeq %rsp,%r13
+ vaesdec %xmm0,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesdec %xmm0,%xmm7,%xmm7
+ subq %r13,%rbx
+ vaesdec %xmm0,%xmm8,%xmm8
+ vmovdqu 16(%r13),%xmm11
+ movq %rbx,64+40(%rsp)
+ vaesdec %xmm0,%xmm9,%xmm9
+ vmovups 8(%rsi),%xmm0
+ leaq 16(%r13,%rbx,1),%r13
+ vaesdec %xmm1,%xmm2,%xmm2
+ cmpl 32+24(%rsp),%ecx
+ movq 64+48(%rsp),%rbx
+ vaesdec %xmm1,%xmm3,%xmm3
+ prefetcht0 31(%r14)
+ vaesdec %xmm1,%xmm4,%xmm4
+ prefetcht0 15(%r12)
+ vaesdec %xmm1,%xmm5,%xmm5
+ leaq (%r14,%rbx,1),%rbx
+ cmovgeq %rsp,%r14
+ vaesdec %xmm1,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesdec %xmm1,%xmm7,%xmm7
+ subq %r14,%rbx
+ vaesdec %xmm1,%xmm8,%xmm8
+ vmovdqu 16(%r14),%xmm12
+ movq %rbx,64+48(%rsp)
+ vaesdec %xmm1,%xmm9,%xmm9
+ vmovups 24(%rsi),%xmm1
+ leaq 16(%r14,%rbx,1),%r14
+ vaesdec %xmm0,%xmm2,%xmm2
+ cmpl 32+28(%rsp),%ecx
+ movq 64+56(%rsp),%rbx
+ vaesdec %xmm0,%xmm3,%xmm3
+ prefetcht0 31(%r15)
+ vaesdec %xmm0,%xmm4,%xmm4
+ prefetcht0 15(%r13)
+ vaesdec %xmm0,%xmm5,%xmm5
+ leaq (%r15,%rbx,1),%rbx
+ cmovgeq %rsp,%r15
+ vaesdec %xmm0,%xmm6,%xmm6
+ cmovgq %rsp,%rbx
+ vaesdec %xmm0,%xmm7,%xmm7
+ subq %r15,%rbx
+ vaesdec %xmm0,%xmm8,%xmm8
+ vmovdqu 16(%r15),%xmm13
+ movq %rbx,64+56(%rsp)
+ vaesdec %xmm0,%xmm9,%xmm9
+ vmovups 40(%rsi),%xmm0
+ leaq 16(%r15,%rbx,1),%r15
+ vmovdqu 32(%rsp),%xmm14
+ prefetcht0 15(%r14)
+ prefetcht0 15(%r15)
+ cmpl $11,%eax
+ jb .Ldec8x_tail
+
+ vaesdec %xmm1,%xmm2,%xmm2
+ vaesdec %xmm1,%xmm3,%xmm3
+ vaesdec %xmm1,%xmm4,%xmm4
+ vaesdec %xmm1,%xmm5,%xmm5
+ vaesdec %xmm1,%xmm6,%xmm6
+ vaesdec %xmm1,%xmm7,%xmm7
+ vaesdec %xmm1,%xmm8,%xmm8
+ vaesdec %xmm1,%xmm9,%xmm9
+ vmovups 176-120(%rsi),%xmm1
+
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vaesdec %xmm0,%xmm8,%xmm8
+ vaesdec %xmm0,%xmm9,%xmm9
+ vmovups 192-120(%rsi),%xmm0
+ je .Ldec8x_tail
+
+ vaesdec %xmm1,%xmm2,%xmm2
+ vaesdec %xmm1,%xmm3,%xmm3
+ vaesdec %xmm1,%xmm4,%xmm4
+ vaesdec %xmm1,%xmm5,%xmm5
+ vaesdec %xmm1,%xmm6,%xmm6
+ vaesdec %xmm1,%xmm7,%xmm7
+ vaesdec %xmm1,%xmm8,%xmm8
+ vaesdec %xmm1,%xmm9,%xmm9
+ vmovups 208-120(%rsi),%xmm1
+
+ vaesdec %xmm0,%xmm2,%xmm2
+ vaesdec %xmm0,%xmm3,%xmm3
+ vaesdec %xmm0,%xmm4,%xmm4
+ vaesdec %xmm0,%xmm5,%xmm5
+ vaesdec %xmm0,%xmm6,%xmm6
+ vaesdec %xmm0,%xmm7,%xmm7
+ vaesdec %xmm0,%xmm8,%xmm8
+ vaesdec %xmm0,%xmm9,%xmm9
+ vmovups 224-120(%rsi),%xmm0
+
+.Ldec8x_tail:
+ vaesdec %xmm1,%xmm2,%xmm2
+ vpxor %xmm15,%xmm15,%xmm15
+ vaesdec %xmm1,%xmm3,%xmm3
+ vaesdec %xmm1,%xmm4,%xmm4
+ vpcmpgtd %xmm15,%xmm14,%xmm15
+ vaesdec %xmm1,%xmm5,%xmm5
+ vaesdec %xmm1,%xmm6,%xmm6
+ vpaddd %xmm14,%xmm15,%xmm15
+ vmovdqu 48(%rsp),%xmm14
+ vaesdec %xmm1,%xmm7,%xmm7
+ movq 64(%rsp),%rbx
+ vaesdec %xmm1,%xmm8,%xmm8
+ vaesdec %xmm1,%xmm9,%xmm9
+ vmovups 16-120(%rsi),%xmm1
+
+ vaesdeclast %xmm0,%xmm2,%xmm2
+ vmovdqa %xmm15,32(%rsp)
+ vpxor %xmm15,%xmm15,%xmm15
+ vaesdeclast %xmm0,%xmm3,%xmm3
+ vpxor 0(%rbp),%xmm2,%xmm2
+ vaesdeclast %xmm0,%xmm4,%xmm4
+ vpxor 16(%rbp),%xmm3,%xmm3
+ vpcmpgtd %xmm15,%xmm14,%xmm15
+ vaesdeclast %xmm0,%xmm5,%xmm5
+ vpxor 32(%rbp),%xmm4,%xmm4
+ vaesdeclast %xmm0,%xmm6,%xmm6
+ vpxor 48(%rbp),%xmm5,%xmm5
+ vpaddd %xmm15,%xmm14,%xmm14
+ vmovdqu -120(%rsi),%xmm15
+ vaesdeclast %xmm0,%xmm7,%xmm7
+ vpxor 64(%rbp),%xmm6,%xmm6
+ vaesdeclast %xmm0,%xmm8,%xmm8
+ vpxor 80(%rbp),%xmm7,%xmm7
+ vmovdqa %xmm14,48(%rsp)
+ vaesdeclast %xmm0,%xmm9,%xmm9
+ vpxor 96(%rbp),%xmm8,%xmm8
+ vmovups 32-120(%rsi),%xmm0
+
+ vmovups %xmm2,-16(%r8)
+ subq %rbx,%r8
+ vmovdqu 128+0(%rsp),%xmm2
+ vpxor 112(%rbp),%xmm9,%xmm9
+ vmovups %xmm3,-16(%r9)
+ subq 72(%rsp),%r9
+ vmovdqu %xmm2,0(%rbp)
+ vpxor %xmm15,%xmm2,%xmm2
+ vmovdqu 128+16(%rsp),%xmm3
+ vmovups %xmm4,-16(%r10)
+ subq 80(%rsp),%r10
+ vmovdqu %xmm3,16(%rbp)
+ vpxor %xmm15,%xmm3,%xmm3
+ vmovdqu 128+32(%rsp),%xmm4
+ vmovups %xmm5,-16(%r11)
+ subq 88(%rsp),%r11
+ vmovdqu %xmm4,32(%rbp)
+ vpxor %xmm15,%xmm4,%xmm4
+ vmovdqu 128+48(%rsp),%xmm5
+ vmovups %xmm6,-16(%r12)
+ subq 96(%rsp),%r12
+ vmovdqu %xmm5,48(%rbp)
+ vpxor %xmm15,%xmm5,%xmm5
+ vmovdqu %xmm10,64(%rbp)
+ vpxor %xmm10,%xmm15,%xmm6
+ vmovups %xmm7,-16(%r13)
+ subq 104(%rsp),%r13
+ vmovdqu %xmm11,80(%rbp)
+ vpxor %xmm11,%xmm15,%xmm7
+ vmovups %xmm8,-16(%r14)
+ subq 112(%rsp),%r14
+ vmovdqu %xmm12,96(%rbp)
+ vpxor %xmm12,%xmm15,%xmm8
+ vmovups %xmm9,-16(%r15)
+ subq 120(%rsp),%r15
+ vmovdqu %xmm13,112(%rbp)
+ vpxor %xmm13,%xmm15,%xmm9
+
+ xorq $128,%rbp
+ decl %edx
+ jnz .Loop_dec8x
+
+ movq 16(%rsp),%rax
+
+
+
+
+
+.Ldec8x_done:
+ vzeroupper
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Ldec8x_epilogue:
+ .byte 0xf3,0xc3
+.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
Property changes on: trunk/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S
===================================================================
--- trunk/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S (rev 0)
+++ trunk/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,4358 @@
+/* $MidnightBSD$ */
+/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S 325335 2017-11-02 18:22:53Z jkim $ */
+/* Do not modify. This file is auto-generated from aesni-sha256-x86_64.pl. */
+.text
+
+
+.globl aesni_cbc_sha256_enc
+.type	aesni_cbc_sha256_enc,@function
+.align 16
+aesni_cbc_sha256_enc:
+ leaq OPENSSL_ia32cap_P(%rip),%r11
+ movl $1,%eax
+ cmpq $0,%rdi
+ je .Lprobe
+ movl 0(%r11),%eax
+ movq 4(%r11),%r10
+ btq $61,%r10
+ jc aesni_cbc_sha256_enc_shaext
+ movq %r10,%r11
+ shrq $32,%r11
+
+ testl $2048,%r10d
+ jnz aesni_cbc_sha256_enc_xop
+ andl $296,%r11d
+ cmpl $296,%r11d
+ je aesni_cbc_sha256_enc_avx2
+ andl $268435456,%r10d
+ jnz aesni_cbc_sha256_enc_avx
+ ud2
+ xorl %eax,%eax
+ cmpq $0,%rdi
+ je .Lprobe
+ ud2
+.Lprobe:
+ .byte 0xf3,0xc3
+.size aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc
+
+.align 64
+.type	K256,@object
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1
+.long 0,0,0,0, 0,0,0,0
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
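
A note on the constant pool that ends here: each row of SHA-256 round constants appears twice so that the AVX2 path can add the same K values to both 128-bit lanes of a ymm register (it schedules two 64-byte blocks at once), the 0x00010203... rows at K256+512 are the byte-swap shuffle mask loaded at the top of every loop, and the zero/all-ones rows after them are, by the look of the vpand/vpor pattern in the bodies, the masks (loaded into %xmm12..%xmm14, indexed by the round count) that pick which speculative vaesenclast result survives for 128/192/256-bit keys. The vector shift/xor chains in the bodies below evaluate the standard message-schedule functions; a scalar C rendering, assuming nothing beyond FIPS 180-4:

#include <stdint.h>

/* FIPS 180-4 message-schedule functions; the vpsrld/vpslld/vpsrlq/vpxor
 * chains below compute these on four (xmm) or eight (ymm) schedule words
 * per instruction sequence. */
static inline uint32_t rotr32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }
static inline uint32_t sigma0(uint32_t x) { return rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3); }
static inline uint32_t sigma1(uint32_t x) { return rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10); }

/* Scalar form of the schedule the vector code expands:
 *   W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]   (t = 16..63) */
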
+.align 64
+.type aesni_cbc_sha256_enc_xop,@function
+.align 64
+aesni_cbc_sha256_enc_xop:
+.Lxop_shortcut:
+ movq 8(%rsp),%r10
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ subq $128,%rsp
+ andq $-64,%rsp
+
+ shlq $6,%rdx
+ subq %rdi,%rsi
+ subq %rdi,%r10
+ addq %rdi,%rdx
+
+
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+
+ movq %r8,64+32(%rsp)
+ movq %r9,64+40(%rsp)
+ movq %r10,64+48(%rsp)
+ movq %r11,64+56(%rsp)
+.Lprologue_xop:
+ vzeroall
+
+ movq %rdi,%r12
+ leaq 128(%rcx),%rdi
+ leaq K256+544(%rip),%r13
+ movl 240-128(%rdi),%r14d
+ movq %r9,%r15
+ movq %r10,%rsi
+ vmovdqu (%r8),%xmm8
+ subq $9,%r14
+
+ movl 0(%r15),%eax
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ movl 12(%r15),%edx
+ movl 16(%r15),%r8d
+ movl 20(%r15),%r9d
+ movl 24(%r15),%r10d
+ movl 28(%r15),%r11d
+
+ vmovdqa 0(%r13,%r14,8),%xmm14
+ vmovdqa 16(%r13,%r14,8),%xmm13
+ vmovdqa 32(%r13,%r14,8),%xmm12
+ vmovdqu 0-128(%rdi),%xmm10
+ jmp .Lloop_xop
+.align 16
+.Lloop_xop:
+ vmovdqa K256+512(%rip),%xmm7
+ vmovdqu 0(%rsi,%r12,1),%xmm0
+ vmovdqu 16(%rsi,%r12,1),%xmm1
+ vmovdqu 32(%rsi,%r12,1),%xmm2
+ vmovdqu 48(%rsi,%r12,1),%xmm3
+ vpshufb %xmm7,%xmm0,%xmm0
+ leaq K256(%rip),%rbp
+ vpshufb %xmm7,%xmm1,%xmm1
+ vpshufb %xmm7,%xmm2,%xmm2
+ vpaddd 0(%rbp),%xmm0,%xmm4
+ vpshufb %xmm7,%xmm3,%xmm3
+ vpaddd 32(%rbp),%xmm1,%xmm5
+ vpaddd 64(%rbp),%xmm2,%xmm6
+ vpaddd 96(%rbp),%xmm3,%xmm7
+ vmovdqa %xmm4,0(%rsp)
+ movl %eax,%r14d
+ vmovdqa %xmm5,16(%rsp)
+ movl %ebx,%esi
+ vmovdqa %xmm6,32(%rsp)
+ xorl %ecx,%esi
+ vmovdqa %xmm7,48(%rsp)
+ movl %r8d,%r13d
+ jmp .Lxop_00_47
+
+.align 16
+.Lxop_00_47:
+ subq $-32*4,%rbp
+ vmovdqu (%r12),%xmm9
+ movq %r12,64+0(%rsp)
+ vpalignr $4,%xmm0,%xmm1,%xmm4
+ rorl $14,%r13d
+ movl %r14d,%eax
+ vpalignr $4,%xmm2,%xmm3,%xmm7
+ movl %r9d,%r12d
+ xorl %r8d,%r13d
+.byte 143,232,120,194,236,14
+ rorl $9,%r14d
+ xorl %r10d,%r12d
+ vpsrld $3,%xmm4,%xmm4
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ vpaddd %xmm7,%xmm0,%xmm0
+ andl %r8d,%r12d
+ vpxor %xmm10,%xmm9,%xmm9
+ vmovdqu 16-128(%rdi),%xmm10
+ xorl %r8d,%r13d
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+.byte 143,232,120,194,245,11
+ rorl $11,%r14d
+ xorl %r10d,%r12d
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ addl %r12d,%r11d
+ andl %r15d,%esi
+.byte 143,232,120,194,251,13
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ebx,%esi
+ addl %r11d,%edx
+ vpsrld $10,%xmm3,%xmm6
+ rorl $2,%r14d
+ addl %esi,%r11d
+ vpaddd %xmm4,%xmm0,%xmm0
+ movl %edx,%r13d
+ addl %r11d,%r14d
+.byte 143,232,120,194,239,2
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ vpxor %xmm6,%xmm7,%xmm7
+ movl %r8d,%r12d
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%r12d
+ vpxor %xmm5,%xmm7,%xmm7
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ vpxor %xmm8,%xmm9,%xmm9
+ xorl %edx,%r13d
+ vpsrldq $8,%xmm7,%xmm7
+ addl 4(%rsp),%r10d
+ movl %r11d,%esi
+ rorl $11,%r14d
+ xorl %r9d,%r12d
+ vpaddd %xmm7,%xmm0,%xmm0
+ xorl %eax,%esi
+ rorl $6,%r13d
+ addl %r12d,%r10d
+ andl %esi,%r15d
+.byte 143,232,120,194,248,13
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ vpsrld $10,%xmm0,%xmm6
+ xorl %eax,%r15d
+ addl %r10d,%ecx
+.byte 143,232,120,194,239,2
+ rorl $2,%r14d
+ addl %r15d,%r10d
+ vpxor %xmm6,%xmm7,%xmm7
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ vpxor %xmm5,%xmm7,%xmm7
+ movl %edx,%r12d
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r12d
+ vpslldq $8,%xmm7,%xmm7
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 32-128(%rdi),%xmm10
+ xorl %ecx,%r13d
+ vpaddd %xmm7,%xmm0,%xmm0
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ rorl $11,%r14d
+ xorl %r8d,%r12d
+ vpaddd 0(%rbp),%xmm0,%xmm6
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ addl %r12d,%r9d
+ andl %r15d,%esi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%esi
+ addl %r9d,%ebx
+ rorl $2,%r14d
+ addl %esi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 48-128(%rdi),%xmm10
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ movl %r9d,%esi
+ rorl $11,%r14d
+ xorl %edx,%r12d
+ xorl %r10d,%esi
+ rorl $6,%r13d
+ addl %r12d,%r8d
+ andl %esi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ addl %r8d,%eax
+ rorl $2,%r14d
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ vmovdqa %xmm6,0(%rsp)
+ vpalignr $4,%xmm1,%xmm2,%xmm4
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ vpalignr $4,%xmm3,%xmm0,%xmm7
+ movl %ebx,%r12d
+ xorl %eax,%r13d
+.byte 143,232,120,194,236,14
+ rorl $9,%r14d
+ xorl %ecx,%r12d
+ vpsrld $3,%xmm4,%xmm4
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ vpaddd %xmm7,%xmm1,%xmm1
+ andl %eax,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 64-128(%rdi),%xmm10
+ xorl %eax,%r13d
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+.byte 143,232,120,194,245,11
+ rorl $11,%r14d
+ xorl %ecx,%r12d
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ addl %r12d,%edx
+ andl %r15d,%esi
+.byte 143,232,120,194,248,13
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %r9d,%esi
+ addl %edx,%r11d
+ vpsrld $10,%xmm0,%xmm6
+ rorl $2,%r14d
+ addl %esi,%edx
+ vpaddd %xmm4,%xmm1,%xmm1
+ movl %r11d,%r13d
+ addl %edx,%r14d
+.byte 143,232,120,194,239,2
+ rorl $14,%r13d
+ movl %r14d,%edx
+ vpxor %xmm6,%xmm7,%xmm7
+ movl %eax,%r12d
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%r12d
+ vpxor %xmm5,%xmm7,%xmm7
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 80-128(%rdi),%xmm10
+ xorl %r11d,%r13d
+ vpsrldq $8,%xmm7,%xmm7
+ addl 20(%rsp),%ecx
+ movl %edx,%esi
+ rorl $11,%r14d
+ xorl %ebx,%r12d
+ vpaddd %xmm7,%xmm1,%xmm1
+ xorl %r8d,%esi
+ rorl $6,%r13d
+ addl %r12d,%ecx
+ andl %esi,%r15d
+.byte 143,232,120,194,249,13
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ vpsrld $10,%xmm1,%xmm6
+ xorl %r8d,%r15d
+ addl %ecx,%r10d
+.byte 143,232,120,194,239,2
+ rorl $2,%r14d
+ addl %r15d,%ecx
+ vpxor %xmm6,%xmm7,%xmm7
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ vpxor %xmm5,%xmm7,%xmm7
+ movl %r11d,%r12d
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r12d
+ vpslldq $8,%xmm7,%xmm7
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 96-128(%rdi),%xmm10
+ xorl %r10d,%r13d
+ vpaddd %xmm7,%xmm1,%xmm1
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ rorl $11,%r14d
+ xorl %eax,%r12d
+ vpaddd 32(%rbp),%xmm1,%xmm6
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ addl %r12d,%ebx
+ andl %r15d,%esi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%esi
+ addl %ebx,%r9d
+ rorl $2,%r14d
+ addl %esi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 112-128(%rdi),%xmm10
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ movl %ebx,%esi
+ rorl $11,%r14d
+ xorl %r11d,%r12d
+ xorl %ecx,%esi
+ rorl $6,%r13d
+ addl %r12d,%eax
+ andl %esi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ addl %eax,%r8d
+ rorl $2,%r14d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ vmovdqa %xmm6,16(%rsp)
+ vpalignr $4,%xmm2,%xmm3,%xmm4
+ rorl $14,%r13d
+ movl %r14d,%eax
+ vpalignr $4,%xmm0,%xmm1,%xmm7
+ movl %r9d,%r12d
+ xorl %r8d,%r13d
+.byte 143,232,120,194,236,14
+ rorl $9,%r14d
+ xorl %r10d,%r12d
+ vpsrld $3,%xmm4,%xmm4
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ vpaddd %xmm7,%xmm2,%xmm2
+ andl %r8d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 128-128(%rdi),%xmm10
+ xorl %r8d,%r13d
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+.byte 143,232,120,194,245,11
+ rorl $11,%r14d
+ xorl %r10d,%r12d
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ addl %r12d,%r11d
+ andl %r15d,%esi
+.byte 143,232,120,194,249,13
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %ebx,%esi
+ addl %r11d,%edx
+ vpsrld $10,%xmm1,%xmm6
+ rorl $2,%r14d
+ addl %esi,%r11d
+ vpaddd %xmm4,%xmm2,%xmm2
+ movl %edx,%r13d
+ addl %r11d,%r14d
+.byte 143,232,120,194,239,2
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ vpxor %xmm6,%xmm7,%xmm7
+ movl %r8d,%r12d
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%r12d
+ vpxor %xmm5,%xmm7,%xmm7
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 144-128(%rdi),%xmm10
+ xorl %edx,%r13d
+ vpsrldq $8,%xmm7,%xmm7
+ addl 36(%rsp),%r10d
+ movl %r11d,%esi
+ rorl $11,%r14d
+ xorl %r9d,%r12d
+ vpaddd %xmm7,%xmm2,%xmm2
+ xorl %eax,%esi
+ rorl $6,%r13d
+ addl %r12d,%r10d
+ andl %esi,%r15d
+.byte 143,232,120,194,250,13
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ vpsrld $10,%xmm2,%xmm6
+ xorl %eax,%r15d
+ addl %r10d,%ecx
+.byte 143,232,120,194,239,2
+ rorl $2,%r14d
+ addl %r15d,%r10d
+ vpxor %xmm6,%xmm7,%xmm7
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ vpxor %xmm5,%xmm7,%xmm7
+ movl %edx,%r12d
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r12d
+ vpslldq $8,%xmm7,%xmm7
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 160-128(%rdi),%xmm10
+ xorl %ecx,%r13d
+ vpaddd %xmm7,%xmm2,%xmm2
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ rorl $11,%r14d
+ xorl %r8d,%r12d
+ vpaddd 64(%rbp),%xmm2,%xmm6
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ addl %r12d,%r9d
+ andl %r15d,%esi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%esi
+ addl %r9d,%ebx
+ rorl $2,%r14d
+ addl %esi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 176-128(%rdi),%xmm10
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ movl %r9d,%esi
+ rorl $11,%r14d
+ xorl %edx,%r12d
+ xorl %r10d,%esi
+ rorl $6,%r13d
+ addl %r12d,%r8d
+ andl %esi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ addl %r8d,%eax
+ rorl $2,%r14d
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ vmovdqa %xmm6,32(%rsp)
+ vpalignr $4,%xmm3,%xmm0,%xmm4
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ vpalignr $4,%xmm1,%xmm2,%xmm7
+ movl %ebx,%r12d
+ xorl %eax,%r13d
+.byte 143,232,120,194,236,14
+ rorl $9,%r14d
+ xorl %ecx,%r12d
+ vpsrld $3,%xmm4,%xmm4
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ vpaddd %xmm7,%xmm3,%xmm3
+ andl %eax,%r12d
+ vpand %xmm12,%xmm11,%xmm8
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 192-128(%rdi),%xmm10
+ xorl %eax,%r13d
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+.byte 143,232,120,194,245,11
+ rorl $11,%r14d
+ xorl %ecx,%r12d
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ addl %r12d,%edx
+ andl %r15d,%esi
+.byte 143,232,120,194,250,13
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %r9d,%esi
+ addl %edx,%r11d
+ vpsrld $10,%xmm2,%xmm6
+ rorl $2,%r14d
+ addl %esi,%edx
+ vpaddd %xmm4,%xmm3,%xmm3
+ movl %r11d,%r13d
+ addl %edx,%r14d
+.byte 143,232,120,194,239,2
+ rorl $14,%r13d
+ movl %r14d,%edx
+ vpxor %xmm6,%xmm7,%xmm7
+ movl %eax,%r12d
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%r12d
+ vpxor %xmm5,%xmm7,%xmm7
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 208-128(%rdi),%xmm10
+ xorl %r11d,%r13d
+ vpsrldq $8,%xmm7,%xmm7
+ addl 52(%rsp),%ecx
+ movl %edx,%esi
+ rorl $11,%r14d
+ xorl %ebx,%r12d
+ vpaddd %xmm7,%xmm3,%xmm3
+ xorl %r8d,%esi
+ rorl $6,%r13d
+ addl %r12d,%ecx
+ andl %esi,%r15d
+.byte 143,232,120,194,251,13
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ vpsrld $10,%xmm3,%xmm6
+ xorl %r8d,%r15d
+ addl %ecx,%r10d
+.byte 143,232,120,194,239,2
+ rorl $2,%r14d
+ addl %r15d,%ecx
+ vpxor %xmm6,%xmm7,%xmm7
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ vpxor %xmm5,%xmm7,%xmm7
+ movl %r11d,%r12d
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r12d
+ vpslldq $8,%xmm7,%xmm7
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ vpand %xmm13,%xmm11,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 224-128(%rdi),%xmm10
+ xorl %r10d,%r13d
+ vpaddd %xmm7,%xmm3,%xmm3
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ rorl $11,%r14d
+ xorl %eax,%r12d
+ vpaddd 96(%rbp),%xmm3,%xmm6
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ addl %r12d,%ebx
+ andl %r15d,%esi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%esi
+ addl %ebx,%r9d
+ rorl $2,%r14d
+ addl %esi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ vpor %xmm11,%xmm8,%xmm8
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vmovdqu 0-128(%rdi),%xmm10
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ movl %ebx,%esi
+ rorl $11,%r14d
+ xorl %r11d,%r12d
+ xorl %ecx,%esi
+ rorl $6,%r13d
+ addl %r12d,%eax
+ andl %esi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ addl %eax,%r8d
+ rorl $2,%r14d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ vmovdqa %xmm6,48(%rsp)
+ movq 64+0(%rsp),%r12
+ vpand %xmm14,%xmm11,%xmm11
+ movq 64+8(%rsp),%r15
+ vpor %xmm11,%xmm8,%xmm8
+ vmovdqu %xmm8,(%r15,%r12,1)
+ leaq 16(%r12),%r12
+ cmpb $0,131(%rbp)
+ jne .Lxop_00_47
+ vmovdqu (%r12),%xmm9
+ movq %r12,64+0(%rsp)
+ rorl $14,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ vpxor %xmm10,%xmm9,%xmm9
+ vmovdqu 16-128(%rdi),%xmm10
+ xorl %r8d,%r13d
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ rorl $11,%r14d
+ xorl %r10d,%r12d
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ addl %r12d,%r11d
+ andl %r15d,%esi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%esi
+ addl %r11d,%edx
+ rorl $2,%r14d
+ addl %esi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ vpxor %xmm8,%xmm9,%xmm9
+ xorl %edx,%r13d
+ addl 4(%rsp),%r10d
+ movl %r11d,%esi
+ rorl $11,%r14d
+ xorl %r9d,%r12d
+ xorl %eax,%esi
+ rorl $6,%r13d
+ addl %r12d,%r10d
+ andl %esi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ addl %r10d,%ecx
+ rorl $2,%r14d
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r12d
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 32-128(%rdi),%xmm10
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ rorl $11,%r14d
+ xorl %r8d,%r12d
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ addl %r12d,%r9d
+ andl %r15d,%esi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%esi
+ addl %r9d,%ebx
+ rorl $2,%r14d
+ addl %esi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 48-128(%rdi),%xmm10
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ movl %r9d,%esi
+ rorl $11,%r14d
+ xorl %edx,%r12d
+ xorl %r10d,%esi
+ rorl $6,%r13d
+ addl %r12d,%r8d
+ andl %esi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ addl %r8d,%eax
+ rorl $2,%r14d
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 64-128(%rdi),%xmm10
+ xorl %eax,%r13d
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ rorl $11,%r14d
+ xorl %ecx,%r12d
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ addl %r12d,%edx
+ andl %r15d,%esi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%esi
+ addl %edx,%r11d
+ rorl $2,%r14d
+ addl %esi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 80-128(%rdi),%xmm10
+ xorl %r11d,%r13d
+ addl 20(%rsp),%ecx
+ movl %edx,%esi
+ rorl $11,%r14d
+ xorl %ebx,%r12d
+ xorl %r8d,%esi
+ rorl $6,%r13d
+ addl %r12d,%ecx
+ andl %esi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ addl %ecx,%r10d
+ rorl $2,%r14d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r12d
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 96-128(%rdi),%xmm10
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ rorl $11,%r14d
+ xorl %eax,%r12d
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ addl %r12d,%ebx
+ andl %r15d,%esi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%esi
+ addl %ebx,%r9d
+ rorl $2,%r14d
+ addl %esi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 112-128(%rdi),%xmm10
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ movl %ebx,%esi
+ rorl $11,%r14d
+ xorl %r11d,%r12d
+ xorl %ecx,%esi
+ rorl $6,%r13d
+ addl %r12d,%eax
+ andl %esi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ addl %eax,%r8d
+ rorl $2,%r14d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ rorl $14,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 128-128(%rdi),%xmm10
+ xorl %r8d,%r13d
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ rorl $11,%r14d
+ xorl %r10d,%r12d
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ addl %r12d,%r11d
+ andl %r15d,%esi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%esi
+ addl %r11d,%edx
+ rorl $2,%r14d
+ addl %esi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 144-128(%rdi),%xmm10
+ xorl %edx,%r13d
+ addl 36(%rsp),%r10d
+ movl %r11d,%esi
+ rorl $11,%r14d
+ xorl %r9d,%r12d
+ xorl %eax,%esi
+ rorl $6,%r13d
+ addl %r12d,%r10d
+ andl %esi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ addl %r10d,%ecx
+ rorl $2,%r14d
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r12d
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 160-128(%rdi),%xmm10
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ rorl $11,%r14d
+ xorl %r8d,%r12d
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ addl %r12d,%r9d
+ andl %r15d,%esi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%esi
+ addl %r9d,%ebx
+ rorl $2,%r14d
+ addl %esi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 176-128(%rdi),%xmm10
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ movl %r9d,%esi
+ rorl $11,%r14d
+ xorl %edx,%r12d
+ xorl %r10d,%esi
+ rorl $6,%r13d
+ addl %r12d,%r8d
+ andl %esi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ addl %r8d,%eax
+ rorl $2,%r14d
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ vpand %xmm12,%xmm11,%xmm8
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 192-128(%rdi),%xmm10
+ xorl %eax,%r13d
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ rorl $11,%r14d
+ xorl %ecx,%r12d
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ addl %r12d,%edx
+ andl %r15d,%esi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%esi
+ addl %edx,%r11d
+ rorl $2,%r14d
+ addl %esi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 208-128(%rdi),%xmm10
+ xorl %r11d,%r13d
+ addl 52(%rsp),%ecx
+ movl %edx,%esi
+ rorl $11,%r14d
+ xorl %ebx,%r12d
+ xorl %r8d,%esi
+ rorl $6,%r13d
+ addl %r12d,%ecx
+ andl %esi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ addl %ecx,%r10d
+ rorl $2,%r14d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r12d
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ vpand %xmm13,%xmm11,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 224-128(%rdi),%xmm10
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ rorl $11,%r14d
+ xorl %eax,%r12d
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ addl %r12d,%ebx
+ andl %r15d,%esi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%esi
+ addl %ebx,%r9d
+ rorl $2,%r14d
+ addl %esi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ vpor %xmm11,%xmm8,%xmm8
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vmovdqu 0-128(%rdi),%xmm10
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ movl %ebx,%esi
+ rorl $11,%r14d
+ xorl %r11d,%r12d
+ xorl %ecx,%esi
+ rorl $6,%r13d
+ addl %r12d,%eax
+ andl %esi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ addl %eax,%r8d
+ rorl $2,%r14d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movq 64+0(%rsp),%r12
+ movq 64+8(%rsp),%r13
+ movq 64+40(%rsp),%r15
+ movq 64+48(%rsp),%rsi
+
+ vpand %xmm14,%xmm11,%xmm11
+ movl %r14d,%eax
+ vpor %xmm11,%xmm8,%xmm8
+ vmovdqu %xmm8,(%r12,%r13,1)
+ leaq 16(%r12),%r12
+
+ addl 0(%r15),%eax
+ addl 4(%r15),%ebx
+ addl 8(%r15),%ecx
+ addl 12(%r15),%edx
+ addl 16(%r15),%r8d
+ addl 20(%r15),%r9d
+ addl 24(%r15),%r10d
+ addl 28(%r15),%r11d
+
+ cmpq 64+16(%rsp),%r12
+
+ movl %eax,0(%r15)
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ movl %r8d,16(%r15)
+ movl %r9d,20(%r15)
+ movl %r10d,24(%r15)
+ movl %r11d,28(%r15)
+
+ jb .Lloop_xop
+
+ movq 64+32(%rsp),%r8
+ movq 64+56(%rsp),%rsi
+ vmovdqu %xmm8,(%r8)
+ vzeroall
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue_xop:
+ .byte 0xf3,0xc3
+.size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop
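
All three SIMD bodies in this file take their arguments the same way, which is visible in the prologue above: %rdi is the data to encrypt, %rsi the output, %rdx the number of 64-byte blocks (immediately scaled with shlq $6), %rcx the expanded AES key schedule, %r8 the CBC IV, %r9 the eight-word SHA-256 state, and the seventh argument at 8(%rsp) the stream that gets hashed. As a hedged reference only, the C declaration the EVP glue uses for this routine (it lives in OpenSSL's crypto/evp/e_aes_cbc_hmac_sha256.c, not in this commit) is approximately:

#include <stddef.h>
#include <openssl/aes.h>
#include <openssl/sha.h>

/* Hedged reference, not part of this commit.  blocks counts 64-byte
 * SHA-256 blocks; inp is encrypted, in0 is hashed, and a NULL inp turns
 * the call into a capability probe. */
int aesni_cbc_sha256_enc(const void *inp, void *out, size_t blocks,
                         const AES_KEY *key, unsigned char iv[16],
                         SHA256_CTX *ctx, const void *in0);
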
+.type aesni_cbc_sha256_enc_avx,@function
+.align 64
+aesni_cbc_sha256_enc_avx:
+.Lavx_shortcut:
+ movq 8(%rsp),%r10
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ subq $128,%rsp
+ andq $-64,%rsp
+
+ shlq $6,%rdx
+ subq %rdi,%rsi
+ subq %rdi,%r10
+ addq %rdi,%rdx
+
+
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+
+ movq %r8,64+32(%rsp)
+ movq %r9,64+40(%rsp)
+ movq %r10,64+48(%rsp)
+ movq %r11,64+56(%rsp)
+.Lprologue_avx:
+ vzeroall
+
+ movq %rdi,%r12
+ leaq 128(%rcx),%rdi
+ leaq K256+544(%rip),%r13
+ movl 240-128(%rdi),%r14d
+ movq %r9,%r15
+ movq %r10,%rsi
+ vmovdqu (%r8),%xmm8
+ subq $9,%r14
+
+ movl 0(%r15),%eax
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ movl 12(%r15),%edx
+ movl 16(%r15),%r8d
+ movl 20(%r15),%r9d
+ movl 24(%r15),%r10d
+ movl 28(%r15),%r11d
+
+ vmovdqa 0(%r13,%r14,8),%xmm14
+ vmovdqa 16(%r13,%r14,8),%xmm13
+ vmovdqa 32(%r13,%r14,8),%xmm12
+ vmovdqu 0-128(%rdi),%xmm10
+ jmp .Lloop_avx
+.align 16
+.Lloop_avx:
+ vmovdqa K256+512(%rip),%xmm7
+ vmovdqu 0(%rsi,%r12,1),%xmm0
+ vmovdqu 16(%rsi,%r12,1),%xmm1
+ vmovdqu 32(%rsi,%r12,1),%xmm2
+ vmovdqu 48(%rsi,%r12,1),%xmm3
+ vpshufb %xmm7,%xmm0,%xmm0
+ leaq K256(%rip),%rbp
+ vpshufb %xmm7,%xmm1,%xmm1
+ vpshufb %xmm7,%xmm2,%xmm2
+ vpaddd 0(%rbp),%xmm0,%xmm4
+ vpshufb %xmm7,%xmm3,%xmm3
+ vpaddd 32(%rbp),%xmm1,%xmm5
+ vpaddd 64(%rbp),%xmm2,%xmm6
+ vpaddd 96(%rbp),%xmm3,%xmm7
+ vmovdqa %xmm4,0(%rsp)
+ movl %eax,%r14d
+ vmovdqa %xmm5,16(%rsp)
+ movl %ebx,%esi
+ vmovdqa %xmm6,32(%rsp)
+ xorl %ecx,%esi
+ vmovdqa %xmm7,48(%rsp)
+ movl %r8d,%r13d
+ jmp .Lavx_00_47
+
+.align 16
+.Lavx_00_47:
+ subq $-32*4,%rbp
+ vmovdqu (%r12),%xmm9
+ movq %r12,64+0(%rsp)
+ vpalignr $4,%xmm0,%xmm1,%xmm4
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ vpalignr $4,%xmm2,%xmm3,%xmm7
+ xorl %r8d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r10d,%r12d
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ vpaddd %xmm7,%xmm0,%xmm0
+ vpxor %xmm10,%xmm9,%xmm9
+ vmovdqu 16-128(%rdi),%xmm10
+ xorl %r8d,%r13d
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ vpsrld $3,%xmm4,%xmm7
+ shrdl $11,%r14d,%r14d
+ xorl %r10d,%r12d
+ xorl %ebx,%r15d
+ vpslld $14,%xmm4,%xmm5
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r11d
+ andl %r15d,%esi
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%esi
+ vpshufd $250,%xmm3,%xmm7
+ addl %r11d,%edx
+ shrdl $2,%r14d,%r14d
+ addl %esi,%r11d
+ vpsrld $11,%xmm6,%xmm6
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ shrdl $14,%r13d,%r13d
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ xorl %edx,%r13d
+ vpslld $11,%xmm5,%xmm5
+ shrdl $9,%r14d,%r14d
+ xorl %r9d,%r12d
+ shrdl $5,%r13d,%r13d
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ vpxor %xmm8,%xmm9,%xmm9
+ xorl %edx,%r13d
+ vpsrld $10,%xmm7,%xmm6
+ addl 4(%rsp),%r10d
+ movl %r11d,%esi
+ shrdl $11,%r14d,%r14d
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %r9d,%r12d
+ xorl %eax,%esi
+ shrdl $6,%r13d,%r13d
+ vpsrlq $17,%xmm7,%xmm7
+ addl %r12d,%r10d
+ andl %esi,%r15d
+ xorl %r11d,%r14d
+ vpaddd %xmm4,%xmm0,%xmm0
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ addl %r10d,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ vpsrlq $2,%xmm7,%xmm7
+ addl %r10d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r10d
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %edx,%r12d
+ xorl %ecx,%r13d
+ shrdl $9,%r14d,%r14d
+ vpshufd $132,%xmm6,%xmm6
+ xorl %r8d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r10d,%r14d
+ vpsrldq $8,%xmm6,%xmm6
+ andl %ecx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 32-128(%rdi),%xmm10
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ vpaddd %xmm6,%xmm0,%xmm0
+ movl %r10d,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %r8d,%r12d
+ vpshufd $80,%xmm0,%xmm7
+ xorl %r11d,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r9d
+ vpsrld $10,%xmm7,%xmm6
+ andl %r15d,%esi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ vpsrlq $17,%xmm7,%xmm7
+ xorl %r11d,%esi
+ addl %r9d,%ebx
+ shrdl $2,%r14d,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ addl %esi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ vpsrlq $2,%xmm7,%xmm7
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ vpxor %xmm7,%xmm6,%xmm6
+ xorl %ebx,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %edx,%r12d
+ vpshufd $232,%xmm6,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ vpslldq $8,%xmm6,%xmm6
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 48-128(%rdi),%xmm10
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ movl %r9d,%esi
+ vpaddd %xmm6,%xmm0,%xmm0
+ shrdl $11,%r14d,%r14d
+ xorl %edx,%r12d
+ xorl %r10d,%esi
+ vpaddd 0(%rbp),%xmm0,%xmm6
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r8d
+ andl %esi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ addl %r8d,%eax
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ vmovdqa %xmm6,0(%rsp)
+ vpalignr $4,%xmm1,%xmm2,%xmm4
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ vpalignr $4,%xmm3,%xmm0,%xmm7
+ xorl %eax,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %ecx,%r12d
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ vpaddd %xmm7,%xmm1,%xmm1
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 64-128(%rdi),%xmm10
+ xorl %eax,%r13d
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ vpsrld $3,%xmm4,%xmm7
+ shrdl $11,%r14d,%r14d
+ xorl %ecx,%r12d
+ xorl %r9d,%r15d
+ vpslld $14,%xmm4,%xmm5
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%edx
+ andl %r15d,%esi
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%esi
+ vpshufd $250,%xmm0,%xmm7
+ addl %edx,%r11d
+ shrdl $2,%r14d,%r14d
+ addl %esi,%edx
+ vpsrld $11,%xmm6,%xmm6
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ shrdl $14,%r13d,%r13d
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %r14d,%edx
+ movl %eax,%r12d
+ xorl %r11d,%r13d
+ vpslld $11,%xmm5,%xmm5
+ shrdl $9,%r14d,%r14d
+ xorl %ebx,%r12d
+ shrdl $5,%r13d,%r13d
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 80-128(%rdi),%xmm10
+ xorl %r11d,%r13d
+ vpsrld $10,%xmm7,%xmm6
+ addl 20(%rsp),%ecx
+ movl %edx,%esi
+ shrdl $11,%r14d,%r14d
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ebx,%r12d
+ xorl %r8d,%esi
+ shrdl $6,%r13d,%r13d
+ vpsrlq $17,%xmm7,%xmm7
+ addl %r12d,%ecx
+ andl %esi,%r15d
+ xorl %edx,%r14d
+ vpaddd %xmm4,%xmm1,%xmm1
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ addl %ecx,%r10d
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ vpsrlq $2,%xmm7,%xmm7
+ addl %ecx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %r11d,%r12d
+ xorl %r10d,%r13d
+ shrdl $9,%r14d,%r14d
+ vpshufd $132,%xmm6,%xmm6
+ xorl %eax,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ecx,%r14d
+ vpsrldq $8,%xmm6,%xmm6
+ andl %r10d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 96-128(%rdi),%xmm10
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ vpaddd %xmm6,%xmm1,%xmm1
+ movl %ecx,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %eax,%r12d
+ vpshufd $80,%xmm1,%xmm7
+ xorl %edx,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%ebx
+ vpsrld $10,%xmm7,%xmm6
+ andl %r15d,%esi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ vpsrlq $17,%xmm7,%xmm7
+ xorl %edx,%esi
+ addl %ebx,%r9d
+ shrdl $2,%r14d,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ addl %esi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ vpsrlq $2,%xmm7,%xmm7
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ vpxor %xmm7,%xmm6,%xmm6
+ xorl %r9d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r11d,%r12d
+ vpshufd $232,%xmm6,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ vpslldq $8,%xmm6,%xmm6
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 112-128(%rdi),%xmm10
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ movl %ebx,%esi
+ vpaddd %xmm6,%xmm1,%xmm1
+ shrdl $11,%r14d,%r14d
+ xorl %r11d,%r12d
+ xorl %ecx,%esi
+ vpaddd 32(%rbp),%xmm1,%xmm6
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%eax
+ andl %esi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ addl %eax,%r8d
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ vmovdqa %xmm6,16(%rsp)
+ vpalignr $4,%xmm2,%xmm3,%xmm4
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ vpalignr $4,%xmm0,%xmm1,%xmm7
+ xorl %r8d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r10d,%r12d
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ vpaddd %xmm7,%xmm2,%xmm2
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 128-128(%rdi),%xmm10
+ xorl %r8d,%r13d
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ vpsrld $3,%xmm4,%xmm7
+ shrdl $11,%r14d,%r14d
+ xorl %r10d,%r12d
+ xorl %ebx,%r15d
+ vpslld $14,%xmm4,%xmm5
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r11d
+ andl %r15d,%esi
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%esi
+ vpshufd $250,%xmm1,%xmm7
+ addl %r11d,%edx
+ shrdl $2,%r14d,%r14d
+ addl %esi,%r11d
+ vpsrld $11,%xmm6,%xmm6
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ shrdl $14,%r13d,%r13d
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ xorl %edx,%r13d
+ vpslld $11,%xmm5,%xmm5
+ shrdl $9,%r14d,%r14d
+ xorl %r9d,%r12d
+ shrdl $5,%r13d,%r13d
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 144-128(%rdi),%xmm10
+ xorl %edx,%r13d
+ vpsrld $10,%xmm7,%xmm6
+ addl 36(%rsp),%r10d
+ movl %r11d,%esi
+ shrdl $11,%r14d,%r14d
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %r9d,%r12d
+ xorl %eax,%esi
+ shrdl $6,%r13d,%r13d
+ vpsrlq $17,%xmm7,%xmm7
+ addl %r12d,%r10d
+ andl %esi,%r15d
+ xorl %r11d,%r14d
+ vpaddd %xmm4,%xmm2,%xmm2
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ addl %r10d,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ vpsrlq $2,%xmm7,%xmm7
+ addl %r10d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r10d
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %edx,%r12d
+ xorl %ecx,%r13d
+ shrdl $9,%r14d,%r14d
+ vpshufd $132,%xmm6,%xmm6
+ xorl %r8d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r10d,%r14d
+ vpsrldq $8,%xmm6,%xmm6
+ andl %ecx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 160-128(%rdi),%xmm10
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ vpaddd %xmm6,%xmm2,%xmm2
+ movl %r10d,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %r8d,%r12d
+ vpshufd $80,%xmm2,%xmm7
+ xorl %r11d,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r9d
+ vpsrld $10,%xmm7,%xmm6
+ andl %r15d,%esi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ vpsrlq $17,%xmm7,%xmm7
+ xorl %r11d,%esi
+ addl %r9d,%ebx
+ shrdl $2,%r14d,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ addl %esi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ vpsrlq $2,%xmm7,%xmm7
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ vpxor %xmm7,%xmm6,%xmm6
+ xorl %ebx,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %edx,%r12d
+ vpshufd $232,%xmm6,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ vpslldq $8,%xmm6,%xmm6
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 176-128(%rdi),%xmm10
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ movl %r9d,%esi
+ vpaddd %xmm6,%xmm2,%xmm2
+ shrdl $11,%r14d,%r14d
+ xorl %edx,%r12d
+ xorl %r10d,%esi
+ vpaddd 64(%rbp),%xmm2,%xmm6
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r8d
+ andl %esi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ addl %r8d,%eax
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ vmovdqa %xmm6,32(%rsp)
+ vpalignr $4,%xmm3,%xmm0,%xmm4
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ vpalignr $4,%xmm1,%xmm2,%xmm7
+ xorl %eax,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %ecx,%r12d
+ vpsrld $7,%xmm4,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ vpaddd %xmm7,%xmm3,%xmm3
+ vpand %xmm12,%xmm11,%xmm8
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 192-128(%rdi),%xmm10
+ xorl %eax,%r13d
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ vpsrld $3,%xmm4,%xmm7
+ shrdl $11,%r14d,%r14d
+ xorl %ecx,%r12d
+ xorl %r9d,%r15d
+ vpslld $14,%xmm4,%xmm5
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%edx
+ andl %r15d,%esi
+ vpxor %xmm6,%xmm7,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%esi
+ vpshufd $250,%xmm2,%xmm7
+ addl %edx,%r11d
+ shrdl $2,%r14d,%r14d
+ addl %esi,%edx
+ vpsrld $11,%xmm6,%xmm6
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ shrdl $14,%r13d,%r13d
+ vpxor %xmm5,%xmm4,%xmm4
+ movl %r14d,%edx
+ movl %eax,%r12d
+ xorl %r11d,%r13d
+ vpslld $11,%xmm5,%xmm5
+ shrdl $9,%r14d,%r14d
+ xorl %ebx,%r12d
+ shrdl $5,%r13d,%r13d
+ vpxor %xmm6,%xmm4,%xmm4
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 208-128(%rdi),%xmm10
+ xorl %r11d,%r13d
+ vpsrld $10,%xmm7,%xmm6
+ addl 52(%rsp),%ecx
+ movl %edx,%esi
+ shrdl $11,%r14d,%r14d
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %ebx,%r12d
+ xorl %r8d,%esi
+ shrdl $6,%r13d,%r13d
+ vpsrlq $17,%xmm7,%xmm7
+ addl %r12d,%ecx
+ andl %esi,%r15d
+ xorl %edx,%r14d
+ vpaddd %xmm4,%xmm3,%xmm3
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ addl %ecx,%r10d
+ vpxor %xmm7,%xmm6,%xmm6
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ vpsrlq $2,%xmm7,%xmm7
+ addl %ecx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ecx
+ vpxor %xmm7,%xmm6,%xmm6
+ movl %r11d,%r12d
+ xorl %r10d,%r13d
+ shrdl $9,%r14d,%r14d
+ vpshufd $132,%xmm6,%xmm6
+ xorl %eax,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ecx,%r14d
+ vpsrldq $8,%xmm6,%xmm6
+ andl %r10d,%r12d
+ vpand %xmm13,%xmm11,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 224-128(%rdi),%xmm10
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ vpaddd %xmm6,%xmm3,%xmm3
+ movl %ecx,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %eax,%r12d
+ vpshufd $80,%xmm3,%xmm7
+ xorl %edx,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%ebx
+ vpsrld $10,%xmm7,%xmm6
+ andl %r15d,%esi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ vpsrlq $17,%xmm7,%xmm7
+ xorl %edx,%esi
+ addl %ebx,%r9d
+ shrdl $2,%r14d,%r14d
+ vpxor %xmm7,%xmm6,%xmm6
+ addl %esi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ vpsrlq $2,%xmm7,%xmm7
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ vpxor %xmm7,%xmm6,%xmm6
+ xorl %r9d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r11d,%r12d
+ vpshufd $232,%xmm6,%xmm6
+ shrdl $5,%r13d,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ vpslldq $8,%xmm6,%xmm6
+ vpor %xmm11,%xmm8,%xmm8
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vmovdqu 0-128(%rdi),%xmm10
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ movl %ebx,%esi
+ vpaddd %xmm6,%xmm3,%xmm3
+ shrdl $11,%r14d,%r14d
+ xorl %r11d,%r12d
+ xorl %ecx,%esi
+ vpaddd 96(%rbp),%xmm3,%xmm6
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%eax
+ andl %esi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ addl %eax,%r8d
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ vmovdqa %xmm6,48(%rsp)
+ movq 64+0(%rsp),%r12
+ vpand %xmm14,%xmm11,%xmm11
+ movq 64+8(%rsp),%r15
+ vpor %xmm11,%xmm8,%xmm8
+ vmovdqu %xmm8,(%r15,%r12,1)
+ leaq 16(%r12),%r12
+ cmpb $0,131(%rbp)
+ jne .Lavx_00_47
+ vmovdqu (%r12),%xmm9
+ movq %r12,64+0(%rsp)
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ xorl %r8d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r10d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ vpxor %xmm10,%xmm9,%xmm9
+ vmovdqu 16-128(%rdi),%xmm10
+ xorl %r8d,%r13d
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %r10d,%r12d
+ xorl %ebx,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r11d
+ andl %r15d,%esi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%esi
+ addl %r11d,%edx
+ shrdl $2,%r14d,%r14d
+ addl %esi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ xorl %edx,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r9d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ vpxor %xmm8,%xmm9,%xmm9
+ xorl %edx,%r13d
+ addl 4(%rsp),%r10d
+ movl %r11d,%esi
+ shrdl $11,%r14d,%r14d
+ xorl %r9d,%r12d
+ xorl %eax,%esi
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r10d
+ andl %esi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ addl %r10d,%ecx
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ xorl %ecx,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r8d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 32-128(%rdi),%xmm10
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %r8d,%r12d
+ xorl %r11d,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r9d
+ andl %r15d,%esi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%esi
+ addl %r9d,%ebx
+ shrdl $2,%r14d,%r14d
+ addl %esi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ xorl %ebx,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %edx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 48-128(%rdi),%xmm10
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ movl %r9d,%esi
+ shrdl $11,%r14d,%r14d
+ xorl %edx,%r12d
+ xorl %r10d,%esi
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r8d
+ andl %esi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ addl %r8d,%eax
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ xorl %eax,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %ecx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 64-128(%rdi),%xmm10
+ xorl %eax,%r13d
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %ecx,%r12d
+ xorl %r9d,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%edx
+ andl %r15d,%esi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%esi
+ addl %edx,%r11d
+ shrdl $2,%r14d,%r14d
+ addl %esi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ xorl %r11d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %ebx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 80-128(%rdi),%xmm10
+ xorl %r11d,%r13d
+ addl 20(%rsp),%ecx
+ movl %edx,%esi
+ shrdl $11,%r14d,%r14d
+ xorl %ebx,%r12d
+ xorl %r8d,%esi
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%ecx
+ andl %esi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ addl %ecx,%r10d
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ xorl %r10d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %eax,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 96-128(%rdi),%xmm10
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %eax,%r12d
+ xorl %edx,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%ebx
+ andl %r15d,%esi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%esi
+ addl %ebx,%r9d
+ shrdl $2,%r14d,%r14d
+ addl %esi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ xorl %r9d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r11d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 112-128(%rdi),%xmm10
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ movl %ebx,%esi
+ shrdl $11,%r14d,%r14d
+ xorl %r11d,%r12d
+ xorl %ecx,%esi
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%eax
+ andl %esi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ addl %eax,%r8d
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ xorl %r8d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r10d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 128-128(%rdi),%xmm10
+ xorl %r8d,%r13d
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %r10d,%r12d
+ xorl %ebx,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r11d
+ andl %r15d,%esi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%esi
+ addl %r11d,%edx
+ shrdl $2,%r14d,%r14d
+ addl %esi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ xorl %edx,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r9d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 144-128(%rdi),%xmm10
+ xorl %edx,%r13d
+ addl 36(%rsp),%r10d
+ movl %r11d,%esi
+ shrdl $11,%r14d,%r14d
+ xorl %r9d,%r12d
+ xorl %eax,%esi
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r10d
+ andl %esi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ addl %r10d,%ecx
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ xorl %ecx,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r8d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 160-128(%rdi),%xmm10
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %r8d,%r12d
+ xorl %r11d,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r9d
+ andl %r15d,%esi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%esi
+ addl %r9d,%ebx
+ shrdl $2,%r14d,%r14d
+ addl %esi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ xorl %ebx,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %edx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 176-128(%rdi),%xmm10
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ movl %r9d,%esi
+ shrdl $11,%r14d,%r14d
+ xorl %edx,%r12d
+ xorl %r10d,%esi
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%r8d
+ andl %esi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ addl %r8d,%eax
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ xorl %eax,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %ecx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ vpand %xmm12,%xmm11,%xmm8
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 192-128(%rdi),%xmm10
+ xorl %eax,%r13d
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %ecx,%r12d
+ xorl %r9d,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%edx
+ andl %r15d,%esi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%esi
+ addl %edx,%r11d
+ shrdl $2,%r14d,%r14d
+ addl %esi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ xorl %r11d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %ebx,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 208-128(%rdi),%xmm10
+ xorl %r11d,%r13d
+ addl 52(%rsp),%ecx
+ movl %edx,%esi
+ shrdl $11,%r14d,%r14d
+ xorl %ebx,%r12d
+ xorl %r8d,%esi
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%ecx
+ andl %esi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ addl %ecx,%r10d
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ xorl %r10d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %eax,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ vpand %xmm13,%xmm11,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 224-128(%rdi),%xmm10
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ shrdl $11,%r14d,%r14d
+ xorl %eax,%r12d
+ xorl %edx,%r15d
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%ebx
+ andl %r15d,%esi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%esi
+ addl %ebx,%r9d
+ shrdl $2,%r14d,%r14d
+ addl %esi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ shrdl $14,%r13d,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ xorl %r9d,%r13d
+ shrdl $9,%r14d,%r14d
+ xorl %r11d,%r12d
+ shrdl $5,%r13d,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ vpor %xmm11,%xmm8,%xmm8
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vmovdqu 0-128(%rdi),%xmm10
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ movl %ebx,%esi
+ shrdl $11,%r14d,%r14d
+ xorl %r11d,%r12d
+ xorl %ecx,%esi
+ shrdl $6,%r13d,%r13d
+ addl %r12d,%eax
+ andl %esi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ addl %eax,%r8d
+ shrdl $2,%r14d,%r14d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movq 64+0(%rsp),%r12
+ movq 64+8(%rsp),%r13
+ movq 64+40(%rsp),%r15
+ movq 64+48(%rsp),%rsi
+
+ vpand %xmm14,%xmm11,%xmm11
+ movl %r14d,%eax
+ vpor %xmm11,%xmm8,%xmm8
+ vmovdqu %xmm8,(%r12,%r13,1)
+ leaq 16(%r12),%r12
+
+ addl 0(%r15),%eax
+ addl 4(%r15),%ebx
+ addl 8(%r15),%ecx
+ addl 12(%r15),%edx
+ addl 16(%r15),%r8d
+ addl 20(%r15),%r9d
+ addl 24(%r15),%r10d
+ addl 28(%r15),%r11d
+
+ cmpq 64+16(%rsp),%r12
+
+ movl %eax,0(%r15)
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ movl %r8d,16(%r15)
+ movl %r9d,20(%r15)
+ movl %r10d,24(%r15)
+ movl %r11d,28(%r15)
+ jb .Lloop_avx
+
+ movq 64+32(%rsp),%r8
+ movq 64+56(%rsp),%rsi
+ vmovdqu %xmm8,(%r8)
+ vzeroall
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue_avx:
+ .byte 0xf3,0xc3
+.size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx
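
The AVX2 body below differs from the XOP and AVX ones in two visible ways: it widens the message schedule to ymm registers and pulls a second 64-byte block into the high lane with vinserti128, and it swaps the shrd/ror rotates for BMI2 rorx, which does not write the flags and so interleaves more freely with the add/and round logic. The scalar work those rorxl/andl/andnl sequences perform is the ordinary SHA-256 round; a self-contained C version follows (standard FIPS 180-4, the function name is illustrative only):

#include <stdint.h>

static inline uint32_t rotr32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

/* One SHA-256 round in scalar C.  The rorxl $25/$11/$6 and $22/$13/$2
 * immediates in the AVX2 body below are the Sigma1/Sigma0 rotations,
 * and the andl/andnl pairs compute the two terms of Ch. */
static void sha256_round(uint32_t s[8], uint32_t k, uint32_t w)
{
    uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
    uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

    uint32_t S1  = rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25);
    uint32_t ch  = (e & f) ^ (~e & g);
    uint32_t t1  = h + S1 + ch + k + w;
    uint32_t S0  = rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22);
    uint32_t maj = (a & b) ^ (a & c) ^ (b & c);
    uint32_t t2  = S0 + maj;

    s[7] = g; s[6] = f; s[5] = e; s[4] = d + t1;
    s[3] = c; s[2] = b; s[1] = a; s[0] = t1 + t2;
}

The generated code adds the two Ch terms instead of xoring them, which is safe because (e & f) and (~e & g) can never both have the same bit set.
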
+.type aesni_cbc_sha256_enc_avx2,@function
+.align 64
+aesni_cbc_sha256_enc_avx2:
+.Lavx2_shortcut:
+ movq 8(%rsp),%r10
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ subq $576,%rsp
+ andq $-1024,%rsp
+ addq $448,%rsp
+
+ shlq $6,%rdx
+ subq %rdi,%rsi
+ subq %rdi,%r10
+ addq %rdi,%rdx
+
+
+
+ movq %rdx,64+16(%rsp)
+
+ movq %r8,64+32(%rsp)
+ movq %r9,64+40(%rsp)
+ movq %r10,64+48(%rsp)
+ movq %r11,64+56(%rsp)
+.Lprologue_avx2:
+ vzeroall
+
+ movq %rdi,%r13
+ vpinsrq $1,%rsi,%xmm15,%xmm15
+ leaq 128(%rcx),%rdi
+ leaq K256+544(%rip),%r12
+ movl 240-128(%rdi),%r14d
+ movq %r9,%r15
+ movq %r10,%rsi
+ vmovdqu (%r8),%xmm8
+ leaq -9(%r14),%r14
+
+ vmovdqa 0(%r12,%r14,8),%xmm14
+ vmovdqa 16(%r12,%r14,8),%xmm13
+ vmovdqa 32(%r12,%r14,8),%xmm12
+
+ subq $-64,%r13
+ movl 0(%r15),%eax
+ leaq (%rsi,%r13,1),%r12
+ movl 4(%r15),%ebx
+ cmpq %rdx,%r13
+ movl 8(%r15),%ecx
+ cmoveq %rsp,%r12
+ movl 12(%r15),%edx
+ movl 16(%r15),%r8d
+ movl 20(%r15),%r9d
+ movl 24(%r15),%r10d
+ movl 28(%r15),%r11d
+ vmovdqu 0-128(%rdi),%xmm10
+ jmp .Loop_avx2
+.align 16
+.Loop_avx2:
+ vmovdqa K256+512(%rip),%ymm7
+ vmovdqu -64+0(%rsi,%r13,1),%xmm0
+ vmovdqu -64+16(%rsi,%r13,1),%xmm1
+ vmovdqu -64+32(%rsi,%r13,1),%xmm2
+ vmovdqu -64+48(%rsi,%r13,1),%xmm3
+
+ vinserti128 $1,(%r12),%ymm0,%ymm0
+ vinserti128 $1,16(%r12),%ymm1,%ymm1
+ vpshufb %ymm7,%ymm0,%ymm0
+ vinserti128 $1,32(%r12),%ymm2,%ymm2
+ vpshufb %ymm7,%ymm1,%ymm1
+ vinserti128 $1,48(%r12),%ymm3,%ymm3
+
+ leaq K256(%rip),%rbp
+ vpshufb %ymm7,%ymm2,%ymm2
+ leaq -64(%r13),%r13
+ vpaddd 0(%rbp),%ymm0,%ymm4
+ vpshufb %ymm7,%ymm3,%ymm3
+ vpaddd 32(%rbp),%ymm1,%ymm5
+ vpaddd 64(%rbp),%ymm2,%ymm6
+ vpaddd 96(%rbp),%ymm3,%ymm7
+ vmovdqa %ymm4,0(%rsp)
+ xorl %r14d,%r14d
+ vmovdqa %ymm5,32(%rsp)
+ leaq -64(%rsp),%rsp
+ movl %ebx,%esi
+ vmovdqa %ymm6,0(%rsp)
+ xorl %ecx,%esi
+ vmovdqa %ymm7,32(%rsp)
+ movl %r9d,%r12d
+ subq $-32*4,%rbp
+ jmp .Lavx2_00_47
+
+.align 16
+.Lavx2_00_47:
+ vmovdqu (%r13),%xmm9
+ vpinsrq $0,%r13,%xmm15,%xmm15
+ leaq -64(%rsp),%rsp
+ vpalignr $4,%ymm0,%ymm1,%ymm4
+ addl 0+128(%rsp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ vpalignr $4,%ymm2,%ymm3,%ymm7
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ vpsrld $7,%ymm4,%ymm6
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ vpaddd %ymm7,%ymm0,%ymm0
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ vpsrld $3,%ymm4,%ymm7
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ vpslld $14,%ymm4,%ymm5
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ vpxor %ymm6,%ymm7,%ymm4
+ andl %r15d,%esi
+ vpxor %xmm10,%xmm9,%xmm9
+ vmovdqu 16-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ebx,%esi
+ vpshufd $250,%ymm3,%ymm7
+ xorl %r13d,%r14d
+ leal (%r11,%rsi,1),%r11d
+ movl %r8d,%r12d
+ vpsrld $11,%ymm6,%ymm6
+ addl 4+128(%rsp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $11,%edx,%esi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ vpslld $11,%ymm5,%ymm5
+ andnl %r9d,%edx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%edx,%r14d
+ vpxor %ymm6,%ymm4,%ymm4
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%esi
+ vpsrld $10,%ymm7,%ymm6
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%esi
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ vpsrlq $17,%ymm7,%ymm7
+ andl %esi,%r15d
+ vpxor %xmm8,%xmm9,%xmm9
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ vpaddd %ymm4,%ymm0,%ymm0
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 8+128(%rsp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ vpshufd $132,%ymm6,%ymm6
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ vpsrldq $8,%ymm6,%ymm6
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ vpaddd %ymm6,%ymm0,%ymm0
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ vpshufd $80,%ymm0,%ymm7
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 32-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r11d,%esi
+ vpsrld $10,%ymm7,%ymm6
+ xorl %r13d,%r14d
+ leal (%r9,%rsi,1),%r9d
+ movl %ecx,%r12d
+ vpsrlq $17,%ymm7,%ymm7
+ addl 12+128(%rsp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ vpxor %ymm7,%ymm6,%ymm6
+ rorxl $11,%ebx,%esi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ vpsrlq $2,%ymm7,%ymm7
+ andnl %edx,%ebx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%ebx,%r14d
+ vpxor %ymm7,%ymm6,%ymm6
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%esi
+ vpshufd $232,%ymm6,%ymm6
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%esi
+ vpslldq $8,%ymm6,%ymm6
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ vpaddd %ymm6,%ymm0,%ymm0
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 48-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ vpaddd 0(%rbp),%ymm0,%ymm6
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ vmovdqa %ymm6,0(%rsp)
+ vpalignr $4,%ymm1,%ymm2,%ymm4
+ addl 32+128(%rsp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ vpalignr $4,%ymm3,%ymm0,%ymm7
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ vpsrld $7,%ymm4,%ymm6
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ vpaddd %ymm7,%ymm1,%ymm1
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ vpsrld $3,%ymm4,%ymm7
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ vpslld $14,%ymm4,%ymm5
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ vpxor %ymm6,%ymm7,%ymm4
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 64-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r9d,%esi
+ vpshufd $250,%ymm0,%ymm7
+ xorl %r13d,%r14d
+ leal (%rdx,%rsi,1),%edx
+ movl %eax,%r12d
+ vpsrld $11,%ymm6,%ymm6
+ addl 36+128(%rsp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $11,%r11d,%esi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ vpslld $11,%ymm5,%ymm5
+ andnl %ebx,%r11d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r11d,%r14d
+ vpxor %ymm6,%ymm4,%ymm4
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%esi
+ vpsrld $10,%ymm7,%ymm6
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%esi
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ vpsrlq $17,%ymm7,%ymm7
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 80-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ vpaddd %ymm4,%ymm1,%ymm1
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 40+128(%rsp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ vpshufd $132,%ymm6,%ymm6
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ vpsrldq $8,%ymm6,%ymm6
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ vpaddd %ymm6,%ymm1,%ymm1
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ vpshufd $80,%ymm1,%ymm7
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 96-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %edx,%esi
+ vpsrld $10,%ymm7,%ymm6
+ xorl %r13d,%r14d
+ leal (%rbx,%rsi,1),%ebx
+ movl %r10d,%r12d
+ vpsrlq $17,%ymm7,%ymm7
+ addl 44+128(%rsp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ vpxor %ymm7,%ymm6,%ymm6
+ rorxl $11,%r9d,%esi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ vpsrlq $2,%ymm7,%ymm7
+ andnl %r11d,%r9d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r9d,%r14d
+ vpxor %ymm7,%ymm6,%ymm6
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%esi
+ vpshufd $232,%ymm6,%ymm6
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%esi
+ vpslldq $8,%ymm6,%ymm6
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ vpaddd %ymm6,%ymm1,%ymm1
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 112-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ vpaddd 32(%rbp),%ymm1,%ymm6
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ vmovdqa %ymm6,32(%rsp)
+ leaq -64(%rsp),%rsp
+ vpalignr $4,%ymm2,%ymm3,%ymm4
+ addl 0+128(%rsp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ vpalignr $4,%ymm0,%ymm1,%ymm7
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ vpsrld $7,%ymm4,%ymm6
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ vpaddd %ymm7,%ymm2,%ymm2
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ vpsrld $3,%ymm4,%ymm7
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ vpslld $14,%ymm4,%ymm5
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ vpxor %ymm6,%ymm7,%ymm4
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 128-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ebx,%esi
+ vpshufd $250,%ymm1,%ymm7
+ xorl %r13d,%r14d
+ leal (%r11,%rsi,1),%r11d
+ movl %r8d,%r12d
+ vpsrld $11,%ymm6,%ymm6
+ addl 4+128(%rsp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $11,%edx,%esi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ vpslld $11,%ymm5,%ymm5
+ andnl %r9d,%edx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%edx,%r14d
+ vpxor %ymm6,%ymm4,%ymm4
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%esi
+ vpsrld $10,%ymm7,%ymm6
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%esi
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ vpsrlq $17,%ymm7,%ymm7
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 144-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ vpaddd %ymm4,%ymm2,%ymm2
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 8+128(%rsp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ vpshufd $132,%ymm6,%ymm6
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ vpsrldq $8,%ymm6,%ymm6
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ vpaddd %ymm6,%ymm2,%ymm2
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ vpshufd $80,%ymm2,%ymm7
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 160-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r11d,%esi
+ vpsrld $10,%ymm7,%ymm6
+ xorl %r13d,%r14d
+ leal (%r9,%rsi,1),%r9d
+ movl %ecx,%r12d
+ vpsrlq $17,%ymm7,%ymm7
+ addl 12+128(%rsp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ vpxor %ymm7,%ymm6,%ymm6
+ rorxl $11,%ebx,%esi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ vpsrlq $2,%ymm7,%ymm7
+ andnl %edx,%ebx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%ebx,%r14d
+ vpxor %ymm7,%ymm6,%ymm6
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%esi
+ vpshufd $232,%ymm6,%ymm6
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%esi
+ vpslldq $8,%ymm6,%ymm6
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ vpaddd %ymm6,%ymm2,%ymm2
+ andl %esi,%r15d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 176-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ vpaddd 64(%rbp),%ymm2,%ymm6
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ vmovdqa %ymm6,0(%rsp)
+ vpalignr $4,%ymm3,%ymm0,%ymm4
+ addl 32+128(%rsp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ vpalignr $4,%ymm1,%ymm2,%ymm7
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ vpsrld $7,%ymm4,%ymm6
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ vpaddd %ymm7,%ymm3,%ymm3
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ vpsrld $3,%ymm4,%ymm7
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ vpslld $14,%ymm4,%ymm5
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ vpxor %ymm6,%ymm7,%ymm4
+ andl %r15d,%esi
+ vpand %xmm12,%xmm11,%xmm8
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 192-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r9d,%esi
+ vpshufd $250,%ymm2,%ymm7
+ xorl %r13d,%r14d
+ leal (%rdx,%rsi,1),%edx
+ movl %eax,%r12d
+ vpsrld $11,%ymm6,%ymm6
+ addl 36+128(%rsp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $11,%r11d,%esi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ vpslld $11,%ymm5,%ymm5
+ andnl %ebx,%r11d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r11d,%r14d
+ vpxor %ymm6,%ymm4,%ymm4
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%esi
+ vpsrld $10,%ymm7,%ymm6
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%esi
+ vpxor %ymm5,%ymm4,%ymm4
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ vpsrlq $17,%ymm7,%ymm7
+ andl %esi,%r15d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 208-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ vpaddd %ymm4,%ymm3,%ymm3
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ vpxor %ymm7,%ymm6,%ymm6
+ addl 40+128(%rsp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ vpsrlq $2,%ymm7,%ymm7
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ vpxor %ymm7,%ymm6,%ymm6
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ vpshufd $132,%ymm6,%ymm6
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ vpsrldq $8,%ymm6,%ymm6
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ vpaddd %ymm6,%ymm3,%ymm3
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ vpshufd $80,%ymm3,%ymm7
+ andl %r15d,%esi
+ vpand %xmm13,%xmm11,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 224-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %edx,%esi
+ vpsrld $10,%ymm7,%ymm6
+ xorl %r13d,%r14d
+ leal (%rbx,%rsi,1),%ebx
+ movl %r10d,%r12d
+ vpsrlq $17,%ymm7,%ymm7
+ addl 44+128(%rsp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ vpxor %ymm7,%ymm6,%ymm6
+ rorxl $11,%r9d,%esi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ vpsrlq $2,%ymm7,%ymm7
+ andnl %r11d,%r9d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r9d,%r14d
+ vpxor %ymm7,%ymm6,%ymm6
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%esi
+ vpshufd $232,%ymm6,%ymm6
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%esi
+ vpslldq $8,%ymm6,%ymm6
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ vpaddd %ymm6,%ymm3,%ymm3
+ andl %esi,%r15d
+ vpor %xmm11,%xmm8,%xmm8
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vmovdqu 0-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ vpaddd 96(%rbp),%ymm3,%ymm6
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ vmovdqa %ymm6,32(%rsp)
+ vmovq %xmm15,%r13
+ vpextrq $1,%xmm15,%r15
+ vpand %xmm14,%xmm11,%xmm11
+ vpor %xmm11,%xmm8,%xmm8
+ vmovdqu %xmm8,(%r15,%r13,1)
+ leaq 16(%r13),%r13
+ leaq 128(%rbp),%rbp
+ cmpb $0,3(%rbp)
+ jne .Lavx2_00_47
+ vmovdqu (%r13),%xmm9
+ vpinsrq $0,%r13,%xmm15,%xmm15
+ addl 0+64(%rsp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ andl %r15d,%esi
+ vpxor %xmm10,%xmm9,%xmm9
+ vmovdqu 16-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ebx,%esi
+ xorl %r13d,%r14d
+ leal (%r11,%rsi,1),%r11d
+ movl %r8d,%r12d
+ addl 4+64(%rsp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ rorxl $11,%edx,%esi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ andnl %r9d,%edx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%edx,%r14d
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%esi
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%esi
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ andl %esi,%r15d
+ vpxor %xmm8,%xmm9,%xmm9
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ addl 8+64(%rsp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 32-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r11d,%esi
+ xorl %r13d,%r14d
+ leal (%r9,%rsi,1),%r9d
+ movl %ecx,%r12d
+ addl 12+64(%rsp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ rorxl $11,%ebx,%esi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ andnl %edx,%ebx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%ebx,%r14d
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%esi
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%esi
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 48-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ addl 32+64(%rsp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 64-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r9d,%esi
+ xorl %r13d,%r14d
+ leal (%rdx,%rsi,1),%edx
+ movl %eax,%r12d
+ addl 36+64(%rsp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ rorxl $11,%r11d,%esi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ andnl %ebx,%r11d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r11d,%r14d
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%esi
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%esi
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 80-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ addl 40+64(%rsp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 96-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %edx,%esi
+ xorl %r13d,%r14d
+ leal (%rbx,%rsi,1),%ebx
+ movl %r10d,%r12d
+ addl 44+64(%rsp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ rorxl $11,%r9d,%esi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ andnl %r11d,%r9d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r9d,%r14d
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%esi
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%esi
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 112-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ addl 0(%rsp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 128-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ebx,%esi
+ xorl %r13d,%r14d
+ leal (%r11,%rsi,1),%r11d
+ movl %r8d,%r12d
+ addl 4(%rsp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ rorxl $11,%edx,%esi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ andnl %r9d,%edx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%edx,%r14d
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%esi
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%esi
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 144-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ addl 8(%rsp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 160-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r11d,%esi
+ xorl %r13d,%r14d
+ leal (%r9,%rsi,1),%r9d
+ movl %ecx,%r12d
+ addl 12(%rsp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ rorxl $11,%ebx,%esi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ andnl %edx,%ebx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%ebx,%r14d
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%esi
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%esi
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ andl %esi,%r15d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 176-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ addl 32(%rsp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ andl %r15d,%esi
+ vpand %xmm12,%xmm11,%xmm8
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 192-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r9d,%esi
+ xorl %r13d,%r14d
+ leal (%rdx,%rsi,1),%edx
+ movl %eax,%r12d
+ addl 36(%rsp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ rorxl $11,%r11d,%esi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ andnl %ebx,%r11d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r11d,%r14d
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%esi
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%esi
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ andl %esi,%r15d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 208-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ addl 40(%rsp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ andl %r15d,%esi
+ vpand %xmm13,%xmm11,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 224-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %edx,%esi
+ xorl %r13d,%r14d
+ leal (%rbx,%rsi,1),%ebx
+ movl %r10d,%r12d
+ addl 44(%rsp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ rorxl $11,%r9d,%esi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ andnl %r11d,%r9d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r9d,%r14d
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%esi
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%esi
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ andl %esi,%r15d
+ vpor %xmm11,%xmm8,%xmm8
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vmovdqu 0-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ vpextrq $1,%xmm15,%r12
+ vmovq %xmm15,%r13
+ movq 552(%rsp),%r15
+ addl %r14d,%eax
+ leaq 448(%rsp),%rbp
+
+ vpand %xmm14,%xmm11,%xmm11
+ vpor %xmm11,%xmm8,%xmm8
+ vmovdqu %xmm8,(%r12,%r13,1)
+ leaq 16(%r13),%r13
+
+ addl 0(%r15),%eax
+ addl 4(%r15),%ebx
+ addl 8(%r15),%ecx
+ addl 12(%r15),%edx
+ addl 16(%r15),%r8d
+ addl 20(%r15),%r9d
+ addl 24(%r15),%r10d
+ addl 28(%r15),%r11d
+
+ movl %eax,0(%r15)
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ movl %r8d,16(%r15)
+ movl %r9d,20(%r15)
+ movl %r10d,24(%r15)
+ movl %r11d,28(%r15)
+
+ cmpq 80(%rbp),%r13
+ je .Ldone_avx2
+
+ xorl %r14d,%r14d
+ movl %ebx,%esi
+ movl %r9d,%r12d
+ xorl %ecx,%esi
+ jmp .Lower_avx2
+.align 16
+.Lower_avx2:
+ vmovdqu (%r13),%xmm9
+ vpinsrq $0,%r13,%xmm15,%xmm15
+ addl 0+16(%rbp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ andl %r15d,%esi
+ vpxor %xmm10,%xmm9,%xmm9
+ vmovdqu 16-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ebx,%esi
+ xorl %r13d,%r14d
+ leal (%r11,%rsi,1),%r11d
+ movl %r8d,%r12d
+ addl 4+16(%rbp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ rorxl $11,%edx,%esi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ andnl %r9d,%edx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%edx,%r14d
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%esi
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%esi
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ andl %esi,%r15d
+ vpxor %xmm8,%xmm9,%xmm9
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ addl 8+16(%rbp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 32-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r11d,%esi
+ xorl %r13d,%r14d
+ leal (%r9,%rsi,1),%r9d
+ movl %ecx,%r12d
+ addl 12+16(%rbp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ rorxl $11,%ebx,%esi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ andnl %edx,%ebx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%ebx,%r14d
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%esi
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%esi
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 48-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ addl 32+16(%rbp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 64-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r9d,%esi
+ xorl %r13d,%r14d
+ leal (%rdx,%rsi,1),%edx
+ movl %eax,%r12d
+ addl 36+16(%rbp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ rorxl $11,%r11d,%esi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ andnl %ebx,%r11d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r11d,%r14d
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%esi
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%esi
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 80-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ addl 40+16(%rbp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 96-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %edx,%esi
+ xorl %r13d,%r14d
+ leal (%rbx,%rsi,1),%ebx
+ movl %r10d,%r12d
+ addl 44+16(%rbp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ rorxl $11,%r9d,%esi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ andnl %r11d,%r9d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r9d,%r14d
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%esi
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%esi
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 112-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ leaq -64(%rbp),%rbp
+ addl 0+16(%rbp),%r11d
+ andl %r8d,%r12d
+ rorxl $25,%r8d,%r13d
+ rorxl $11,%r8d,%r15d
+ leal (%rax,%r14,1),%eax
+ leal (%r11,%r12,1),%r11d
+ andnl %r10d,%r8d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r8d,%r14d
+ leal (%r11,%r12,1),%r11d
+ xorl %r14d,%r13d
+ movl %eax,%r15d
+ rorxl $22,%eax,%r12d
+ leal (%r11,%r13,1),%r11d
+ xorl %ebx,%r15d
+ rorxl $13,%eax,%r14d
+ rorxl $2,%eax,%r13d
+ leal (%rdx,%r11,1),%edx
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 128-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ebx,%esi
+ xorl %r13d,%r14d
+ leal (%r11,%rsi,1),%r11d
+ movl %r8d,%r12d
+ addl 4+16(%rbp),%r10d
+ andl %edx,%r12d
+ rorxl $25,%edx,%r13d
+ rorxl $11,%edx,%esi
+ leal (%r11,%r14,1),%r11d
+ leal (%r10,%r12,1),%r10d
+ andnl %r9d,%edx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%edx,%r14d
+ leal (%r10,%r12,1),%r10d
+ xorl %r14d,%r13d
+ movl %r11d,%esi
+ rorxl $22,%r11d,%r12d
+ leal (%r10,%r13,1),%r10d
+ xorl %eax,%esi
+ rorxl $13,%r11d,%r14d
+ rorxl $2,%r11d,%r13d
+ leal (%rcx,%r10,1),%ecx
+ andl %esi,%r15d
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 144-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %eax,%r15d
+ xorl %r13d,%r14d
+ leal (%r10,%r15,1),%r10d
+ movl %edx,%r12d
+ addl 8+16(%rbp),%r9d
+ andl %ecx,%r12d
+ rorxl $25,%ecx,%r13d
+ rorxl $11,%ecx,%r15d
+ leal (%r10,%r14,1),%r10d
+ leal (%r9,%r12,1),%r9d
+ andnl %r8d,%ecx,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%ecx,%r14d
+ leal (%r9,%r12,1),%r9d
+ xorl %r14d,%r13d
+ movl %r10d,%r15d
+ rorxl $22,%r10d,%r12d
+ leal (%r9,%r13,1),%r9d
+ xorl %r11d,%r15d
+ rorxl $13,%r10d,%r14d
+ rorxl $2,%r10d,%r13d
+ leal (%rbx,%r9,1),%ebx
+ andl %r15d,%esi
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 160-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r11d,%esi
+ xorl %r13d,%r14d
+ leal (%r9,%rsi,1),%r9d
+ movl %ecx,%r12d
+ addl 12+16(%rbp),%r8d
+ andl %ebx,%r12d
+ rorxl $25,%ebx,%r13d
+ rorxl $11,%ebx,%esi
+ leal (%r9,%r14,1),%r9d
+ leal (%r8,%r12,1),%r8d
+ andnl %edx,%ebx,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%ebx,%r14d
+ leal (%r8,%r12,1),%r8d
+ xorl %r14d,%r13d
+ movl %r9d,%esi
+ rorxl $22,%r9d,%r12d
+ leal (%r8,%r13,1),%r8d
+ xorl %r10d,%esi
+ rorxl $13,%r9d,%r14d
+ rorxl $2,%r9d,%r13d
+ leal (%rax,%r8,1),%eax
+ andl %esi,%r15d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 176-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r10d,%r15d
+ xorl %r13d,%r14d
+ leal (%r8,%r15,1),%r8d
+ movl %ebx,%r12d
+ addl 32+16(%rbp),%edx
+ andl %eax,%r12d
+ rorxl $25,%eax,%r13d
+ rorxl $11,%eax,%r15d
+ leal (%r8,%r14,1),%r8d
+ leal (%rdx,%r12,1),%edx
+ andnl %ecx,%eax,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%eax,%r14d
+ leal (%rdx,%r12,1),%edx
+ xorl %r14d,%r13d
+ movl %r8d,%r15d
+ rorxl $22,%r8d,%r12d
+ leal (%rdx,%r13,1),%edx
+ xorl %r9d,%r15d
+ rorxl $13,%r8d,%r14d
+ rorxl $2,%r8d,%r13d
+ leal (%r11,%rdx,1),%r11d
+ andl %r15d,%esi
+ vpand %xmm12,%xmm11,%xmm8
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 192-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r9d,%esi
+ xorl %r13d,%r14d
+ leal (%rdx,%rsi,1),%edx
+ movl %eax,%r12d
+ addl 36+16(%rbp),%ecx
+ andl %r11d,%r12d
+ rorxl $25,%r11d,%r13d
+ rorxl $11,%r11d,%esi
+ leal (%rdx,%r14,1),%edx
+ leal (%rcx,%r12,1),%ecx
+ andnl %ebx,%r11d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r11d,%r14d
+ leal (%rcx,%r12,1),%ecx
+ xorl %r14d,%r13d
+ movl %edx,%esi
+ rorxl $22,%edx,%r12d
+ leal (%rcx,%r13,1),%ecx
+ xorl %r8d,%esi
+ rorxl $13,%edx,%r14d
+ rorxl $2,%edx,%r13d
+ leal (%r10,%rcx,1),%r10d
+ andl %esi,%r15d
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 208-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %r8d,%r15d
+ xorl %r13d,%r14d
+ leal (%rcx,%r15,1),%ecx
+ movl %r11d,%r12d
+ addl 40+16(%rbp),%ebx
+ andl %r10d,%r12d
+ rorxl $25,%r10d,%r13d
+ rorxl $11,%r10d,%r15d
+ leal (%rcx,%r14,1),%ecx
+ leal (%rbx,%r12,1),%ebx
+ andnl %eax,%r10d,%r12d
+ xorl %r15d,%r13d
+ rorxl $6,%r10d,%r14d
+ leal (%rbx,%r12,1),%ebx
+ xorl %r14d,%r13d
+ movl %ecx,%r15d
+ rorxl $22,%ecx,%r12d
+ leal (%rbx,%r13,1),%ebx
+ xorl %edx,%r15d
+ rorxl $13,%ecx,%r14d
+ rorxl $2,%ecx,%r13d
+ leal (%r9,%rbx,1),%r9d
+ andl %r15d,%esi
+ vpand %xmm13,%xmm11,%xmm11
+ vaesenc %xmm10,%xmm9,%xmm9
+ vmovdqu 224-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %edx,%esi
+ xorl %r13d,%r14d
+ leal (%rbx,%rsi,1),%ebx
+ movl %r10d,%r12d
+ addl 44+16(%rbp),%eax
+ andl %r9d,%r12d
+ rorxl $25,%r9d,%r13d
+ rorxl $11,%r9d,%esi
+ leal (%rbx,%r14,1),%ebx
+ leal (%rax,%r12,1),%eax
+ andnl %r11d,%r9d,%r12d
+ xorl %esi,%r13d
+ rorxl $6,%r9d,%r14d
+ leal (%rax,%r12,1),%eax
+ xorl %r14d,%r13d
+ movl %ebx,%esi
+ rorxl $22,%ebx,%r12d
+ leal (%rax,%r13,1),%eax
+ xorl %ecx,%esi
+ rorxl $13,%ebx,%r14d
+ rorxl $2,%ebx,%r13d
+ leal (%r8,%rax,1),%r8d
+ andl %esi,%r15d
+ vpor %xmm11,%xmm8,%xmm8
+ vaesenclast %xmm10,%xmm9,%xmm11
+ vmovdqu 0-128(%rdi),%xmm10
+ xorl %r12d,%r14d
+ xorl %ecx,%r15d
+ xorl %r13d,%r14d
+ leal (%rax,%r15,1),%eax
+ movl %r9d,%r12d
+ vmovq %xmm15,%r13
+ vpextrq $1,%xmm15,%r15
+ vpand %xmm14,%xmm11,%xmm11
+ vpor %xmm11,%xmm8,%xmm8
+ leaq -64(%rbp),%rbp
+ vmovdqu %xmm8,(%r15,%r13,1)
+ leaq 16(%r13),%r13
+ cmpq %rsp,%rbp
+ jae .Lower_avx2
+
+ movq 552(%rsp),%r15
+ leaq 64(%r13),%r13
+ movq 560(%rsp),%rsi
+ addl %r14d,%eax
+ leaq 448(%rsp),%rsp
+
+ addl 0(%r15),%eax
+ addl 4(%r15),%ebx
+ addl 8(%r15),%ecx
+ addl 12(%r15),%edx
+ addl 16(%r15),%r8d
+ addl 20(%r15),%r9d
+ addl 24(%r15),%r10d
+ leaq (%rsi,%r13,1),%r12
+ addl 28(%r15),%r11d
+
+ cmpq 64+16(%rsp),%r13
+
+ movl %eax,0(%r15)
+ cmoveq %rsp,%r12
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ movl %r8d,16(%r15)
+ movl %r9d,20(%r15)
+ movl %r10d,24(%r15)
+ movl %r11d,28(%r15)
+
+ jbe .Loop_avx2
+ leaq (%rsp),%rbp
+
+.Ldone_avx2:
+ leaq (%rbp),%rsp
+ movq 64+32(%rsp),%r8
+ movq 64+56(%rsp),%rsi
+ vmovdqu %xmm8,(%r8)
+ vzeroall
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue_avx2:
+ .byte 0xf3,0xc3
+.size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2
+.type aesni_cbc_sha256_enc_shaext,@function
+.align 32
+aesni_cbc_sha256_enc_shaext:
+ movq 8(%rsp),%r10
+ leaq K256+128(%rip),%rax
+ movdqu (%r9),%xmm1
+ movdqu 16(%r9),%xmm2
+ movdqa 512-128(%rax),%xmm3
+
+ movl 240(%rcx),%r11d
+ subq %rdi,%rsi
+ movups (%rcx),%xmm15
+ movups (%r8),%xmm6
+ movups 16(%rcx),%xmm4
+ leaq 112(%rcx),%rcx
+
+ pshufd $0x1b,%xmm1,%xmm0
+ pshufd $0xb1,%xmm1,%xmm1
+ pshufd $0x1b,%xmm2,%xmm2
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,202,8
+ punpcklqdq %xmm0,%xmm2
+
+ jmp .Loop_shaext
+
+.align 16
+.Loop_shaext:
+ movdqu (%r10),%xmm10
+ movdqu 16(%r10),%xmm11
+ movdqu 32(%r10),%xmm12
+.byte 102,68,15,56,0,211
+ movdqu 48(%r10),%xmm13
+
+ movdqa 0-128(%rax),%xmm0
+ paddd %xmm10,%xmm0
+.byte 102,68,15,56,0,219
+ movdqa %xmm2,%xmm9
+ movdqa %xmm1,%xmm8
+ movups 0(%rdi),%xmm14
+ xorps %xmm15,%xmm14
+ xorps %xmm14,%xmm6
+ movups -80(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movups -64(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,202
+
+ movdqa 32-128(%rax),%xmm0
+ paddd %xmm11,%xmm0
+.byte 102,68,15,56,0,227
+ leaq 64(%r10),%r10
+ movups -48(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movups -32(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,202
+
+ movdqa 64-128(%rax),%xmm0
+ paddd %xmm12,%xmm0
+.byte 102,68,15,56,0,235
+.byte 69,15,56,204,211
+ movups -16(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm13,%xmm3
+.byte 102,65,15,58,15,220,4
+ paddd %xmm3,%xmm10
+ movups 0(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,202
+
+ movdqa 96-128(%rax),%xmm0
+ paddd %xmm13,%xmm0
+.byte 69,15,56,205,213
+.byte 69,15,56,204,220
+ movups 16(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movups 32(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+ movdqa %xmm10,%xmm3
+.byte 102,65,15,58,15,221,4
+ paddd %xmm3,%xmm11
+.byte 15,56,203,202
+ movdqa 128-128(%rax),%xmm0
+ paddd %xmm10,%xmm0
+.byte 69,15,56,205,218
+.byte 69,15,56,204,229
+ movups 48(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm11,%xmm3
+.byte 102,65,15,58,15,218,4
+ paddd %xmm3,%xmm12
+ cmpl $11,%r11d
+ jb .Laesenclast1
+ movups 64(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+ movups 80(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+ je .Laesenclast1
+ movups 96(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+ movups 112(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.Laesenclast1:
+ aesenclast %xmm5,%xmm6
+ movups 16-112(%rcx),%xmm4
+ nop
+.byte 15,56,203,202
+ movups 16(%rdi),%xmm14
+ xorps %xmm15,%xmm14
+ movups %xmm6,0(%rsi,%rdi,1)
+ xorps %xmm14,%xmm6
+ movups -80(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+ movdqa 160-128(%rax),%xmm0
+ paddd %xmm11,%xmm0
+.byte 69,15,56,205,227
+.byte 69,15,56,204,234
+ movups -64(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm12,%xmm3
+.byte 102,65,15,58,15,219,4
+ paddd %xmm3,%xmm13
+ movups -48(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,202
+ movdqa 192-128(%rax),%xmm0
+ paddd %xmm12,%xmm0
+.byte 69,15,56,205,236
+.byte 69,15,56,204,211
+ movups -32(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm13,%xmm3
+.byte 102,65,15,58,15,220,4
+ paddd %xmm3,%xmm10
+ movups -16(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,202
+ movdqa 224-128(%rax),%xmm0
+ paddd %xmm13,%xmm0
+.byte 69,15,56,205,213
+.byte 69,15,56,204,220
+ movups 0(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,65,15,58,15,221,4
+ paddd %xmm3,%xmm11
+ movups 16(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,202
+ movdqa 256-128(%rax),%xmm0
+ paddd %xmm10,%xmm0
+.byte 69,15,56,205,218
+.byte 69,15,56,204,229
+ movups 32(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm11,%xmm3
+.byte 102,65,15,58,15,218,4
+ paddd %xmm3,%xmm12
+ movups 48(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+ cmpl $11,%r11d
+ jb .Laesenclast2
+ movups 64(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+ movups 80(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+ je .Laesenclast2
+ movups 96(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+ movups 112(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.Laesenclast2:
+ aesenclast %xmm5,%xmm6
+ movups 16-112(%rcx),%xmm4
+ nop
+.byte 15,56,203,202
+ movups 32(%rdi),%xmm14
+ xorps %xmm15,%xmm14
+ movups %xmm6,16(%rsi,%rdi,1)
+ xorps %xmm14,%xmm6
+ movups -80(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+ movdqa 288-128(%rax),%xmm0
+ paddd %xmm11,%xmm0
+.byte 69,15,56,205,227
+.byte 69,15,56,204,234
+ movups -64(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm12,%xmm3
+.byte 102,65,15,58,15,219,4
+ paddd %xmm3,%xmm13
+ movups -48(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,202
+ movdqa 320-128(%rax),%xmm0
+ paddd %xmm12,%xmm0
+.byte 69,15,56,205,236
+.byte 69,15,56,204,211
+ movups -32(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm13,%xmm3
+.byte 102,65,15,58,15,220,4
+ paddd %xmm3,%xmm10
+ movups -16(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,202
+ movdqa 352-128(%rax),%xmm0
+ paddd %xmm13,%xmm0
+.byte 69,15,56,205,213
+.byte 69,15,56,204,220
+ movups 0(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,65,15,58,15,221,4
+ paddd %xmm3,%xmm11
+ movups 16(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,202
+ movdqa 384-128(%rax),%xmm0
+ paddd %xmm10,%xmm0
+.byte 69,15,56,205,218
+.byte 69,15,56,204,229
+ movups 32(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm11,%xmm3
+.byte 102,65,15,58,15,218,4
+ paddd %xmm3,%xmm12
+ movups 48(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,202
+ movdqa 416-128(%rax),%xmm0
+ paddd %xmm11,%xmm0
+.byte 69,15,56,205,227
+.byte 69,15,56,204,234
+ cmpl $11,%r11d
+ jb .Laesenclast3
+ movups 64(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+ movups 80(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+ je .Laesenclast3
+ movups 96(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+ movups 112(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.Laesenclast3:
+ aesenclast %xmm5,%xmm6
+ movups 16-112(%rcx),%xmm4
+ nop
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movdqa %xmm12,%xmm3
+.byte 102,65,15,58,15,219,4
+ paddd %xmm3,%xmm13
+ movups 48(%rdi),%xmm14
+ xorps %xmm15,%xmm14
+ movups %xmm6,32(%rsi,%rdi,1)
+ xorps %xmm14,%xmm6
+ movups -80(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+ movups -64(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,202
+
+ movdqa 448-128(%rax),%xmm0
+ paddd %xmm12,%xmm0
+.byte 69,15,56,205,236
+ movdqa %xmm7,%xmm3
+ movups -48(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movups -32(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,202
+
+ movdqa 480-128(%rax),%xmm0
+ paddd %xmm13,%xmm0
+ movups -16(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+ movups 0(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+.byte 15,56,203,209
+ pshufd $0x0e,%xmm0,%xmm0
+ movups 16(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.byte 15,56,203,202
+
+ movups 32(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+ movups 48(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+ cmpl $11,%r11d
+ jb .Laesenclast4
+ movups 64(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+ movups 80(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+ je .Laesenclast4
+ movups 96(%rcx),%xmm4
+ aesenc %xmm5,%xmm6
+ movups 112(%rcx),%xmm5
+ aesenc %xmm4,%xmm6
+.Laesenclast4:
+ aesenclast %xmm5,%xmm6
+ movups 16-112(%rcx),%xmm4
+ nop
+
+ paddd %xmm9,%xmm2
+ paddd %xmm8,%xmm1
+
+ decq %rdx
+ movups %xmm6,48(%rsi,%rdi,1)
+ leaq 64(%rdi),%rdi
+ jnz .Loop_shaext
+
+ pshufd $0xb1,%xmm2,%xmm2
+ pshufd $0x1b,%xmm1,%xmm3
+ pshufd $0xb1,%xmm1,%xmm1
+ punpckhqdq %xmm2,%xmm1
+.byte 102,15,58,15,211,8
+
+ movups %xmm6,(%r8)
+ movdqu %xmm1,(%r9)
+ movdqu %xmm2,16(%r9)
+ .byte 0xf3,0xc3
+.size aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext
Property changes on: trunk/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
===================================================================
--- trunk/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S (rev 0)
+++ trunk/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,3520 @@
+/* $MidnightBSD$ */
+/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S 325337 2017-11-02 18:30:41Z jkim $ */
+/* Do not modify. This file is auto-generated from ecp_nistz256-x86_64.pl. */
+.text
+
+
+
+.align 64
+.Lpoly:
+.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
+
+
+.LRR:
+.quad 0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd
+
+.LOne:
+.long 1,1,1,1,1,1,1,1
+.LTwo:
+.long 2,2,2,2,2,2,2,2
+.LThree:
+.long 3,3,3,3,3,3,3,3
+.LONE_mont:
+.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
+
+.globl ecp_nistz256_mul_by_2
+.type ecp_nistz256_mul_by_2,@function
+.align 64
+ecp_nistz256_mul_by_2:
+ pushq %r12
+ pushq %r13
+
+ movq 0(%rsi),%r8
+ xorq %r13,%r13
+ movq 8(%rsi),%r9
+ addq %r8,%r8
+ movq 16(%rsi),%r10
+ adcq %r9,%r9
+ movq 24(%rsi),%r11
+ leaq .Lpoly(%rip),%rsi
+ movq %r8,%rax
+ adcq %r10,%r10
+ adcq %r11,%r11
+ movq %r9,%rdx
+ adcq $0,%r13
+
+ subq 0(%rsi),%r8
+ movq %r10,%rcx
+ sbbq 8(%rsi),%r9
+ sbbq 16(%rsi),%r10
+ movq %r11,%r12
+ sbbq 24(%rsi),%r11
+ sbbq $0,%r13
+
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
+ movq %r8,0(%rdi)
+ cmovcq %rcx,%r10
+ movq %r9,8(%rdi)
+ cmovcq %r12,%r11
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+
+ popq %r13
+ popq %r12
+ .byte 0xf3,0xc3
+.size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
+
+
+
+.globl ecp_nistz256_div_by_2
+.type ecp_nistz256_div_by_2,@function
+.align 32
+ecp_nistz256_div_by_2:
+ pushq %r12
+ pushq %r13
+
+ movq 0(%rsi),%r8
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq %r8,%rax
+ movq 24(%rsi),%r11
+ leaq .Lpoly(%rip),%rsi
+
+ movq %r9,%rdx
+ xorq %r13,%r13
+ addq 0(%rsi),%r8
+ movq %r10,%rcx
+ adcq 8(%rsi),%r9
+ adcq 16(%rsi),%r10
+ movq %r11,%r12
+ adcq 24(%rsi),%r11
+ adcq $0,%r13
+ xorq %rsi,%rsi
+ testq $1,%rax
+
+ cmovzq %rax,%r8
+ cmovzq %rdx,%r9
+ cmovzq %rcx,%r10
+ cmovzq %r12,%r11
+ cmovzq %rsi,%r13
+
+ movq %r9,%rax
+ shrq $1,%r8
+ shlq $63,%rax
+ movq %r10,%rdx
+ shrq $1,%r9
+ orq %rax,%r8
+ shlq $63,%rdx
+ movq %r11,%rcx
+ shrq $1,%r10
+ orq %rdx,%r9
+ shlq $63,%rcx
+ shrq $1,%r11
+ shlq $63,%r13
+ orq %rcx,%r10
+ orq %r13,%r11
+
+ movq %r8,0(%rdi)
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+
+ popq %r13
+ popq %r12
+ .byte 0xf3,0xc3
+.size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
+
+
+
+.globl ecp_nistz256_mul_by_3
+.type ecp_nistz256_mul_by_3,@function
+.align 32
+ecp_nistz256_mul_by_3:
+ pushq %r12
+ pushq %r13
+
+ movq 0(%rsi),%r8
+ xorq %r13,%r13
+ movq 8(%rsi),%r9
+ addq %r8,%r8
+ movq 16(%rsi),%r10
+ adcq %r9,%r9
+ movq 24(%rsi),%r11
+ movq %r8,%rax
+ adcq %r10,%r10
+ adcq %r11,%r11
+ movq %r9,%rdx
+ adcq $0,%r13
+
+ subq $-1,%r8
+ movq %r10,%rcx
+ sbbq .Lpoly+8(%rip),%r9
+ sbbq $0,%r10
+ movq %r11,%r12
+ sbbq .Lpoly+24(%rip),%r11
+ sbbq $0,%r13
+
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
+ cmovcq %rcx,%r10
+ cmovcq %r12,%r11
+
+ xorq %r13,%r13
+ addq 0(%rsi),%r8
+ adcq 8(%rsi),%r9
+ movq %r8,%rax
+ adcq 16(%rsi),%r10
+ adcq 24(%rsi),%r11
+ movq %r9,%rdx
+ adcq $0,%r13
+
+ subq $-1,%r8
+ movq %r10,%rcx
+ sbbq .Lpoly+8(%rip),%r9
+ sbbq $0,%r10
+ movq %r11,%r12
+ sbbq .Lpoly+24(%rip),%r11
+ sbbq $0,%r13
+
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
+ movq %r8,0(%rdi)
+ cmovcq %rcx,%r10
+ movq %r9,8(%rdi)
+ cmovcq %r12,%r11
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+
+ popq %r13
+ popq %r12
+ .byte 0xf3,0xc3
+.size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
+
+
+
+.globl ecp_nistz256_add
+.type ecp_nistz256_add,@function
+.align 32
+ecp_nistz256_add:
+ pushq %r12
+ pushq %r13
+
+ movq 0(%rsi),%r8
+ xorq %r13,%r13
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+ leaq .Lpoly(%rip),%rsi
+
+ addq 0(%rdx),%r8
+ adcq 8(%rdx),%r9
+ movq %r8,%rax
+ adcq 16(%rdx),%r10
+ adcq 24(%rdx),%r11
+ movq %r9,%rdx
+ adcq $0,%r13
+
+ subq 0(%rsi),%r8
+ movq %r10,%rcx
+ sbbq 8(%rsi),%r9
+ sbbq 16(%rsi),%r10
+ movq %r11,%r12
+ sbbq 24(%rsi),%r11
+ sbbq $0,%r13
+
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
+ movq %r8,0(%rdi)
+ cmovcq %rcx,%r10
+ movq %r9,8(%rdi)
+ cmovcq %r12,%r11
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+
+ popq %r13
+ popq %r12
+ .byte 0xf3,0xc3
+.size ecp_nistz256_add,.-ecp_nistz256_add
+
+
+
+.globl ecp_nistz256_sub
+.type ecp_nistz256_sub,@function
+.align 32
+ecp_nistz256_sub:
+ pushq %r12
+ pushq %r13
+
+ movq 0(%rsi),%r8
+ xorq %r13,%r13
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+ leaq .Lpoly(%rip),%rsi
+
+ subq 0(%rdx),%r8
+ sbbq 8(%rdx),%r9
+ movq %r8,%rax
+ sbbq 16(%rdx),%r10
+ sbbq 24(%rdx),%r11
+ movq %r9,%rdx
+ sbbq $0,%r13
+
+ addq 0(%rsi),%r8
+ movq %r10,%rcx
+ adcq 8(%rsi),%r9
+ adcq 16(%rsi),%r10
+ movq %r11,%r12
+ adcq 24(%rsi),%r11
+ testq %r13,%r13
+
+ cmovzq %rax,%r8
+ cmovzq %rdx,%r9
+ movq %r8,0(%rdi)
+ cmovzq %rcx,%r10
+ movq %r9,8(%rdi)
+ cmovzq %r12,%r11
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+
+ popq %r13
+ popq %r12
+ .byte 0xf3,0xc3
+.size ecp_nistz256_sub,.-ecp_nistz256_sub
+
+
+
+.globl ecp_nistz256_neg
+.type ecp_nistz256_neg,@function
+.align 32
+ecp_nistz256_neg:
+ pushq %r12
+ pushq %r13
+
+ xorq %r8,%r8
+ xorq %r9,%r9
+ xorq %r10,%r10
+ xorq %r11,%r11
+ xorq %r13,%r13
+
+ subq 0(%rsi),%r8
+ sbbq 8(%rsi),%r9
+ sbbq 16(%rsi),%r10
+ movq %r8,%rax
+ sbbq 24(%rsi),%r11
+ leaq .Lpoly(%rip),%rsi
+ movq %r9,%rdx
+ sbbq $0,%r13
+
+ addq 0(%rsi),%r8
+ movq %r10,%rcx
+ adcq 8(%rsi),%r9
+ adcq 16(%rsi),%r10
+ movq %r11,%r12
+ adcq 24(%rsi),%r11
+ testq %r13,%r13
+
+ cmovzq %rax,%r8
+ cmovzq %rdx,%r9
+ movq %r8,0(%rdi)
+ cmovzq %rcx,%r10
+ movq %r9,8(%rdi)
+ cmovzq %r12,%r11
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+
+ popq %r13
+ popq %r12
+ .byte 0xf3,0xc3
+.size ecp_nistz256_neg,.-ecp_nistz256_neg
+
+
+
+
+.globl ecp_nistz256_to_mont
+.type ecp_nistz256_to_mont,@function
+.align 32
+ecp_nistz256_to_mont:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+ leaq .LRR(%rip),%rdx
+ jmp .Lmul_mont
+.size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
+
+
+
+
+
+
+
+.globl ecp_nistz256_mul_mont
+.type ecp_nistz256_mul_mont,@function
+.align 32
+ecp_nistz256_mul_mont:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+.Lmul_mont:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ cmpl $0x80100,%ecx
+ je .Lmul_montx
+ movq %rdx,%rbx
+ movq 0(%rdx),%rax
+ movq 0(%rsi),%r9
+ movq 8(%rsi),%r10
+ movq 16(%rsi),%r11
+ movq 24(%rsi),%r12
+
+ call __ecp_nistz256_mul_montq
+ jmp .Lmul_mont_done
+
+.align 32
+.Lmul_montx:
+ movq %rdx,%rbx
+ movq 0(%rdx),%rdx
+ movq 0(%rsi),%r9
+ movq 8(%rsi),%r10
+ movq 16(%rsi),%r11
+ movq 24(%rsi),%r12
+ leaq -128(%rsi),%rsi
+
+ call __ecp_nistz256_mul_montx
+.Lmul_mont_done:
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
+
+.type __ecp_nistz256_mul_montq,@function
+.align 32
+__ecp_nistz256_mul_montq:
+
+
+ movq %rax,%rbp
+ mulq %r9
+ movq .Lpoly+8(%rip),%r14
+ movq %rax,%r8
+ movq %rbp,%rax
+ movq %rdx,%r9
+
+ mulq %r10
+ movq .Lpoly+24(%rip),%r15
+ addq %rax,%r9
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%r10
+
+ mulq %r11
+ addq %rax,%r10
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%r11
+
+ mulq %r12
+ addq %rax,%r11
+ movq %r8,%rax
+ adcq $0,%rdx
+ xorq %r13,%r13
+ movq %rdx,%r12
+
+
+
+
+
+
+
+
+
+
+ movq %r8,%rbp
+ shlq $32,%r8
+ mulq %r15
+ shrq $32,%rbp
+ addq %r8,%r9
+ adcq %rbp,%r10
+ adcq %rax,%r11
+ movq 8(%rbx),%rax
+ adcq %rdx,%r12
+ adcq $0,%r13
+ xorq %r8,%r8
+
+
+
+ movq %rax,%rbp
+ mulq 0(%rsi)
+ addq %rax,%r9
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq 8(%rsi)
+ addq %rcx,%r10
+ adcq $0,%rdx
+ addq %rax,%r10
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq 16(%rsi)
+ addq %rcx,%r11
+ adcq $0,%rdx
+ addq %rax,%r11
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq 24(%rsi)
+ addq %rcx,%r12
+ adcq $0,%rdx
+ addq %rax,%r12
+ movq %r9,%rax
+ adcq %rdx,%r13
+ adcq $0,%r8
+
+
+
+ movq %r9,%rbp
+ shlq $32,%r9
+ mulq %r15
+ shrq $32,%rbp
+ addq %r9,%r10
+ adcq %rbp,%r11
+ adcq %rax,%r12
+ movq 16(%rbx),%rax
+ adcq %rdx,%r13
+ adcq $0,%r8
+ xorq %r9,%r9
+
+
+
+ movq %rax,%rbp
+ mulq 0(%rsi)
+ addq %rax,%r10
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq 8(%rsi)
+ addq %rcx,%r11
+ adcq $0,%rdx
+ addq %rax,%r11
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq 16(%rsi)
+ addq %rcx,%r12
+ adcq $0,%rdx
+ addq %rax,%r12
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq 24(%rsi)
+ addq %rcx,%r13
+ adcq $0,%rdx
+ addq %rax,%r13
+ movq %r10,%rax
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+
+
+ movq %r10,%rbp
+ shlq $32,%r10
+ mulq %r15
+ shrq $32,%rbp
+ addq %r10,%r11
+ adcq %rbp,%r12
+ adcq %rax,%r13
+ movq 24(%rbx),%rax
+ adcq %rdx,%r8
+ adcq $0,%r9
+ xorq %r10,%r10
+
+
+
+ movq %rax,%rbp
+ mulq 0(%rsi)
+ addq %rax,%r11
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq 8(%rsi)
+ addq %rcx,%r12
+ adcq $0,%rdx
+ addq %rax,%r12
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq 16(%rsi)
+ addq %rcx,%r13
+ adcq $0,%rdx
+ addq %rax,%r13
+ movq %rbp,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq 24(%rsi)
+ addq %rcx,%r8
+ adcq $0,%rdx
+ addq %rax,%r8
+ movq %r11,%rax
+ adcq %rdx,%r9
+ adcq $0,%r10
+
+
+
+ movq %r11,%rbp
+ shlq $32,%r11
+ mulq %r15
+ shrq $32,%rbp
+ addq %r11,%r12
+ adcq %rbp,%r13
+ movq %r12,%rcx
+ adcq %rax,%r8
+ adcq %rdx,%r9
+ movq %r13,%rbp
+ adcq $0,%r10
+
+
+
+ subq $-1,%r12
+ movq %r8,%rbx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%rdx
+ sbbq %r15,%r9
+ sbbq $0,%r10
+
+ cmovcq %rcx,%r12
+ cmovcq %rbp,%r13
+ movq %r12,0(%rdi)
+ cmovcq %rbx,%r8
+ movq %r13,8(%rdi)
+ cmovcq %rdx,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
+
+
+
+
+
+
+
+
+.globl ecp_nistz256_sqr_mont
+.type ecp_nistz256_sqr_mont,@function
+.align 32
+ecp_nistz256_sqr_mont:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ cmpl $0x80100,%ecx
+ je .Lsqr_montx
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r15
+ movq 24(%rsi),%r8
+
+ call __ecp_nistz256_sqr_montq
+ jmp .Lsqr_mont_done
+
+.align 32
+.Lsqr_montx:
+ movq 0(%rsi),%rdx
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r15
+ movq 24(%rsi),%r8
+ leaq -128(%rsi),%rsi
+
+ call __ecp_nistz256_sqr_montx
+.Lsqr_mont_done:
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
+
+.type __ecp_nistz256_sqr_montq,@function
+.align 32
+__ecp_nistz256_sqr_montq:
+ movq %rax,%r13
+ mulq %r14
+ movq %rax,%r9
+ movq %r15,%rax
+ movq %rdx,%r10
+
+ mulq %r13
+ addq %rax,%r10
+ movq %r8,%rax
+ adcq $0,%rdx
+ movq %rdx,%r11
+
+ mulq %r13
+ addq %rax,%r11
+ movq %r15,%rax
+ adcq $0,%rdx
+ movq %rdx,%r12
+
+
+ mulq %r14
+ addq %rax,%r11
+ movq %r8,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq %r14
+ addq %rax,%r12
+ movq %r8,%rax
+ adcq $0,%rdx
+ addq %rbp,%r12
+ movq %rdx,%r13
+ adcq $0,%r13
+
+
+ mulq %r15
+ xorq %r15,%r15
+ addq %rax,%r13
+ movq 0(%rsi),%rax
+ movq %rdx,%r14
+ adcq $0,%r14
+
+ addq %r9,%r9
+ adcq %r10,%r10
+ adcq %r11,%r11
+ adcq %r12,%r12
+ adcq %r13,%r13
+ adcq %r14,%r14
+ adcq $0,%r15
+
+ mulq %rax
+ movq %rax,%r8
+ movq 8(%rsi),%rax
+ movq %rdx,%rcx
+
+ mulq %rax
+ addq %rcx,%r9
+ adcq %rax,%r10
+ movq 16(%rsi),%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq %rax
+ addq %rcx,%r11
+ adcq %rax,%r12
+ movq 24(%rsi),%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ mulq %rax
+ addq %rcx,%r13
+ adcq %rax,%r14
+ movq %r8,%rax
+ adcq %rdx,%r15
+
+ movq .Lpoly+8(%rip),%rsi
+ movq .Lpoly+24(%rip),%rbp
+
+
+
+
+ movq %r8,%rcx
+ shlq $32,%r8
+ mulq %rbp
+ shrq $32,%rcx
+ addq %r8,%r9
+ adcq %rcx,%r10
+ adcq %rax,%r11
+ movq %r9,%rax
+ adcq $0,%rdx
+
+
+
+ movq %r9,%rcx
+ shlq $32,%r9
+ movq %rdx,%r8
+ mulq %rbp
+ shrq $32,%rcx
+ addq %r9,%r10
+ adcq %rcx,%r11
+ adcq %rax,%r8
+ movq %r10,%rax
+ adcq $0,%rdx
+
+
+
+ movq %r10,%rcx
+ shlq $32,%r10
+ movq %rdx,%r9
+ mulq %rbp
+ shrq $32,%rcx
+ addq %r10,%r11
+ adcq %rcx,%r8
+ adcq %rax,%r9
+ movq %r11,%rax
+ adcq $0,%rdx
+
+
+
+ movq %r11,%rcx
+ shlq $32,%r11
+ movq %rdx,%r10
+ mulq %rbp
+ shrq $32,%rcx
+ addq %r11,%r8
+ adcq %rcx,%r9
+ adcq %rax,%r10
+ adcq $0,%rdx
+ xorq %r11,%r11
+
+
+
+ addq %r8,%r12
+ adcq %r9,%r13
+ movq %r12,%r8
+ adcq %r10,%r14
+ adcq %rdx,%r15
+ movq %r13,%r9
+ adcq $0,%r11
+
+ subq $-1,%r12
+ movq %r14,%r10
+ sbbq %rsi,%r13
+ sbbq $0,%r14
+ movq %r15,%rcx
+ sbbq %rbp,%r15
+ sbbq $0,%r11
+
+ cmovcq %r8,%r12
+ cmovcq %r9,%r13
+ movq %r12,0(%rdi)
+ cmovcq %r10,%r14
+ movq %r13,8(%rdi)
+ cmovcq %rcx,%r15
+ movq %r14,16(%rdi)
+ movq %r15,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
+.type __ecp_nistz256_mul_montx,@function
+.align 32
+__ecp_nistz256_mul_montx:
+
+
+ mulxq %r9,%r8,%r9
+ mulxq %r10,%rcx,%r10
+ movq $32,%r14
+ xorq %r13,%r13
+ mulxq %r11,%rbp,%r11
+ movq .Lpoly+24(%rip),%r15
+ adcq %rcx,%r9
+ mulxq %r12,%rcx,%r12
+ movq %r8,%rdx
+ adcq %rbp,%r10
+ shlxq %r14,%r8,%rbp
+ adcq %rcx,%r11
+ shrxq %r14,%r8,%rcx
+ adcq $0,%r12
+
+
+
+ addq %rbp,%r9
+ adcq %rcx,%r10
+
+ mulxq %r15,%rcx,%rbp
+ movq 8(%rbx),%rdx
+ adcq %rcx,%r11
+ adcq %rbp,%r12
+ adcq $0,%r13
+ xorq %r8,%r8
+
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r9,%rdx
+ adcxq %rcx,%r12
+ shlxq %r14,%r9,%rcx
+ adoxq %rbp,%r13
+ shrxq %r14,%r9,%rbp
+
+ adcxq %r8,%r13
+ adoxq %r8,%r8
+ adcq $0,%r8
+
+
+
+ addq %rcx,%r10
+ adcq %rbp,%r11
+
+ mulxq %r15,%rcx,%rbp
+ movq 16(%rbx),%rdx
+ adcq %rcx,%r12
+ adcq %rbp,%r13
+ adcq $0,%r8
+ xorq %r9,%r9
+
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r10,%rdx
+ adcxq %rcx,%r13
+ shlxq %r14,%r10,%rcx
+ adoxq %rbp,%r8
+ shrxq %r14,%r10,%rbp
+
+ adcxq %r9,%r8
+ adoxq %r9,%r9
+ adcq $0,%r9
+
+
+
+ addq %rcx,%r11
+ adcq %rbp,%r12
+
+ mulxq %r15,%rcx,%rbp
+ movq 24(%rbx),%rdx
+ adcq %rcx,%r13
+ adcq %rbp,%r8
+ adcq $0,%r9
+ xorq %r10,%r10
+
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r13
+ adoxq %rbp,%r8
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r11,%rdx
+ adcxq %rcx,%r8
+ shlxq %r14,%r11,%rcx
+ adoxq %rbp,%r9
+ shrxq %r14,%r11,%rbp
+
+ adcxq %r10,%r9
+ adoxq %r10,%r10
+ adcq $0,%r10
+
+
+
+ addq %rcx,%r12
+ adcq %rbp,%r13
+
+ mulxq %r15,%rcx,%rbp
+ movq %r12,%rbx
+ movq .Lpoly+8(%rip),%r14
+ adcq %rcx,%r8
+ movq %r13,%rdx
+ adcq %rbp,%r9
+ adcq $0,%r10
+
+
+
+ xorl %eax,%eax
+ movq %r8,%rcx
+ sbbq $-1,%r12
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%rbp
+ sbbq %r15,%r9
+ sbbq $0,%r10
+
+ cmovcq %rbx,%r12
+ cmovcq %rdx,%r13
+ movq %r12,0(%rdi)
+ cmovcq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovcq %rbp,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
+
+.type __ecp_nistz256_sqr_montx,@function
+.align 32
+__ecp_nistz256_sqr_montx:
+ mulxq %r14,%r9,%r10
+ mulxq %r15,%rcx,%r11
+ xorl %eax,%eax
+ adcq %rcx,%r10
+ mulxq %r8,%rbp,%r12
+ movq %r14,%rdx
+ adcq %rbp,%r11
+ adcq $0,%r12
+ xorq %r13,%r13
+
+
+ mulxq %r15,%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq %r8,%rcx,%rbp
+ movq %r15,%rdx
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+ adcq $0,%r13
+
+
+ mulxq %r8,%rcx,%r14
+ movq 0+128(%rsi),%rdx
+ xorq %r15,%r15
+ adcxq %r9,%r9
+ adoxq %rcx,%r13
+ adcxq %r10,%r10
+ adoxq %r15,%r14
+
+ mulxq %rdx,%r8,%rbp
+ movq 8+128(%rsi),%rdx
+ adcxq %r11,%r11
+ adoxq %rbp,%r9
+ adcxq %r12,%r12
+ mulxq %rdx,%rcx,%rax
+ movq 16+128(%rsi),%rdx
+ adcxq %r13,%r13
+ adoxq %rcx,%r10
+ adcxq %r14,%r14
+.byte 0x67
+ mulxq %rdx,%rcx,%rbp
+ movq 24+128(%rsi),%rdx
+ adoxq %rax,%r11
+ adcxq %r15,%r15
+ adoxq %rcx,%r12
+ movq $32,%rsi
+ adoxq %rbp,%r13
+.byte 0x67,0x67
+ mulxq %rdx,%rcx,%rax
+ movq .Lpoly+24(%rip),%rdx
+ adoxq %rcx,%r14
+ shlxq %rsi,%r8,%rcx
+ adoxq %rax,%r15
+ shrxq %rsi,%r8,%rax
+ movq %rdx,%rbp
+
+
+ addq %rcx,%r9
+ adcq %rax,%r10
+
+ mulxq %r8,%rcx,%r8
+ adcq %rcx,%r11
+ shlxq %rsi,%r9,%rcx
+ adcq $0,%r8
+ shrxq %rsi,%r9,%rax
+
+
+ addq %rcx,%r10
+ adcq %rax,%r11
+
+ mulxq %r9,%rcx,%r9
+ adcq %rcx,%r8
+ shlxq %rsi,%r10,%rcx
+ adcq $0,%r9
+ shrxq %rsi,%r10,%rax
+
+
+ addq %rcx,%r11
+ adcq %rax,%r8
+
+ mulxq %r10,%rcx,%r10
+ adcq %rcx,%r9
+ shlxq %rsi,%r11,%rcx
+ adcq $0,%r10
+ shrxq %rsi,%r11,%rax
+
+
+ addq %rcx,%r8
+ adcq %rax,%r9
+
+ mulxq %r11,%rcx,%r11
+ adcq %rcx,%r10
+ adcq $0,%r11
+
+ xorq %rdx,%rdx
+ addq %r8,%r12
+ movq .Lpoly+8(%rip),%rsi
+ adcq %r9,%r13
+ movq %r12,%r8
+ adcq %r10,%r14
+ adcq %r11,%r15
+ movq %r13,%r9
+ adcq $0,%rdx
+
+ subq $-1,%r12
+ movq %r14,%r10
+ sbbq %rsi,%r13
+ sbbq $0,%r14
+ movq %r15,%r11
+ sbbq %rbp,%r15
+ sbbq $0,%rdx
+
+ cmovcq %r8,%r12
+ cmovcq %r9,%r13
+ movq %r12,0(%rdi)
+ cmovcq %r10,%r14
+ movq %r13,8(%rdi)
+ cmovcq %r11,%r15
+ movq %r14,16(%rdi)
+ movq %r15,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
+
+
+
+
+
+
+.globl ecp_nistz256_from_mont
+.type ecp_nistz256_from_mont,@function
+.align 32
+ecp_nistz256_from_mont:
+ pushq %r12
+ pushq %r13
+
+ movq 0(%rsi),%rax
+ movq .Lpoly+24(%rip),%r13
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+ movq %rax,%r8
+ movq .Lpoly+8(%rip),%r12
+
+
+
+ movq %rax,%rcx
+ shlq $32,%r8
+ mulq %r13
+ shrq $32,%rcx
+ addq %r8,%r9
+ adcq %rcx,%r10
+ adcq %rax,%r11
+ movq %r9,%rax
+ adcq $0,%rdx
+
+
+
+ movq %r9,%rcx
+ shlq $32,%r9
+ movq %rdx,%r8
+ mulq %r13
+ shrq $32,%rcx
+ addq %r9,%r10
+ adcq %rcx,%r11
+ adcq %rax,%r8
+ movq %r10,%rax
+ adcq $0,%rdx
+
+
+
+ movq %r10,%rcx
+ shlq $32,%r10
+ movq %rdx,%r9
+ mulq %r13
+ shrq $32,%rcx
+ addq %r10,%r11
+ adcq %rcx,%r8
+ adcq %rax,%r9
+ movq %r11,%rax
+ adcq $0,%rdx
+
+
+
+ movq %r11,%rcx
+ shlq $32,%r11
+ movq %rdx,%r10
+ mulq %r13
+ shrq $32,%rcx
+ addq %r11,%r8
+ adcq %rcx,%r9
+ movq %r8,%rcx
+ adcq %rax,%r10
+ movq %r9,%rsi
+ adcq $0,%rdx
+
+
+
+ subq $-1,%r8
+ movq %r10,%rax
+ sbbq %r12,%r9
+ sbbq $0,%r10
+ movq %rdx,%r11
+ sbbq %r13,%rdx
+ sbbq %r13,%r13
+
+ cmovnzq %rcx,%r8
+ cmovnzq %rsi,%r9
+ movq %r8,0(%rdi)
+ cmovnzq %rax,%r10
+ movq %r9,8(%rdi)
+ cmovzq %rdx,%r11
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+
+ popq %r13
+ popq %r12
+ .byte 0xf3,0xc3
+.size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
+
+
+.globl ecp_nistz256_select_w5
+.type ecp_nistz256_select_w5,@function
+.align 32
+ecp_nistz256_select_w5:
+ movl OPENSSL_ia32cap_P+8(%rip),%eax
+ testl $32,%eax
+ jnz .Lavx2_select_w5
+ movdqa .LOne(%rip),%xmm0
+ movd %edx,%xmm1
+
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+
+ movdqa %xmm0,%xmm8
+ pshufd $0,%xmm1,%xmm1
+
+ movq $16,%rax
+.Lselect_loop_sse_w5:
+
+ movdqa %xmm8,%xmm15
+ paddd %xmm0,%xmm8
+ pcmpeqd %xmm1,%xmm15
+
+ movdqa 0(%rsi),%xmm9
+ movdqa 16(%rsi),%xmm10
+ movdqa 32(%rsi),%xmm11
+ movdqa 48(%rsi),%xmm12
+ movdqa 64(%rsi),%xmm13
+ movdqa 80(%rsi),%xmm14
+ leaq 96(%rsi),%rsi
+
+ pand %xmm15,%xmm9
+ pand %xmm15,%xmm10
+ por %xmm9,%xmm2
+ pand %xmm15,%xmm11
+ por %xmm10,%xmm3
+ pand %xmm15,%xmm12
+ por %xmm11,%xmm4
+ pand %xmm15,%xmm13
+ por %xmm12,%xmm5
+ pand %xmm15,%xmm14
+ por %xmm13,%xmm6
+ por %xmm14,%xmm7
+
+ decq %rax
+ jnz .Lselect_loop_sse_w5
+
+ movdqu %xmm2,0(%rdi)
+ movdqu %xmm3,16(%rdi)
+ movdqu %xmm4,32(%rdi)
+ movdqu %xmm5,48(%rdi)
+ movdqu %xmm6,64(%rdi)
+ movdqu %xmm7,80(%rdi)
+ .byte 0xf3,0xc3
+.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
+
+
+
+.globl ecp_nistz256_select_w7
+.type ecp_nistz256_select_w7,@function
+.align 32
+ecp_nistz256_select_w7:
+ movl OPENSSL_ia32cap_P+8(%rip),%eax
+ testl $32,%eax
+ jnz .Lavx2_select_w7
+ movdqa .LOne(%rip),%xmm8
+ movd %edx,%xmm1
+
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+
+ movdqa %xmm8,%xmm0
+ pshufd $0,%xmm1,%xmm1
+ movq $64,%rax
+
+.Lselect_loop_sse_w7:
+ movdqa %xmm8,%xmm15
+ paddd %xmm0,%xmm8
+ movdqa 0(%rsi),%xmm9
+ movdqa 16(%rsi),%xmm10
+ pcmpeqd %xmm1,%xmm15
+ movdqa 32(%rsi),%xmm11
+ movdqa 48(%rsi),%xmm12
+ leaq 64(%rsi),%rsi
+
+ pand %xmm15,%xmm9
+ pand %xmm15,%xmm10
+ por %xmm9,%xmm2
+ pand %xmm15,%xmm11
+ por %xmm10,%xmm3
+ pand %xmm15,%xmm12
+ por %xmm11,%xmm4
+ prefetcht0 255(%rsi)
+ por %xmm12,%xmm5
+
+ decq %rax
+ jnz .Lselect_loop_sse_w7
+
+ movdqu %xmm2,0(%rdi)
+ movdqu %xmm3,16(%rdi)
+ movdqu %xmm4,32(%rdi)
+ movdqu %xmm5,48(%rdi)
+ .byte 0xf3,0xc3
+.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
+
+
+.type ecp_nistz256_avx2_select_w5,@function
+.align 32
+ecp_nistz256_avx2_select_w5:
+.Lavx2_select_w5:
+ vzeroupper
+ vmovdqa .LTwo(%rip),%ymm0
+
+ vpxor %ymm2,%ymm2,%ymm2
+ vpxor %ymm3,%ymm3,%ymm3
+ vpxor %ymm4,%ymm4,%ymm4
+
+ vmovdqa .LOne(%rip),%ymm5
+ vmovdqa .LTwo(%rip),%ymm10
+
+ vmovd %edx,%xmm1
+ vpermd %ymm1,%ymm2,%ymm1
+
+ movq $8,%rax
+.Lselect_loop_avx2_w5:
+
+ vmovdqa 0(%rsi),%ymm6
+ vmovdqa 32(%rsi),%ymm7
+ vmovdqa 64(%rsi),%ymm8
+
+ vmovdqa 96(%rsi),%ymm11
+ vmovdqa 128(%rsi),%ymm12
+ vmovdqa 160(%rsi),%ymm13
+
+ vpcmpeqd %ymm1,%ymm5,%ymm9
+ vpcmpeqd %ymm1,%ymm10,%ymm14
+
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpaddd %ymm0,%ymm10,%ymm10
+ leaq 192(%rsi),%rsi
+
+ vpand %ymm9,%ymm6,%ymm6
+ vpand %ymm9,%ymm7,%ymm7
+ vpand %ymm9,%ymm8,%ymm8
+ vpand %ymm14,%ymm11,%ymm11
+ vpand %ymm14,%ymm12,%ymm12
+ vpand %ymm14,%ymm13,%ymm13
+
+ vpxor %ymm6,%ymm2,%ymm2
+ vpxor %ymm7,%ymm3,%ymm3
+ vpxor %ymm8,%ymm4,%ymm4
+ vpxor %ymm11,%ymm2,%ymm2
+ vpxor %ymm12,%ymm3,%ymm3
+ vpxor %ymm13,%ymm4,%ymm4
+
+ decq %rax
+ jnz .Lselect_loop_avx2_w5
+
+ vmovdqu %ymm2,0(%rdi)
+ vmovdqu %ymm3,32(%rdi)
+ vmovdqu %ymm4,64(%rdi)
+ vzeroupper
+ .byte 0xf3,0xc3
+.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
+
+
+
+.globl ecp_nistz256_avx2_select_w7
+.type ecp_nistz256_avx2_select_w7,@function
+.align 32
+ecp_nistz256_avx2_select_w7:
+.Lavx2_select_w7:
+ vzeroupper
+ vmovdqa .LThree(%rip),%ymm0
+
+ vpxor %ymm2,%ymm2,%ymm2
+ vpxor %ymm3,%ymm3,%ymm3
+
+ vmovdqa .LOne(%rip),%ymm4
+ vmovdqa .LTwo(%rip),%ymm8
+ vmovdqa .LThree(%rip),%ymm12
+
+ vmovd %edx,%xmm1
+ vpermd %ymm1,%ymm2,%ymm1
+
+
+ movq $21,%rax
+.Lselect_loop_avx2_w7:
+
+ vmovdqa 0(%rsi),%ymm5
+ vmovdqa 32(%rsi),%ymm6
+
+ vmovdqa 64(%rsi),%ymm9
+ vmovdqa 96(%rsi),%ymm10
+
+ vmovdqa 128(%rsi),%ymm13
+ vmovdqa 160(%rsi),%ymm14
+
+ vpcmpeqd %ymm1,%ymm4,%ymm7
+ vpcmpeqd %ymm1,%ymm8,%ymm11
+ vpcmpeqd %ymm1,%ymm12,%ymm15
+
+ vpaddd %ymm0,%ymm4,%ymm4
+ vpaddd %ymm0,%ymm8,%ymm8
+ vpaddd %ymm0,%ymm12,%ymm12
+ leaq 192(%rsi),%rsi
+
+ vpand %ymm7,%ymm5,%ymm5
+ vpand %ymm7,%ymm6,%ymm6
+ vpand %ymm11,%ymm9,%ymm9
+ vpand %ymm11,%ymm10,%ymm10
+ vpand %ymm15,%ymm13,%ymm13
+ vpand %ymm15,%ymm14,%ymm14
+
+ vpxor %ymm5,%ymm2,%ymm2
+ vpxor %ymm6,%ymm3,%ymm3
+ vpxor %ymm9,%ymm2,%ymm2
+ vpxor %ymm10,%ymm3,%ymm3
+ vpxor %ymm13,%ymm2,%ymm2
+ vpxor %ymm14,%ymm3,%ymm3
+
+ decq %rax
+ jnz .Lselect_loop_avx2_w7
+
+
+ vmovdqa 0(%rsi),%ymm5
+ vmovdqa 32(%rsi),%ymm6
+
+ vpcmpeqd %ymm1,%ymm4,%ymm7
+
+ vpand %ymm7,%ymm5,%ymm5
+ vpand %ymm7,%ymm6,%ymm6
+
+ vpxor %ymm5,%ymm2,%ymm2
+ vpxor %ymm6,%ymm3,%ymm3
+
+ vmovdqu %ymm2,0(%rdi)
+ vmovdqu %ymm3,32(%rdi)
+ vzeroupper
+ .byte 0xf3,0xc3
+.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
+.type __ecp_nistz256_add_toq,@function
+.align 32
+__ecp_nistz256_add_toq:
+ xorq %r11,%r11
+ addq 0(%rbx),%r12
+ adcq 8(%rbx),%r13
+ movq %r12,%rax
+ adcq 16(%rbx),%r8
+ adcq 24(%rbx),%r9
+ movq %r13,%rbp
+ adcq $0,%r11
+
+ subq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+ sbbq $0,%r11
+
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
+ movq %r12,0(%rdi)
+ cmovcq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovcq %r10,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
+
+.type __ecp_nistz256_sub_fromq,@function
+.align 32
+__ecp_nistz256_sub_fromq:
+ subq 0(%rbx),%r12
+ sbbq 8(%rbx),%r13
+ movq %r12,%rax
+ sbbq 16(%rbx),%r8
+ sbbq 24(%rbx),%r9
+ movq %r13,%rbp
+ sbbq %r11,%r11
+
+ addq $-1,%r12
+ movq %r8,%rcx
+ adcq %r14,%r13
+ adcq $0,%r8
+ movq %r9,%r10
+ adcq %r15,%r9
+ testq %r11,%r11
+
+ cmovzq %rax,%r12
+ cmovzq %rbp,%r13
+ movq %r12,0(%rdi)
+ cmovzq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovzq %r10,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
+
+.type __ecp_nistz256_subq,@function
+.align 32
+__ecp_nistz256_subq:
+ subq %r12,%rax
+ sbbq %r13,%rbp
+ movq %rax,%r12
+ sbbq %r8,%rcx
+ sbbq %r9,%r10
+ movq %rbp,%r13
+ sbbq %r11,%r11
+
+ addq $-1,%rax
+ movq %rcx,%r8
+ adcq %r14,%rbp
+ adcq $0,%rcx
+ movq %r10,%r9
+ adcq %r15,%r10
+ testq %r11,%r11
+
+ cmovnzq %rax,%r12
+ cmovnzq %rbp,%r13
+ cmovnzq %rcx,%r8
+ cmovnzq %r10,%r9
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_subq,.-__ecp_nistz256_subq
+
+.type __ecp_nistz256_mul_by_2q,@function
+.align 32
+__ecp_nistz256_mul_by_2q:
+ xorq %r11,%r11
+ addq %r12,%r12
+ adcq %r13,%r13
+ movq %r12,%rax
+ adcq %r8,%r8
+ adcq %r9,%r9
+ movq %r13,%rbp
+ adcq $0,%r11
+
+ subq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+ sbbq $0,%r11
+
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
+ movq %r12,0(%rdi)
+ cmovcq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovcq %r10,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
+.globl ecp_nistz256_point_double
+.type ecp_nistz256_point_double,@function
+.align 32
+ecp_nistz256_point_double:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+ cmpl $0x80100,%ecx
+ je .Lpoint_doublex
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $160+8,%rsp
+
+.Lpoint_double_shortcutq:
+ movdqu 0(%rsi),%xmm0
+ movq %rsi,%rbx
+ movdqu 16(%rsi),%xmm1
+ movq 32+0(%rsi),%r12
+ movq 32+8(%rsi),%r13
+ movq 32+16(%rsi),%r8
+ movq 32+24(%rsi),%r9
+ movq .Lpoly+8(%rip),%r14
+ movq .Lpoly+24(%rip),%r15
+ movdqa %xmm0,96(%rsp)
+ movdqa %xmm1,96+16(%rsp)
+ leaq 32(%rdi),%r10
+ leaq 64(%rdi),%r11
+.byte 102,72,15,110,199
+.byte 102,73,15,110,202
+.byte 102,73,15,110,211
+
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_by_2q
+
+ movq 64+0(%rsi),%rax
+ movq 64+8(%rsi),%r14
+ movq 64+16(%rsi),%r15
+ movq 64+24(%rsi),%r8
+ leaq 64-0(%rsi),%rsi
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sqr_montq
+
+ movq 0+0(%rsp),%rax
+ movq 8+0(%rsp),%r14
+ leaq 0+0(%rsp),%rsi
+ movq 16+0(%rsp),%r15
+ movq 24+0(%rsp),%r8
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_sqr_montq
+
+ movq 32(%rbx),%rax
+ movq 64+0(%rbx),%r9
+ movq 64+8(%rbx),%r10
+ movq 64+16(%rbx),%r11
+ movq 64+24(%rbx),%r12
+ leaq 64-0(%rbx),%rsi
+ leaq 32(%rbx),%rbx
+.byte 102,72,15,126,215
+ call __ecp_nistz256_mul_montq
+ call __ecp_nistz256_mul_by_2q
+
+ movq 96+0(%rsp),%r12
+ movq 96+8(%rsp),%r13
+ leaq 64(%rsp),%rbx
+ movq 96+16(%rsp),%r8
+ movq 96+24(%rsp),%r9
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_add_toq
+
+ movq 96+0(%rsp),%r12
+ movq 96+8(%rsp),%r13
+ leaq 64(%rsp),%rbx
+ movq 96+16(%rsp),%r8
+ movq 96+24(%rsp),%r9
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sub_fromq
+
+ movq 0+0(%rsp),%rax
+ movq 8+0(%rsp),%r14
+ leaq 0+0(%rsp),%rsi
+ movq 16+0(%rsp),%r15
+ movq 24+0(%rsp),%r8
+.byte 102,72,15,126,207
+ call __ecp_nistz256_sqr_montq
+ xorq %r9,%r9
+ movq %r12,%rax
+ addq $-1,%r12
+ movq %r13,%r10
+ adcq %rsi,%r13
+ movq %r14,%rcx
+ adcq $0,%r14
+ movq %r15,%r8
+ adcq %rbp,%r15
+ adcq $0,%r9
+ xorq %rsi,%rsi
+ testq $1,%rax
+
+ cmovzq %rax,%r12
+ cmovzq %r10,%r13
+ cmovzq %rcx,%r14
+ cmovzq %r8,%r15
+ cmovzq %rsi,%r9
+
+ movq %r13,%rax
+ shrq $1,%r12
+ shlq $63,%rax
+ movq %r14,%r10
+ shrq $1,%r13
+ orq %rax,%r12
+ shlq $63,%r10
+ movq %r15,%rcx
+ shrq $1,%r14
+ orq %r10,%r13
+ shlq $63,%rcx
+ movq %r12,0(%rdi)
+ shrq $1,%r15
+ movq %r13,8(%rdi)
+ shlq $63,%r9
+ orq %rcx,%r14
+ orq %r9,%r15
+ movq %r14,16(%rdi)
+ movq %r15,24(%rdi)
+ movq 64(%rsp),%rax
+ leaq 64(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq 0+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_mul_by_2q
+
+ leaq 32(%rsp),%rbx
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_add_toq
+
+ movq 96(%rsp),%rax
+ leaq 96(%rsp),%rbx
+ movq 0+0(%rsp),%r9
+ movq 8+0(%rsp),%r10
+ leaq 0+0(%rsp),%rsi
+ movq 16+0(%rsp),%r11
+ movq 24+0(%rsp),%r12
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_mul_by_2q
+
+ movq 0+32(%rsp),%rax
+ movq 8+32(%rsp),%r14
+ leaq 0+32(%rsp),%rsi
+ movq 16+32(%rsp),%r15
+ movq 24+32(%rsp),%r8
+.byte 102,72,15,126,199
+ call __ecp_nistz256_sqr_montq
+
+ leaq 128(%rsp),%rbx
+ movq %r14,%r8
+ movq %r15,%r9
+ movq %rsi,%r14
+ movq %rbp,%r15
+ call __ecp_nistz256_sub_fromq
+
+ movq 0+0(%rsp),%rax
+ movq 0+8(%rsp),%rbp
+ movq 0+16(%rsp),%rcx
+ movq 0+24(%rsp),%r10
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_subq
+
+ movq 32(%rsp),%rax
+ leaq 32(%rsp),%rbx
+ movq %r12,%r14
+ xorl %ecx,%ecx
+ movq %r12,0+0(%rsp)
+ movq %r13,%r10
+ movq %r13,0+8(%rsp)
+ cmovzq %r8,%r11
+ movq %r8,0+16(%rsp)
+ leaq 0-0(%rsp),%rsi
+ cmovzq %r9,%r12
+ movq %r9,0+24(%rsp)
+ movq %r14,%r9
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+.byte 102,72,15,126,203
+.byte 102,72,15,126,207
+ call __ecp_nistz256_sub_fromq
+
+ addq $160+8,%rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
+.globl ecp_nistz256_point_add
+.type ecp_nistz256_point_add,@function
+.align 32
+ecp_nistz256_point_add:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+ cmpl $0x80100,%ecx
+ je .Lpoint_addx
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $576+8,%rsp
+
+ movdqu 0(%rsi),%xmm0
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ movdqu 48(%rsi),%xmm3
+ movdqu 64(%rsi),%xmm4
+ movdqu 80(%rsi),%xmm5
+ movq %rsi,%rbx
+ movq %rdx,%rsi
+ movdqa %xmm0,384(%rsp)
+ movdqa %xmm1,384+16(%rsp)
+ movdqa %xmm2,416(%rsp)
+ movdqa %xmm3,416+16(%rsp)
+ movdqa %xmm4,448(%rsp)
+ movdqa %xmm5,448+16(%rsp)
+ por %xmm4,%xmm5
+
+ movdqu 0(%rsi),%xmm0
+ pshufd $0xb1,%xmm5,%xmm3
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ por %xmm3,%xmm5
+ movdqu 48(%rsi),%xmm3
+ movq 64+0(%rsi),%rax
+ movq 64+8(%rsi),%r14
+ movq 64+16(%rsi),%r15
+ movq 64+24(%rsi),%r8
+ movdqa %xmm0,480(%rsp)
+ pshufd $0x1e,%xmm5,%xmm4
+ movdqa %xmm1,480+16(%rsp)
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
+ movdqa %xmm2,512(%rsp)
+ movdqa %xmm3,512+16(%rsp)
+ por %xmm4,%xmm5
+ pxor %xmm4,%xmm4
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
+
+ leaq 64-0(%rsi),%rsi
+ movq %rax,544+0(%rsp)
+ movq %r14,544+8(%rsp)
+ movq %r15,544+16(%rsp)
+ movq %r8,544+24(%rsp)
+ leaq 96(%rsp),%rdi
+ call __ecp_nistz256_sqr_montq
+
+ pcmpeqd %xmm4,%xmm5
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
+ pshufd $0,%xmm5,%xmm5
+ pshufd $0x1e,%xmm4,%xmm3
+ por %xmm3,%xmm4
+ pxor %xmm3,%xmm3
+ pcmpeqd %xmm3,%xmm4
+ pshufd $0,%xmm4,%xmm4
+ movq 64+0(%rbx),%rax
+ movq 64+8(%rbx),%r14
+ movq 64+16(%rbx),%r15
+ movq 64+24(%rbx),%r8
+.byte 102,72,15,110,203
+
+ leaq 64-0(%rbx),%rsi
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_sqr_montq
+
+ movq 544(%rsp),%rax
+ leaq 544(%rsp),%rbx
+ movq 0+96(%rsp),%r9
+ movq 8+96(%rsp),%r10
+ leaq 0+96(%rsp),%rsi
+ movq 16+96(%rsp),%r11
+ movq 24+96(%rsp),%r12
+ leaq 224(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 448(%rsp),%rax
+ leaq 448(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq 0+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 416(%rsp),%rax
+ leaq 416(%rsp),%rbx
+ movq 0+224(%rsp),%r9
+ movq 8+224(%rsp),%r10
+ leaq 0+224(%rsp),%rsi
+ movq 16+224(%rsp),%r11
+ movq 24+224(%rsp),%r12
+ leaq 224(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 512(%rsp),%rax
+ leaq 512(%rsp),%rbx
+ movq 0+256(%rsp),%r9
+ movq 8+256(%rsp),%r10
+ leaq 0+256(%rsp),%rsi
+ movq 16+256(%rsp),%r11
+ movq 24+256(%rsp),%r12
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ leaq 224(%rsp),%rbx
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sub_fromq
+
+ orq %r13,%r12
+ movdqa %xmm4,%xmm2
+ orq %r8,%r12
+ orq %r9,%r12
+ por %xmm5,%xmm2
+.byte 102,73,15,110,220
+
+ movq 384(%rsp),%rax
+ leaq 384(%rsp),%rbx
+ movq 0+96(%rsp),%r9
+ movq 8+96(%rsp),%r10
+ leaq 0+96(%rsp),%rsi
+ movq 16+96(%rsp),%r11
+ movq 24+96(%rsp),%r12
+ leaq 160(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 480(%rsp),%rax
+ leaq 480(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq 0+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 192(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ leaq 160(%rsp),%rbx
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_sub_fromq
+
+ orq %r13,%r12
+ orq %r8,%r12
+ orq %r9,%r12
+
+.byte 0x3e
+ jnz .Ladd_proceedq
+.byte 102,73,15,126,208
+.byte 102,73,15,126,217
+ testq %r8,%r8
+ jnz .Ladd_proceedq
+ testq %r9,%r9
+ jz .Ladd_doubleq
+
+.byte 102,72,15,126,199
+ pxor %xmm0,%xmm0
+ movdqu %xmm0,0(%rdi)
+ movdqu %xmm0,16(%rdi)
+ movdqu %xmm0,32(%rdi)
+ movdqu %xmm0,48(%rdi)
+ movdqu %xmm0,64(%rdi)
+ movdqu %xmm0,80(%rdi)
+ jmp .Ladd_doneq
+
+.align 32
+.Ladd_doubleq:
+.byte 102,72,15,126,206
+.byte 102,72,15,126,199
+ addq $416,%rsp
+ jmp .Lpoint_double_shortcutq
+
+.align 32
+.Ladd_proceedq:
+ movq 0+64(%rsp),%rax
+ movq 8+64(%rsp),%r14
+ leaq 0+64(%rsp),%rsi
+ movq 16+64(%rsp),%r15
+ movq 24+64(%rsp),%r8
+ leaq 96(%rsp),%rdi
+ call __ecp_nistz256_sqr_montq
+
+ movq 448(%rsp),%rax
+ leaq 448(%rsp),%rbx
+ movq 0+0(%rsp),%r9
+ movq 8+0(%rsp),%r10
+ leaq 0+0(%rsp),%rsi
+ movq 16+0(%rsp),%r11
+ movq 24+0(%rsp),%r12
+ leaq 352(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 0+0(%rsp),%rax
+ movq 8+0(%rsp),%r14
+ leaq 0+0(%rsp),%rsi
+ movq 16+0(%rsp),%r15
+ movq 24+0(%rsp),%r8
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_sqr_montq
+
+ movq 544(%rsp),%rax
+ leaq 544(%rsp),%rbx
+ movq 0+352(%rsp),%r9
+ movq 8+352(%rsp),%r10
+ leaq 0+352(%rsp),%rsi
+ movq 16+352(%rsp),%r11
+ movq 24+352(%rsp),%r12
+ leaq 352(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 0(%rsp),%rax
+ leaq 0(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq 0+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 160(%rsp),%rax
+ leaq 160(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq 0+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 192(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+
+
+
+ xorq %r11,%r11
+ addq %r12,%r12
+ leaq 96(%rsp),%rsi
+ adcq %r13,%r13
+ movq %r12,%rax
+ adcq %r8,%r8
+ adcq %r9,%r9
+ movq %r13,%rbp
+ adcq $0,%r11
+
+ subq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+ sbbq $0,%r11
+
+ cmovcq %rax,%r12
+ movq 0(%rsi),%rax
+ cmovcq %rbp,%r13
+ movq 8(%rsi),%rbp
+ cmovcq %rcx,%r8
+ movq 16(%rsi),%rcx
+ cmovcq %r10,%r9
+ movq 24(%rsi),%r10
+
+ call __ecp_nistz256_subq
+
+ leaq 128(%rsp),%rbx
+ leaq 288(%rsp),%rdi
+ call __ecp_nistz256_sub_fromq
+
+ movq 192+0(%rsp),%rax
+ movq 192+8(%rsp),%rbp
+ movq 192+16(%rsp),%rcx
+ movq 192+24(%rsp),%r10
+ leaq 320(%rsp),%rdi
+
+ call __ecp_nistz256_subq
+
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+ movq 128(%rsp),%rax
+ leaq 128(%rsp),%rbx
+ movq 0+224(%rsp),%r9
+ movq 8+224(%rsp),%r10
+ leaq 0+224(%rsp),%rsi
+ movq 16+224(%rsp),%r11
+ movq 24+224(%rsp),%r12
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 320(%rsp),%rax
+ leaq 320(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq 0+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 320(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ leaq 256(%rsp),%rbx
+ leaq 320(%rsp),%rdi
+ call __ecp_nistz256_sub_fromq
+
+.byte 102,72,15,126,199
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 352(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 352+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 544(%rsp),%xmm2
+ pand 544+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 448(%rsp),%xmm2
+ pand 448+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,64(%rdi)
+ movdqu %xmm3,80(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 288(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 288+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 480(%rsp),%xmm2
+ pand 480+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 384(%rsp),%xmm2
+ pand 384+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,0(%rdi)
+ movdqu %xmm3,16(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 320(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 320+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 512(%rsp),%xmm2
+ pand 512+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 416(%rsp),%xmm2
+ pand 416+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,32(%rdi)
+ movdqu %xmm3,48(%rdi)
+
+.Ladd_doneq:
+ addq $576+8,%rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_point_add,.-ecp_nistz256_point_add
+.globl ecp_nistz256_point_add_affine
+.type ecp_nistz256_point_add_affine,@function
+.align 32
+ecp_nistz256_point_add_affine:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+ cmpl $0x80100,%ecx
+ je .Lpoint_add_affinex
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $480+8,%rsp
+
+ movdqu 0(%rsi),%xmm0
+ movq %rdx,%rbx
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ movdqu 48(%rsi),%xmm3
+ movdqu 64(%rsi),%xmm4
+ movdqu 80(%rsi),%xmm5
+ movq 64+0(%rsi),%rax
+ movq 64+8(%rsi),%r14
+ movq 64+16(%rsi),%r15
+ movq 64+24(%rsi),%r8
+ movdqa %xmm0,320(%rsp)
+ movdqa %xmm1,320+16(%rsp)
+ movdqa %xmm2,352(%rsp)
+ movdqa %xmm3,352+16(%rsp)
+ movdqa %xmm4,384(%rsp)
+ movdqa %xmm5,384+16(%rsp)
+ por %xmm4,%xmm5
+
+ movdqu 0(%rbx),%xmm0
+ pshufd $0xb1,%xmm5,%xmm3
+ movdqu 16(%rbx),%xmm1
+ movdqu 32(%rbx),%xmm2
+ por %xmm3,%xmm5
+ movdqu 48(%rbx),%xmm3
+ movdqa %xmm0,416(%rsp)
+ pshufd $0x1e,%xmm5,%xmm4
+ movdqa %xmm1,416+16(%rsp)
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
+ movdqa %xmm2,448(%rsp)
+ movdqa %xmm3,448+16(%rsp)
+ por %xmm2,%xmm3
+ por %xmm4,%xmm5
+ pxor %xmm4,%xmm4
+ por %xmm1,%xmm3
+
+ leaq 64-0(%rsi),%rsi
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_sqr_montq
+
+ pcmpeqd %xmm4,%xmm5
+ pshufd $0xb1,%xmm3,%xmm4
+ movq 0(%rbx),%rax
+
+ movq %r12,%r9
+ por %xmm3,%xmm4
+ pshufd $0,%xmm5,%xmm5
+ pshufd $0x1e,%xmm4,%xmm3
+ movq %r13,%r10
+ por %xmm3,%xmm4
+ pxor %xmm3,%xmm3
+ movq %r14,%r11
+ pcmpeqd %xmm3,%xmm4
+ pshufd $0,%xmm4,%xmm4
+
+ leaq 32-0(%rsp),%rsi
+ movq %r15,%r12
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ leaq 320(%rsp),%rbx
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sub_fromq
+
+ movq 384(%rsp),%rax
+ leaq 384(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq 0+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 384(%rsp),%rax
+ leaq 384(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq 0+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 288(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 448(%rsp),%rax
+ leaq 448(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq 0+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ leaq 352(%rsp),%rbx
+ leaq 96(%rsp),%rdi
+ call __ecp_nistz256_sub_fromq
+
+ movq 0+64(%rsp),%rax
+ movq 8+64(%rsp),%r14
+ leaq 0+64(%rsp),%rsi
+ movq 16+64(%rsp),%r15
+ movq 24+64(%rsp),%r8
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_sqr_montq
+
+ movq 0+96(%rsp),%rax
+ movq 8+96(%rsp),%r14
+ leaq 0+96(%rsp),%rsi
+ movq 16+96(%rsp),%r15
+ movq 24+96(%rsp),%r8
+ leaq 192(%rsp),%rdi
+ call __ecp_nistz256_sqr_montq
+
+ movq 128(%rsp),%rax
+ leaq 128(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq 0+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 160(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 320(%rsp),%rax
+ leaq 320(%rsp),%rbx
+ movq 0+128(%rsp),%r9
+ movq 8+128(%rsp),%r10
+ leaq 0+128(%rsp),%rsi
+ movq 16+128(%rsp),%r11
+ movq 24+128(%rsp),%r12
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+
+
+
+ xorq %r11,%r11
+ addq %r12,%r12
+ leaq 192(%rsp),%rsi
+ adcq %r13,%r13
+ movq %r12,%rax
+ adcq %r8,%r8
+ adcq %r9,%r9
+ movq %r13,%rbp
+ adcq $0,%r11
+
+ subq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+ sbbq $0,%r11
+
+ cmovcq %rax,%r12
+ movq 0(%rsi),%rax
+ cmovcq %rbp,%r13
+ movq 8(%rsi),%rbp
+ cmovcq %rcx,%r8
+ movq 16(%rsi),%rcx
+ cmovcq %r10,%r9
+ movq 24(%rsi),%r10
+
+ call __ecp_nistz256_subq
+
+ leaq 160(%rsp),%rbx
+ leaq 224(%rsp),%rdi
+ call __ecp_nistz256_sub_fromq
+
+ movq 0+0(%rsp),%rax
+ movq 0+8(%rsp),%rbp
+ movq 0+16(%rsp),%rcx
+ movq 0+24(%rsp),%r10
+ leaq 64(%rsp),%rdi
+
+ call __ecp_nistz256_subq
+
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+ movq 352(%rsp),%rax
+ leaq 352(%rsp),%rbx
+ movq 0+160(%rsp),%r9
+ movq 8+160(%rsp),%r10
+ leaq 0+160(%rsp),%rsi
+ movq 16+160(%rsp),%r11
+ movq 24+160(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ movq 96(%rsp),%rax
+ leaq 96(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq 0+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_mul_montq
+
+ leaq 32(%rsp),%rbx
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_sub_fromq
+
+.byte 102,72,15,126,199
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 288(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 288+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand .LONE_mont(%rip),%xmm2
+ pand .LONE_mont+16(%rip),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 384(%rsp),%xmm2
+ pand 384+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,64(%rdi)
+ movdqu %xmm3,80(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 224(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 224+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 416(%rsp),%xmm2
+ pand 416+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 320(%rsp),%xmm2
+ pand 320+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,0(%rdi)
+ movdqu %xmm3,16(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 256(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 256+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 448(%rsp),%xmm2
+ pand 448+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 352(%rsp),%xmm2
+ pand 352+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,32(%rdi)
+ movdqu %xmm3,48(%rdi)
+
+ addq $480+8,%rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
+.type __ecp_nistz256_add_tox,@function
+.align 32
+__ecp_nistz256_add_tox:
+ xorq %r11,%r11
+ adcq 0(%rbx),%r12
+ adcq 8(%rbx),%r13
+ movq %r12,%rax
+ adcq 16(%rbx),%r8
+ adcq 24(%rbx),%r9
+ movq %r13,%rbp
+ adcq $0,%r11
+
+ xorq %r10,%r10
+ sbbq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+ sbbq $0,%r11
+
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
+ movq %r12,0(%rdi)
+ cmovcq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovcq %r10,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
+
+.type __ecp_nistz256_sub_fromx,@function
+.align 32
+__ecp_nistz256_sub_fromx:
+ xorq %r11,%r11
+ sbbq 0(%rbx),%r12
+ sbbq 8(%rbx),%r13
+ movq %r12,%rax
+ sbbq 16(%rbx),%r8
+ sbbq 24(%rbx),%r9
+ movq %r13,%rbp
+ sbbq $0,%r11
+
+ xorq %r10,%r10
+ adcq $-1,%r12
+ movq %r8,%rcx
+ adcq %r14,%r13
+ adcq $0,%r8
+ movq %r9,%r10
+ adcq %r15,%r9
+
+ btq $0,%r11
+ cmovncq %rax,%r12
+ cmovncq %rbp,%r13
+ movq %r12,0(%rdi)
+ cmovncq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovncq %r10,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
+
+.type __ecp_nistz256_subx,@function
+.align 32
+__ecp_nistz256_subx:
+ xorq %r11,%r11
+ sbbq %r12,%rax
+ sbbq %r13,%rbp
+ movq %rax,%r12
+ sbbq %r8,%rcx
+ sbbq %r9,%r10
+ movq %rbp,%r13
+ sbbq $0,%r11
+
+ xorq %r9,%r9
+ adcq $-1,%rax
+ movq %rcx,%r8
+ adcq %r14,%rbp
+ adcq $0,%rcx
+ movq %r10,%r9
+ adcq %r15,%r10
+
+ btq $0,%r11
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
+ cmovcq %rcx,%r8
+ cmovcq %r10,%r9
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_subx,.-__ecp_nistz256_subx
+
+.type __ecp_nistz256_mul_by_2x,@function
+.align 32
+__ecp_nistz256_mul_by_2x:
+ xorq %r11,%r11
+ adcq %r12,%r12
+ adcq %r13,%r13
+ movq %r12,%rax
+ adcq %r8,%r8
+ adcq %r9,%r9
+ movq %r13,%rbp
+ adcq $0,%r11
+
+ xorq %r10,%r10
+ sbbq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+ sbbq $0,%r11
+
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
+ movq %r12,0(%rdi)
+ cmovcq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovcq %r10,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
+.type ecp_nistz256_point_doublex,@function
+.align 32
+ecp_nistz256_point_doublex:
+.Lpoint_doublex:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $160+8,%rsp
+
+.Lpoint_double_shortcutx:
+ movdqu 0(%rsi),%xmm0
+ movq %rsi,%rbx
+ movdqu 16(%rsi),%xmm1
+ movq 32+0(%rsi),%r12
+ movq 32+8(%rsi),%r13
+ movq 32+16(%rsi),%r8
+ movq 32+24(%rsi),%r9
+ movq .Lpoly+8(%rip),%r14
+ movq .Lpoly+24(%rip),%r15
+ movdqa %xmm0,96(%rsp)
+ movdqa %xmm1,96+16(%rsp)
+ leaq 32(%rdi),%r10
+ leaq 64(%rdi),%r11
+.byte 102,72,15,110,199
+.byte 102,73,15,110,202
+.byte 102,73,15,110,211
+
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_by_2x
+
+ movq 64+0(%rsi),%rdx
+ movq 64+8(%rsi),%r14
+ movq 64+16(%rsi),%r15
+ movq 64+24(%rsi),%r8
+ leaq 64-128(%rsi),%rsi
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 0+0(%rsp),%rdx
+ movq 8+0(%rsp),%r14
+ leaq -128+0(%rsp),%rsi
+ movq 16+0(%rsp),%r15
+ movq 24+0(%rsp),%r8
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 32(%rbx),%rdx
+ movq 64+0(%rbx),%r9
+ movq 64+8(%rbx),%r10
+ movq 64+16(%rbx),%r11
+ movq 64+24(%rbx),%r12
+ leaq 64-128(%rbx),%rsi
+ leaq 32(%rbx),%rbx
+.byte 102,72,15,126,215
+ call __ecp_nistz256_mul_montx
+ call __ecp_nistz256_mul_by_2x
+
+ movq 96+0(%rsp),%r12
+ movq 96+8(%rsp),%r13
+ leaq 64(%rsp),%rbx
+ movq 96+16(%rsp),%r8
+ movq 96+24(%rsp),%r9
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_add_tox
+
+ movq 96+0(%rsp),%r12
+ movq 96+8(%rsp),%r13
+ leaq 64(%rsp),%rbx
+ movq 96+16(%rsp),%r8
+ movq 96+24(%rsp),%r9
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ movq 0+0(%rsp),%rdx
+ movq 8+0(%rsp),%r14
+ leaq -128+0(%rsp),%rsi
+ movq 16+0(%rsp),%r15
+ movq 24+0(%rsp),%r8
+.byte 102,72,15,126,207
+ call __ecp_nistz256_sqr_montx
+ xorq %r9,%r9
+ movq %r12,%rax
+ addq $-1,%r12
+ movq %r13,%r10
+ adcq %rsi,%r13
+ movq %r14,%rcx
+ adcq $0,%r14
+ movq %r15,%r8
+ adcq %rbp,%r15
+ adcq $0,%r9
+ xorq %rsi,%rsi
+ testq $1,%rax
+
+ cmovzq %rax,%r12
+ cmovzq %r10,%r13
+ cmovzq %rcx,%r14
+ cmovzq %r8,%r15
+ cmovzq %rsi,%r9
+
+ movq %r13,%rax
+ shrq $1,%r12
+ shlq $63,%rax
+ movq %r14,%r10
+ shrq $1,%r13
+ orq %rax,%r12
+ shlq $63,%r10
+ movq %r15,%rcx
+ shrq $1,%r14
+ orq %r10,%r13
+ shlq $63,%rcx
+ movq %r12,0(%rdi)
+ shrq $1,%r15
+ movq %r13,8(%rdi)
+ shlq $63,%r9
+ orq %rcx,%r14
+ orq %r9,%r15
+ movq %r14,16(%rdi)
+ movq %r15,24(%rdi)
+ movq 64(%rsp),%rdx
+ leaq 64(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_mul_by_2x
+
+ leaq 32(%rsp),%rbx
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_add_tox
+
+ movq 96(%rsp),%rdx
+ leaq 96(%rsp),%rbx
+ movq 0+0(%rsp),%r9
+ movq 8+0(%rsp),%r10
+ leaq -128+0(%rsp),%rsi
+ movq 16+0(%rsp),%r11
+ movq 24+0(%rsp),%r12
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_mul_by_2x
+
+ movq 0+32(%rsp),%rdx
+ movq 8+32(%rsp),%r14
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r15
+ movq 24+32(%rsp),%r8
+.byte 102,72,15,126,199
+ call __ecp_nistz256_sqr_montx
+
+ leaq 128(%rsp),%rbx
+ movq %r14,%r8
+ movq %r15,%r9
+ movq %rsi,%r14
+ movq %rbp,%r15
+ call __ecp_nistz256_sub_fromx
+
+ movq 0+0(%rsp),%rax
+ movq 0+8(%rsp),%rbp
+ movq 0+16(%rsp),%rcx
+ movq 0+24(%rsp),%r10
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_subx
+
+ movq 32(%rsp),%rdx
+ leaq 32(%rsp),%rbx
+ movq %r12,%r14
+ xorl %ecx,%ecx
+ movq %r12,0+0(%rsp)
+ movq %r13,%r10
+ movq %r13,0+8(%rsp)
+ cmovzq %r8,%r11
+ movq %r8,0+16(%rsp)
+ leaq 0-128(%rsp),%rsi
+ cmovzq %r9,%r12
+ movq %r9,0+24(%rsp)
+ movq %r14,%r9
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+.byte 102,72,15,126,203
+.byte 102,72,15,126,207
+ call __ecp_nistz256_sub_fromx
+
+ addq $160+8,%rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
+.type ecp_nistz256_point_addx,@function
+.align 32
+ecp_nistz256_point_addx:
+.Lpoint_addx:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $576+8,%rsp
+
+ movdqu 0(%rsi),%xmm0
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ movdqu 48(%rsi),%xmm3
+ movdqu 64(%rsi),%xmm4
+ movdqu 80(%rsi),%xmm5
+ movq %rsi,%rbx
+ movq %rdx,%rsi
+ movdqa %xmm0,384(%rsp)
+ movdqa %xmm1,384+16(%rsp)
+ movdqa %xmm2,416(%rsp)
+ movdqa %xmm3,416+16(%rsp)
+ movdqa %xmm4,448(%rsp)
+ movdqa %xmm5,448+16(%rsp)
+ por %xmm4,%xmm5
+
+ movdqu 0(%rsi),%xmm0
+ pshufd $0xb1,%xmm5,%xmm3
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ por %xmm3,%xmm5
+ movdqu 48(%rsi),%xmm3
+ movq 64+0(%rsi),%rdx
+ movq 64+8(%rsi),%r14
+ movq 64+16(%rsi),%r15
+ movq 64+24(%rsi),%r8
+ movdqa %xmm0,480(%rsp)
+ pshufd $0x1e,%xmm5,%xmm4
+ movdqa %xmm1,480+16(%rsp)
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
+ movdqa %xmm2,512(%rsp)
+ movdqa %xmm3,512+16(%rsp)
+ por %xmm4,%xmm5
+ pxor %xmm4,%xmm4
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
+
+ leaq 64-128(%rsi),%rsi
+ movq %rdx,544+0(%rsp)
+ movq %r14,544+8(%rsp)
+ movq %r15,544+16(%rsp)
+ movq %r8,544+24(%rsp)
+ leaq 96(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ pcmpeqd %xmm4,%xmm5
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
+ pshufd $0,%xmm5,%xmm5
+ pshufd $0x1e,%xmm4,%xmm3
+ por %xmm3,%xmm4
+ pxor %xmm3,%xmm3
+ pcmpeqd %xmm3,%xmm4
+ pshufd $0,%xmm4,%xmm4
+ movq 64+0(%rbx),%rdx
+ movq 64+8(%rbx),%r14
+ movq 64+16(%rbx),%r15
+ movq 64+24(%rbx),%r8
+.byte 102,72,15,110,203
+
+ leaq 64-128(%rbx),%rsi
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 544(%rsp),%rdx
+ leaq 544(%rsp),%rbx
+ movq 0+96(%rsp),%r9
+ movq 8+96(%rsp),%r10
+ leaq -128+96(%rsp),%rsi
+ movq 16+96(%rsp),%r11
+ movq 24+96(%rsp),%r12
+ leaq 224(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 448(%rsp),%rdx
+ leaq 448(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 416(%rsp),%rdx
+ leaq 416(%rsp),%rbx
+ movq 0+224(%rsp),%r9
+ movq 8+224(%rsp),%r10
+ leaq -128+224(%rsp),%rsi
+ movq 16+224(%rsp),%r11
+ movq 24+224(%rsp),%r12
+ leaq 224(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 512(%rsp),%rdx
+ leaq 512(%rsp),%rbx
+ movq 0+256(%rsp),%r9
+ movq 8+256(%rsp),%r10
+ leaq -128+256(%rsp),%rsi
+ movq 16+256(%rsp),%r11
+ movq 24+256(%rsp),%r12
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 224(%rsp),%rbx
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ orq %r13,%r12
+ movdqa %xmm4,%xmm2
+ orq %r8,%r12
+ orq %r9,%r12
+ por %xmm5,%xmm2
+.byte 102,73,15,110,220
+
+ movq 384(%rsp),%rdx
+ leaq 384(%rsp),%rbx
+ movq 0+96(%rsp),%r9
+ movq 8+96(%rsp),%r10
+ leaq -128+96(%rsp),%rsi
+ movq 16+96(%rsp),%r11
+ movq 24+96(%rsp),%r12
+ leaq 160(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 480(%rsp),%rdx
+ leaq 480(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 192(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 160(%rsp),%rbx
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ orq %r13,%r12
+ orq %r8,%r12
+ orq %r9,%r12
+
+.byte 0x3e
+ jnz .Ladd_proceedx
+.byte 102,73,15,126,208
+.byte 102,73,15,126,217
+ testq %r8,%r8
+ jnz .Ladd_proceedx
+ testq %r9,%r9
+ jz .Ladd_doublex
+
+.byte 102,72,15,126,199
+ pxor %xmm0,%xmm0
+ movdqu %xmm0,0(%rdi)
+ movdqu %xmm0,16(%rdi)
+ movdqu %xmm0,32(%rdi)
+ movdqu %xmm0,48(%rdi)
+ movdqu %xmm0,64(%rdi)
+ movdqu %xmm0,80(%rdi)
+ jmp .Ladd_donex
+
+.align 32
+.Ladd_doublex:
+.byte 102,72,15,126,206
+.byte 102,72,15,126,199
+ addq $416,%rsp
+ jmp .Lpoint_double_shortcutx
+
+.align 32
+.Ladd_proceedx:
+ movq 0+64(%rsp),%rdx
+ movq 8+64(%rsp),%r14
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r15
+ movq 24+64(%rsp),%r8
+ leaq 96(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 448(%rsp),%rdx
+ leaq 448(%rsp),%rbx
+ movq 0+0(%rsp),%r9
+ movq 8+0(%rsp),%r10
+ leaq -128+0(%rsp),%rsi
+ movq 16+0(%rsp),%r11
+ movq 24+0(%rsp),%r12
+ leaq 352(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 0+0(%rsp),%rdx
+ movq 8+0(%rsp),%r14
+ leaq -128+0(%rsp),%rsi
+ movq 16+0(%rsp),%r15
+ movq 24+0(%rsp),%r8
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 544(%rsp),%rdx
+ leaq 544(%rsp),%rbx
+ movq 0+352(%rsp),%r9
+ movq 8+352(%rsp),%r10
+ leaq -128+352(%rsp),%rsi
+ movq 16+352(%rsp),%r11
+ movq 24+352(%rsp),%r12
+ leaq 352(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 0(%rsp),%rdx
+ leaq 0(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 160(%rsp),%rdx
+ leaq 160(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 192(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+
+
+
+ xorq %r11,%r11
+ addq %r12,%r12
+ leaq 96(%rsp),%rsi
+ adcq %r13,%r13
+ movq %r12,%rax
+ adcq %r8,%r8
+ adcq %r9,%r9
+ movq %r13,%rbp
+ adcq $0,%r11
+
+ subq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+ sbbq $0,%r11
+
+ cmovcq %rax,%r12
+ movq 0(%rsi),%rax
+ cmovcq %rbp,%r13
+ movq 8(%rsi),%rbp
+ cmovcq %rcx,%r8
+ movq 16(%rsi),%rcx
+ cmovcq %r10,%r9
+ movq 24(%rsi),%r10
+
+ call __ecp_nistz256_subx
+
+ leaq 128(%rsp),%rbx
+ leaq 288(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ movq 192+0(%rsp),%rax
+ movq 192+8(%rsp),%rbp
+ movq 192+16(%rsp),%rcx
+ movq 192+24(%rsp),%r10
+ leaq 320(%rsp),%rdi
+
+ call __ecp_nistz256_subx
+
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+ movq 128(%rsp),%rdx
+ leaq 128(%rsp),%rbx
+ movq 0+224(%rsp),%r9
+ movq 8+224(%rsp),%r10
+ leaq -128+224(%rsp),%rsi
+ movq 16+224(%rsp),%r11
+ movq 24+224(%rsp),%r12
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 320(%rsp),%rdx
+ leaq 320(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 320(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 256(%rsp),%rbx
+ leaq 320(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+.byte 102,72,15,126,199
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 352(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 352+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 544(%rsp),%xmm2
+ pand 544+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 448(%rsp),%xmm2
+ pand 448+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,64(%rdi)
+ movdqu %xmm3,80(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 288(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 288+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 480(%rsp),%xmm2
+ pand 480+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 384(%rsp),%xmm2
+ pand 384+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,0(%rdi)
+ movdqu %xmm3,16(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 320(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 320+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 512(%rsp),%xmm2
+ pand 512+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 416(%rsp),%xmm2
+ pand 416+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,32(%rdi)
+ movdqu %xmm3,48(%rdi)
+
+.Ladd_donex:
+ addq $576+8,%rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
+.type ecp_nistz256_point_add_affinex,@function
+.align 32
+ecp_nistz256_point_add_affinex:
+.Lpoint_add_affinex:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $480+8,%rsp
+
+ movdqu 0(%rsi),%xmm0
+ movq %rdx,%rbx
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ movdqu 48(%rsi),%xmm3
+ movdqu 64(%rsi),%xmm4
+ movdqu 80(%rsi),%xmm5
+ movq 64+0(%rsi),%rdx
+ movq 64+8(%rsi),%r14
+ movq 64+16(%rsi),%r15
+ movq 64+24(%rsi),%r8
+ movdqa %xmm0,320(%rsp)
+ movdqa %xmm1,320+16(%rsp)
+ movdqa %xmm2,352(%rsp)
+ movdqa %xmm3,352+16(%rsp)
+ movdqa %xmm4,384(%rsp)
+ movdqa %xmm5,384+16(%rsp)
+ por %xmm4,%xmm5
+
+ movdqu 0(%rbx),%xmm0
+ pshufd $0xb1,%xmm5,%xmm3
+ movdqu 16(%rbx),%xmm1
+ movdqu 32(%rbx),%xmm2
+ por %xmm3,%xmm5
+ movdqu 48(%rbx),%xmm3
+ movdqa %xmm0,416(%rsp)
+ pshufd $0x1e,%xmm5,%xmm4
+ movdqa %xmm1,416+16(%rsp)
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
+ movdqa %xmm2,448(%rsp)
+ movdqa %xmm3,448+16(%rsp)
+ por %xmm2,%xmm3
+ por %xmm4,%xmm5
+ pxor %xmm4,%xmm4
+ por %xmm1,%xmm3
+
+ leaq 64-128(%rsi),%rsi
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ pcmpeqd %xmm4,%xmm5
+ pshufd $0xb1,%xmm3,%xmm4
+ movq 0(%rbx),%rdx
+
+ movq %r12,%r9
+ por %xmm3,%xmm4
+ pshufd $0,%xmm5,%xmm5
+ pshufd $0x1e,%xmm4,%xmm3
+ movq %r13,%r10
+ por %xmm3,%xmm4
+ pxor %xmm3,%xmm3
+ movq %r14,%r11
+ pcmpeqd %xmm3,%xmm4
+ pshufd $0,%xmm4,%xmm4
+
+ leaq 32-128(%rsp),%rsi
+ movq %r15,%r12
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 320(%rsp),%rbx
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ movq 384(%rsp),%rdx
+ leaq 384(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 384(%rsp),%rdx
+ leaq 384(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 288(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 448(%rsp),%rdx
+ leaq 448(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 352(%rsp),%rbx
+ leaq 96(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ movq 0+64(%rsp),%rdx
+ movq 8+64(%rsp),%r14
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r15
+ movq 24+64(%rsp),%r8
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 0+96(%rsp),%rdx
+ movq 8+96(%rsp),%r14
+ leaq -128+96(%rsp),%rsi
+ movq 16+96(%rsp),%r15
+ movq 24+96(%rsp),%r8
+ leaq 192(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 128(%rsp),%rdx
+ leaq 128(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 160(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 320(%rsp),%rdx
+ leaq 320(%rsp),%rbx
+ movq 0+128(%rsp),%r9
+ movq 8+128(%rsp),%r10
+ leaq -128+128(%rsp),%rsi
+ movq 16+128(%rsp),%r11
+ movq 24+128(%rsp),%r12
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+
+
+
+ xorq %r11,%r11
+ addq %r12,%r12
+ leaq 192(%rsp),%rsi
+ adcq %r13,%r13
+ movq %r12,%rax
+ adcq %r8,%r8
+ adcq %r9,%r9
+ movq %r13,%rbp
+ adcq $0,%r11
+
+ subq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+ sbbq $0,%r11
+
+ cmovcq %rax,%r12
+ movq 0(%rsi),%rax
+ cmovcq %rbp,%r13
+ movq 8(%rsi),%rbp
+ cmovcq %rcx,%r8
+ movq 16(%rsi),%rcx
+ cmovcq %r10,%r9
+ movq 24(%rsi),%r10
+
+ call __ecp_nistz256_subx
+
+ leaq 160(%rsp),%rbx
+ leaq 224(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ movq 0+0(%rsp),%rax
+ movq 0+8(%rsp),%rbp
+ movq 0+16(%rsp),%rcx
+ movq 0+24(%rsp),%r10
+ leaq 64(%rsp),%rdi
+
+ call __ecp_nistz256_subx
+
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+ movq 352(%rsp),%rdx
+ leaq 352(%rsp),%rbx
+ movq 0+160(%rsp),%r9
+ movq 8+160(%rsp),%r10
+ leaq -128+160(%rsp),%rsi
+ movq 16+160(%rsp),%r11
+ movq 24+160(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 96(%rsp),%rdx
+ leaq 96(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 32(%rsp),%rbx
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+.byte 102,72,15,126,199
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 288(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 288+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand .LONE_mont(%rip),%xmm2
+ pand .LONE_mont+16(%rip),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 384(%rsp),%xmm2
+ pand 384+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,64(%rdi)
+ movdqu %xmm3,80(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 224(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 224+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 416(%rsp),%xmm2
+ pand 416+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 320(%rsp),%xmm2
+ pand 320+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,0(%rdi)
+ movdqu %xmm3,16(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 256(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 256+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 448(%rsp),%xmm2
+ pand 448+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 352(%rsp),%xmm2
+ pand 352+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,32(%rdi)
+ movdqu %xmm3,48(%rdi)
+
+ addq $480+8,%rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
Property changes on: trunk/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/amd64/rsaz-avx2.S
===================================================================
--- trunk/secure/lib/libcrypto/amd64/rsaz-avx2.S (rev 0)
+++ trunk/secure/lib/libcrypto/amd64/rsaz-avx2.S 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,1700 @@
+/* $MidnightBSD$ */
+/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/rsaz-avx2.S 326663 2017-12-07 18:04:48Z jkim $ */
+/* Do not modify. This file is auto-generated from rsaz-avx2.pl. */
+.text
+
+.globl rsaz_1024_sqr_avx2
+.type rsaz_1024_sqr_avx2,@function
+.align 64
+rsaz_1024_sqr_avx2:
+ leaq (%rsp),%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ vzeroupper
+ movq %rax,%rbp
+ movq %rdx,%r13
+ subq $832,%rsp
+ movq %r13,%r15
+ subq $-128,%rdi
+ subq $-128,%rsi
+ subq $-128,%r13
+
+ andq $4095,%r15
+ addq $320,%r15
+ shrq $12,%r15
+ vpxor %ymm9,%ymm9,%ymm9
+ jz .Lsqr_1024_no_n_copy
+
+
+
+
+
+ subq $320,%rsp
+ vmovdqu 0-128(%r13),%ymm0
+ andq $-2048,%rsp
+ vmovdqu 32-128(%r13),%ymm1
+ vmovdqu 64-128(%r13),%ymm2
+ vmovdqu 96-128(%r13),%ymm3
+ vmovdqu 128-128(%r13),%ymm4
+ vmovdqu 160-128(%r13),%ymm5
+ vmovdqu 192-128(%r13),%ymm6
+ vmovdqu 224-128(%r13),%ymm7
+ vmovdqu 256-128(%r13),%ymm8
+ leaq 832+128(%rsp),%r13
+ vmovdqu %ymm0,0-128(%r13)
+ vmovdqu %ymm1,32-128(%r13)
+ vmovdqu %ymm2,64-128(%r13)
+ vmovdqu %ymm3,96-128(%r13)
+ vmovdqu %ymm4,128-128(%r13)
+ vmovdqu %ymm5,160-128(%r13)
+ vmovdqu %ymm6,192-128(%r13)
+ vmovdqu %ymm7,224-128(%r13)
+ vmovdqu %ymm8,256-128(%r13)
+ vmovdqu %ymm9,288-128(%r13)
+
+.Lsqr_1024_no_n_copy:
+ andq $-1024,%rsp
+
+ vmovdqu 32-128(%rsi),%ymm1
+ vmovdqu 64-128(%rsi),%ymm2
+ vmovdqu 96-128(%rsi),%ymm3
+ vmovdqu 128-128(%rsi),%ymm4
+ vmovdqu 160-128(%rsi),%ymm5
+ vmovdqu 192-128(%rsi),%ymm6
+ vmovdqu 224-128(%rsi),%ymm7
+ vmovdqu 256-128(%rsi),%ymm8
+
+ leaq 192(%rsp),%rbx
+ vmovdqu .Land_mask(%rip),%ymm15
+ jmp .LOOP_GRANDE_SQR_1024
+
+.align 32
+.LOOP_GRANDE_SQR_1024:
+ leaq 576+128(%rsp),%r9
+ leaq 448(%rsp),%r12
+
+
+
+
+ vpaddq %ymm1,%ymm1,%ymm1
+ vpbroadcastq 0-128(%rsi),%ymm10
+ vpaddq %ymm2,%ymm2,%ymm2
+ vmovdqa %ymm1,0-128(%r9)
+ vpaddq %ymm3,%ymm3,%ymm3
+ vmovdqa %ymm2,32-128(%r9)
+ vpaddq %ymm4,%ymm4,%ymm4
+ vmovdqa %ymm3,64-128(%r9)
+ vpaddq %ymm5,%ymm5,%ymm5
+ vmovdqa %ymm4,96-128(%r9)
+ vpaddq %ymm6,%ymm6,%ymm6
+ vmovdqa %ymm5,128-128(%r9)
+ vpaddq %ymm7,%ymm7,%ymm7
+ vmovdqa %ymm6,160-128(%r9)
+ vpaddq %ymm8,%ymm8,%ymm8
+ vmovdqa %ymm7,192-128(%r9)
+ vpxor %ymm9,%ymm9,%ymm9
+ vmovdqa %ymm8,224-128(%r9)
+
+ vpmuludq 0-128(%rsi),%ymm10,%ymm0
+ vpbroadcastq 32-128(%rsi),%ymm11
+ vmovdqu %ymm9,288-192(%rbx)
+ vpmuludq %ymm10,%ymm1,%ymm1
+ vmovdqu %ymm9,320-448(%r12)
+ vpmuludq %ymm10,%ymm2,%ymm2
+ vmovdqu %ymm9,352-448(%r12)
+ vpmuludq %ymm10,%ymm3,%ymm3
+ vmovdqu %ymm9,384-448(%r12)
+ vpmuludq %ymm10,%ymm4,%ymm4
+ vmovdqu %ymm9,416-448(%r12)
+ vpmuludq %ymm10,%ymm5,%ymm5
+ vmovdqu %ymm9,448-448(%r12)
+ vpmuludq %ymm10,%ymm6,%ymm6
+ vmovdqu %ymm9,480-448(%r12)
+ vpmuludq %ymm10,%ymm7,%ymm7
+ vmovdqu %ymm9,512-448(%r12)
+ vpmuludq %ymm10,%ymm8,%ymm8
+ vpbroadcastq 64-128(%rsi),%ymm10
+ vmovdqu %ymm9,544-448(%r12)
+
+ movq %rsi,%r15
+ movl $4,%r14d
+ jmp .Lsqr_entry_1024
+.align 32
+.LOOP_SQR_1024:
+ vpbroadcastq 32-128(%r15),%ymm11
+ vpmuludq 0-128(%rsi),%ymm10,%ymm0
+ vpaddq 0-192(%rbx),%ymm0,%ymm0
+ vpmuludq 0-128(%r9),%ymm10,%ymm1
+ vpaddq 32-192(%rbx),%ymm1,%ymm1
+ vpmuludq 32-128(%r9),%ymm10,%ymm2
+ vpaddq 64-192(%rbx),%ymm2,%ymm2
+ vpmuludq 64-128(%r9),%ymm10,%ymm3
+ vpaddq 96-192(%rbx),%ymm3,%ymm3
+ vpmuludq 96-128(%r9),%ymm10,%ymm4
+ vpaddq 128-192(%rbx),%ymm4,%ymm4
+ vpmuludq 128-128(%r9),%ymm10,%ymm5
+ vpaddq 160-192(%rbx),%ymm5,%ymm5
+ vpmuludq 160-128(%r9),%ymm10,%ymm6
+ vpaddq 192-192(%rbx),%ymm6,%ymm6
+ vpmuludq 192-128(%r9),%ymm10,%ymm7
+ vpaddq 224-192(%rbx),%ymm7,%ymm7
+ vpmuludq 224-128(%r9),%ymm10,%ymm8
+ vpbroadcastq 64-128(%r15),%ymm10
+ vpaddq 256-192(%rbx),%ymm8,%ymm8
+.Lsqr_entry_1024:
+ vmovdqu %ymm0,0-192(%rbx)
+ vmovdqu %ymm1,32-192(%rbx)
+
+ vpmuludq 32-128(%rsi),%ymm11,%ymm12
+ vpaddq %ymm12,%ymm2,%ymm2
+ vpmuludq 32-128(%r9),%ymm11,%ymm14
+ vpaddq %ymm14,%ymm3,%ymm3
+ vpmuludq 64-128(%r9),%ymm11,%ymm13
+ vpaddq %ymm13,%ymm4,%ymm4
+ vpmuludq 96-128(%r9),%ymm11,%ymm12
+ vpaddq %ymm12,%ymm5,%ymm5
+ vpmuludq 128-128(%r9),%ymm11,%ymm14
+ vpaddq %ymm14,%ymm6,%ymm6
+ vpmuludq 160-128(%r9),%ymm11,%ymm13
+ vpaddq %ymm13,%ymm7,%ymm7
+ vpmuludq 192-128(%r9),%ymm11,%ymm12
+ vpaddq %ymm12,%ymm8,%ymm8
+ vpmuludq 224-128(%r9),%ymm11,%ymm0
+ vpbroadcastq 96-128(%r15),%ymm11
+ vpaddq 288-192(%rbx),%ymm0,%ymm0
+
+ vmovdqu %ymm2,64-192(%rbx)
+ vmovdqu %ymm3,96-192(%rbx)
+
+ vpmuludq 64-128(%rsi),%ymm10,%ymm13
+ vpaddq %ymm13,%ymm4,%ymm4
+ vpmuludq 64-128(%r9),%ymm10,%ymm12
+ vpaddq %ymm12,%ymm5,%ymm5
+ vpmuludq 96-128(%r9),%ymm10,%ymm14
+ vpaddq %ymm14,%ymm6,%ymm6
+ vpmuludq 128-128(%r9),%ymm10,%ymm13
+ vpaddq %ymm13,%ymm7,%ymm7
+ vpmuludq 160-128(%r9),%ymm10,%ymm12
+ vpaddq %ymm12,%ymm8,%ymm8
+ vpmuludq 192-128(%r9),%ymm10,%ymm14
+ vpaddq %ymm14,%ymm0,%ymm0
+ vpmuludq 224-128(%r9),%ymm10,%ymm1
+ vpbroadcastq 128-128(%r15),%ymm10
+ vpaddq 320-448(%r12),%ymm1,%ymm1
+
+ vmovdqu %ymm4,128-192(%rbx)
+ vmovdqu %ymm5,160-192(%rbx)
+
+ vpmuludq 96-128(%rsi),%ymm11,%ymm12
+ vpaddq %ymm12,%ymm6,%ymm6
+ vpmuludq 96-128(%r9),%ymm11,%ymm14
+ vpaddq %ymm14,%ymm7,%ymm7
+ vpmuludq 128-128(%r9),%ymm11,%ymm13
+ vpaddq %ymm13,%ymm8,%ymm8
+ vpmuludq 160-128(%r9),%ymm11,%ymm12
+ vpaddq %ymm12,%ymm0,%ymm0
+ vpmuludq 192-128(%r9),%ymm11,%ymm14
+ vpaddq %ymm14,%ymm1,%ymm1
+ vpmuludq 224-128(%r9),%ymm11,%ymm2
+ vpbroadcastq 160-128(%r15),%ymm11
+ vpaddq 352-448(%r12),%ymm2,%ymm2
+
+ vmovdqu %ymm6,192-192(%rbx)
+ vmovdqu %ymm7,224-192(%rbx)
+
+ vpmuludq 128-128(%rsi),%ymm10,%ymm12
+ vpaddq %ymm12,%ymm8,%ymm8
+ vpmuludq 128-128(%r9),%ymm10,%ymm14
+ vpaddq %ymm14,%ymm0,%ymm0
+ vpmuludq 160-128(%r9),%ymm10,%ymm13
+ vpaddq %ymm13,%ymm1,%ymm1
+ vpmuludq 192-128(%r9),%ymm10,%ymm12
+ vpaddq %ymm12,%ymm2,%ymm2
+ vpmuludq 224-128(%r9),%ymm10,%ymm3
+ vpbroadcastq 192-128(%r15),%ymm10
+ vpaddq 384-448(%r12),%ymm3,%ymm3
+
+ vmovdqu %ymm8,256-192(%rbx)
+ vmovdqu %ymm0,288-192(%rbx)
+ leaq 8(%rbx),%rbx
+
+ vpmuludq 160-128(%rsi),%ymm11,%ymm13
+ vpaddq %ymm13,%ymm1,%ymm1
+ vpmuludq 160-128(%r9),%ymm11,%ymm12
+ vpaddq %ymm12,%ymm2,%ymm2
+ vpmuludq 192-128(%r9),%ymm11,%ymm14
+ vpaddq %ymm14,%ymm3,%ymm3
+ vpmuludq 224-128(%r9),%ymm11,%ymm4
+ vpbroadcastq 224-128(%r15),%ymm11
+ vpaddq 416-448(%r12),%ymm4,%ymm4
+
+ vmovdqu %ymm1,320-448(%r12)
+ vmovdqu %ymm2,352-448(%r12)
+
+ vpmuludq 192-128(%rsi),%ymm10,%ymm12
+ vpaddq %ymm12,%ymm3,%ymm3
+ vpmuludq 192-128(%r9),%ymm10,%ymm14
+ vpbroadcastq 256-128(%r15),%ymm0
+ vpaddq %ymm14,%ymm4,%ymm4
+ vpmuludq 224-128(%r9),%ymm10,%ymm5
+ vpbroadcastq 0+8-128(%r15),%ymm10
+ vpaddq 448-448(%r12),%ymm5,%ymm5
+
+ vmovdqu %ymm3,384-448(%r12)
+ vmovdqu %ymm4,416-448(%r12)
+ leaq 8(%r15),%r15
+
+ vpmuludq 224-128(%rsi),%ymm11,%ymm12
+ vpaddq %ymm12,%ymm5,%ymm5
+ vpmuludq 224-128(%r9),%ymm11,%ymm6
+ vpaddq 480-448(%r12),%ymm6,%ymm6
+
+ vpmuludq 256-128(%rsi),%ymm0,%ymm7
+ vmovdqu %ymm5,448-448(%r12)
+ vpaddq 512-448(%r12),%ymm7,%ymm7
+ vmovdqu %ymm6,480-448(%r12)
+ vmovdqu %ymm7,512-448(%r12)
+ leaq 8(%r12),%r12
+
+ decl %r14d
+ jnz .LOOP_SQR_1024
+
+ vmovdqu 256(%rsp),%ymm8
+ vmovdqu 288(%rsp),%ymm1
+ vmovdqu 320(%rsp),%ymm2
+ leaq 192(%rsp),%rbx
+
+ vpsrlq $29,%ymm8,%ymm14
+ vpand %ymm15,%ymm8,%ymm8
+ vpsrlq $29,%ymm1,%ymm11
+ vpand %ymm15,%ymm1,%ymm1
+
+ vpermq $0x93,%ymm14,%ymm14
+ vpxor %ymm9,%ymm9,%ymm9
+ vpermq $0x93,%ymm11,%ymm11
+
+ vpblendd $3,%ymm9,%ymm14,%ymm10
+ vpblendd $3,%ymm14,%ymm11,%ymm14
+ vpaddq %ymm10,%ymm8,%ymm8
+ vpblendd $3,%ymm11,%ymm9,%ymm11
+ vpaddq %ymm14,%ymm1,%ymm1
+ vpaddq %ymm11,%ymm2,%ymm2
+ vmovdqu %ymm1,288-192(%rbx)
+ vmovdqu %ymm2,320-192(%rbx)
+
+ movq (%rsp),%rax
+ movq 8(%rsp),%r10
+ movq 16(%rsp),%r11
+ movq 24(%rsp),%r12
+ vmovdqu 32(%rsp),%ymm1
+ vmovdqu 64-192(%rbx),%ymm2
+ vmovdqu 96-192(%rbx),%ymm3
+ vmovdqu 128-192(%rbx),%ymm4
+ vmovdqu 160-192(%rbx),%ymm5
+ vmovdqu 192-192(%rbx),%ymm6
+ vmovdqu 224-192(%rbx),%ymm7
+
+ movq %rax,%r9
+ imull %ecx,%eax
+ andl $0x1fffffff,%eax
+ vmovd %eax,%xmm12
+
+ movq %rax,%rdx
+ imulq -128(%r13),%rax
+ vpbroadcastq %xmm12,%ymm12
+ addq %rax,%r9
+ movq %rdx,%rax
+ imulq 8-128(%r13),%rax
+ shrq $29,%r9
+ addq %rax,%r10
+ movq %rdx,%rax
+ imulq 16-128(%r13),%rax
+ addq %r9,%r10
+ addq %rax,%r11
+ imulq 24-128(%r13),%rdx
+ addq %rdx,%r12
+
+ movq %r10,%rax
+ imull %ecx,%eax
+ andl $0x1fffffff,%eax
+
+ movl $9,%r14d
+ jmp .LOOP_REDUCE_1024
+
+.align 32
+.LOOP_REDUCE_1024:
+ vmovd %eax,%xmm13
+ vpbroadcastq %xmm13,%ymm13
+
+ vpmuludq 32-128(%r13),%ymm12,%ymm10
+ movq %rax,%rdx
+ imulq -128(%r13),%rax
+ vpaddq %ymm10,%ymm1,%ymm1
+ addq %rax,%r10
+ vpmuludq 64-128(%r13),%ymm12,%ymm14
+ movq %rdx,%rax
+ imulq 8-128(%r13),%rax
+ vpaddq %ymm14,%ymm2,%ymm2
+ vpmuludq 96-128(%r13),%ymm12,%ymm11
+.byte 0x67
+ addq %rax,%r11
+.byte 0x67
+ movq %rdx,%rax
+ imulq 16-128(%r13),%rax
+ shrq $29,%r10
+ vpaddq %ymm11,%ymm3,%ymm3
+ vpmuludq 128-128(%r13),%ymm12,%ymm10
+ addq %rax,%r12
+ addq %r10,%r11
+ vpaddq %ymm10,%ymm4,%ymm4
+ vpmuludq 160-128(%r13),%ymm12,%ymm14
+ movq %r11,%rax
+ imull %ecx,%eax
+ vpaddq %ymm14,%ymm5,%ymm5
+ vpmuludq 192-128(%r13),%ymm12,%ymm11
+ andl $0x1fffffff,%eax
+ vpaddq %ymm11,%ymm6,%ymm6
+ vpmuludq 224-128(%r13),%ymm12,%ymm10
+ vpaddq %ymm10,%ymm7,%ymm7
+ vpmuludq 256-128(%r13),%ymm12,%ymm14
+ vmovd %eax,%xmm12
+
+ vpaddq %ymm14,%ymm8,%ymm8
+
+ vpbroadcastq %xmm12,%ymm12
+
+ vpmuludq 32-8-128(%r13),%ymm13,%ymm11
+ vmovdqu 96-8-128(%r13),%ymm14
+ movq %rax,%rdx
+ imulq -128(%r13),%rax
+ vpaddq %ymm11,%ymm1,%ymm1
+ vpmuludq 64-8-128(%r13),%ymm13,%ymm10
+ vmovdqu 128-8-128(%r13),%ymm11
+ addq %rax,%r11
+ movq %rdx,%rax
+ imulq 8-128(%r13),%rax
+ vpaddq %ymm10,%ymm2,%ymm2
+ addq %r12,%rax
+ shrq $29,%r11
+ vpmuludq %ymm13,%ymm14,%ymm14
+ vmovdqu 160-8-128(%r13),%ymm10
+ addq %r11,%rax
+ vpaddq %ymm14,%ymm3,%ymm3
+ vpmuludq %ymm13,%ymm11,%ymm11
+ vmovdqu 192-8-128(%r13),%ymm14
+.byte 0x67
+ movq %rax,%r12
+ imull %ecx,%eax
+ vpaddq %ymm11,%ymm4,%ymm4
+ vpmuludq %ymm13,%ymm10,%ymm10
+.byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00
+ andl $0x1fffffff,%eax
+ vpaddq %ymm10,%ymm5,%ymm5
+ vpmuludq %ymm13,%ymm14,%ymm14
+ vmovdqu 256-8-128(%r13),%ymm10
+ vpaddq %ymm14,%ymm6,%ymm6
+ vpmuludq %ymm13,%ymm11,%ymm11
+ vmovdqu 288-8-128(%r13),%ymm9
+ vmovd %eax,%xmm0
+ imulq -128(%r13),%rax
+ vpaddq %ymm11,%ymm7,%ymm7
+ vpmuludq %ymm13,%ymm10,%ymm10
+ vmovdqu 32-16-128(%r13),%ymm14
+ vpbroadcastq %xmm0,%ymm0
+ vpaddq %ymm10,%ymm8,%ymm8
+ vpmuludq %ymm13,%ymm9,%ymm9
+ vmovdqu 64-16-128(%r13),%ymm11
+ addq %rax,%r12
+
+ vmovdqu 32-24-128(%r13),%ymm13
+ vpmuludq %ymm12,%ymm14,%ymm14
+ vmovdqu 96-16-128(%r13),%ymm10
+ vpaddq %ymm14,%ymm1,%ymm1
+ vpmuludq %ymm0,%ymm13,%ymm13
+ vpmuludq %ymm12,%ymm11,%ymm11
+.byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff
+ vpaddq %ymm1,%ymm13,%ymm13
+ vpaddq %ymm11,%ymm2,%ymm2
+ vpmuludq %ymm12,%ymm10,%ymm10
+ vmovdqu 160-16-128(%r13),%ymm11
+.byte 0x67
+ vmovq %xmm13,%rax
+ vmovdqu %ymm13,(%rsp)
+ vpaddq %ymm10,%ymm3,%ymm3
+ vpmuludq %ymm12,%ymm14,%ymm14
+ vmovdqu 192-16-128(%r13),%ymm10
+ vpaddq %ymm14,%ymm4,%ymm4
+ vpmuludq %ymm12,%ymm11,%ymm11
+ vmovdqu 224-16-128(%r13),%ymm14
+ vpaddq %ymm11,%ymm5,%ymm5
+ vpmuludq %ymm12,%ymm10,%ymm10
+ vmovdqu 256-16-128(%r13),%ymm11
+ vpaddq %ymm10,%ymm6,%ymm6
+ vpmuludq %ymm12,%ymm14,%ymm14
+ shrq $29,%r12
+ vmovdqu 288-16-128(%r13),%ymm10
+ addq %r12,%rax
+ vpaddq %ymm14,%ymm7,%ymm7
+ vpmuludq %ymm12,%ymm11,%ymm11
+
+ movq %rax,%r9
+ imull %ecx,%eax
+ vpaddq %ymm11,%ymm8,%ymm8
+ vpmuludq %ymm12,%ymm10,%ymm10
+ andl $0x1fffffff,%eax
+ vmovd %eax,%xmm12
+ vmovdqu 96-24-128(%r13),%ymm11
+.byte 0x67
+ vpaddq %ymm10,%ymm9,%ymm9
+ vpbroadcastq %xmm12,%ymm12
+
+ vpmuludq 64-24-128(%r13),%ymm0,%ymm14
+ vmovdqu 128-24-128(%r13),%ymm10
+ movq %rax,%rdx
+ imulq -128(%r13),%rax
+ movq 8(%rsp),%r10
+ vpaddq %ymm14,%ymm2,%ymm1
+ vpmuludq %ymm0,%ymm11,%ymm11
+ vmovdqu 160-24-128(%r13),%ymm14
+ addq %rax,%r9
+ movq %rdx,%rax
+ imulq 8-128(%r13),%rax
+.byte 0x67
+ shrq $29,%r9
+ movq 16(%rsp),%r11
+ vpaddq %ymm11,%ymm3,%ymm2
+ vpmuludq %ymm0,%ymm10,%ymm10
+ vmovdqu 192-24-128(%r13),%ymm11
+ addq %rax,%r10
+ movq %rdx,%rax
+ imulq 16-128(%r13),%rax
+ vpaddq %ymm10,%ymm4,%ymm3
+ vpmuludq %ymm0,%ymm14,%ymm14
+ vmovdqu 224-24-128(%r13),%ymm10
+ imulq 24-128(%r13),%rdx
+ addq %rax,%r11
+ leaq (%r9,%r10,1),%rax
+ vpaddq %ymm14,%ymm5,%ymm4
+ vpmuludq %ymm0,%ymm11,%ymm11
+ vmovdqu 256-24-128(%r13),%ymm14
+ movq %rax,%r10
+ imull %ecx,%eax
+ vpmuludq %ymm0,%ymm10,%ymm10
+ vpaddq %ymm11,%ymm6,%ymm5
+ vmovdqu 288-24-128(%r13),%ymm11
+ andl $0x1fffffff,%eax
+ vpaddq %ymm10,%ymm7,%ymm6
+ vpmuludq %ymm0,%ymm14,%ymm14
+ addq 24(%rsp),%rdx
+ vpaddq %ymm14,%ymm8,%ymm7
+ vpmuludq %ymm0,%ymm11,%ymm11
+ vpaddq %ymm11,%ymm9,%ymm8
+ vmovq %r12,%xmm9
+ movq %rdx,%r12
+
+ decl %r14d
+ jnz .LOOP_REDUCE_1024
+ leaq 448(%rsp),%r12
+ vpaddq %ymm9,%ymm13,%ymm0
+ vpxor %ymm9,%ymm9,%ymm9
+
+ vpaddq 288-192(%rbx),%ymm0,%ymm0
+ vpaddq 320-448(%r12),%ymm1,%ymm1
+ vpaddq 352-448(%r12),%ymm2,%ymm2
+ vpaddq 384-448(%r12),%ymm3,%ymm3
+ vpaddq 416-448(%r12),%ymm4,%ymm4
+ vpaddq 448-448(%r12),%ymm5,%ymm5
+ vpaddq 480-448(%r12),%ymm6,%ymm6
+ vpaddq 512-448(%r12),%ymm7,%ymm7
+ vpaddq 544-448(%r12),%ymm8,%ymm8
+
+ vpsrlq $29,%ymm0,%ymm14
+ vpand %ymm15,%ymm0,%ymm0
+ vpsrlq $29,%ymm1,%ymm11
+ vpand %ymm15,%ymm1,%ymm1
+ vpsrlq $29,%ymm2,%ymm12
+ vpermq $0x93,%ymm14,%ymm14
+ vpand %ymm15,%ymm2,%ymm2
+ vpsrlq $29,%ymm3,%ymm13
+ vpermq $0x93,%ymm11,%ymm11
+ vpand %ymm15,%ymm3,%ymm3
+ vpermq $0x93,%ymm12,%ymm12
+
+ vpblendd $3,%ymm9,%ymm14,%ymm10
+ vpermq $0x93,%ymm13,%ymm13
+ vpblendd $3,%ymm14,%ymm11,%ymm14
+ vpaddq %ymm10,%ymm0,%ymm0
+ vpblendd $3,%ymm11,%ymm12,%ymm11
+ vpaddq %ymm14,%ymm1,%ymm1
+ vpblendd $3,%ymm12,%ymm13,%ymm12
+ vpaddq %ymm11,%ymm2,%ymm2
+ vpblendd $3,%ymm13,%ymm9,%ymm13
+ vpaddq %ymm12,%ymm3,%ymm3
+ vpaddq %ymm13,%ymm4,%ymm4
+
+ vpsrlq $29,%ymm0,%ymm14
+ vpand %ymm15,%ymm0,%ymm0
+ vpsrlq $29,%ymm1,%ymm11
+ vpand %ymm15,%ymm1,%ymm1
+ vpsrlq $29,%ymm2,%ymm12
+ vpermq $0x93,%ymm14,%ymm14
+ vpand %ymm15,%ymm2,%ymm2
+ vpsrlq $29,%ymm3,%ymm13
+ vpermq $0x93,%ymm11,%ymm11
+ vpand %ymm15,%ymm3,%ymm3
+ vpermq $0x93,%ymm12,%ymm12
+
+ vpblendd $3,%ymm9,%ymm14,%ymm10
+ vpermq $0x93,%ymm13,%ymm13
+ vpblendd $3,%ymm14,%ymm11,%ymm14
+ vpaddq %ymm10,%ymm0,%ymm0
+ vpblendd $3,%ymm11,%ymm12,%ymm11
+ vpaddq %ymm14,%ymm1,%ymm1
+ vmovdqu %ymm0,0-128(%rdi)
+ vpblendd $3,%ymm12,%ymm13,%ymm12
+ vpaddq %ymm11,%ymm2,%ymm2
+ vmovdqu %ymm1,32-128(%rdi)
+ vpblendd $3,%ymm13,%ymm9,%ymm13
+ vpaddq %ymm12,%ymm3,%ymm3
+ vmovdqu %ymm2,64-128(%rdi)
+ vpaddq %ymm13,%ymm4,%ymm4
+ vmovdqu %ymm3,96-128(%rdi)
+ vpsrlq $29,%ymm4,%ymm14
+ vpand %ymm15,%ymm4,%ymm4
+ vpsrlq $29,%ymm5,%ymm11
+ vpand %ymm15,%ymm5,%ymm5
+ vpsrlq $29,%ymm6,%ymm12
+ vpermq $0x93,%ymm14,%ymm14
+ vpand %ymm15,%ymm6,%ymm6
+ vpsrlq $29,%ymm7,%ymm13
+ vpermq $0x93,%ymm11,%ymm11
+ vpand %ymm15,%ymm7,%ymm7
+ vpsrlq $29,%ymm8,%ymm0
+ vpermq $0x93,%ymm12,%ymm12
+ vpand %ymm15,%ymm8,%ymm8
+ vpermq $0x93,%ymm13,%ymm13
+
+ vpblendd $3,%ymm9,%ymm14,%ymm10
+ vpermq $0x93,%ymm0,%ymm0
+ vpblendd $3,%ymm14,%ymm11,%ymm14
+ vpaddq %ymm10,%ymm4,%ymm4
+ vpblendd $3,%ymm11,%ymm12,%ymm11
+ vpaddq %ymm14,%ymm5,%ymm5
+ vpblendd $3,%ymm12,%ymm13,%ymm12
+ vpaddq %ymm11,%ymm6,%ymm6
+ vpblendd $3,%ymm13,%ymm0,%ymm13
+ vpaddq %ymm12,%ymm7,%ymm7
+ vpaddq %ymm13,%ymm8,%ymm8
+
+ vpsrlq $29,%ymm4,%ymm14
+ vpand %ymm15,%ymm4,%ymm4
+ vpsrlq $29,%ymm5,%ymm11
+ vpand %ymm15,%ymm5,%ymm5
+ vpsrlq $29,%ymm6,%ymm12
+ vpermq $0x93,%ymm14,%ymm14
+ vpand %ymm15,%ymm6,%ymm6
+ vpsrlq $29,%ymm7,%ymm13
+ vpermq $0x93,%ymm11,%ymm11
+ vpand %ymm15,%ymm7,%ymm7
+ vpsrlq $29,%ymm8,%ymm0
+ vpermq $0x93,%ymm12,%ymm12
+ vpand %ymm15,%ymm8,%ymm8
+ vpermq $0x93,%ymm13,%ymm13
+
+ vpblendd $3,%ymm9,%ymm14,%ymm10
+ vpermq $0x93,%ymm0,%ymm0
+ vpblendd $3,%ymm14,%ymm11,%ymm14
+ vpaddq %ymm10,%ymm4,%ymm4
+ vpblendd $3,%ymm11,%ymm12,%ymm11
+ vpaddq %ymm14,%ymm5,%ymm5
+ vmovdqu %ymm4,128-128(%rdi)
+ vpblendd $3,%ymm12,%ymm13,%ymm12
+ vpaddq %ymm11,%ymm6,%ymm6
+ vmovdqu %ymm5,160-128(%rdi)
+ vpblendd $3,%ymm13,%ymm0,%ymm13
+ vpaddq %ymm12,%ymm7,%ymm7
+ vmovdqu %ymm6,192-128(%rdi)
+ vpaddq %ymm13,%ymm8,%ymm8
+ vmovdqu %ymm7,224-128(%rdi)
+ vmovdqu %ymm8,256-128(%rdi)
+
+ movq %rdi,%rsi
+ decl %r8d
+ jne .LOOP_GRANDE_SQR_1024
+
+ vzeroall
+ movq %rbp,%rax
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lsqr_1024_epilogue:
+ .byte 0xf3,0xc3
+.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
+.globl rsaz_1024_mul_avx2
+.type rsaz_1024_mul_avx2,@function
+.align 64
+rsaz_1024_mul_avx2:
+ leaq (%rsp),%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rax,%rbp
+ vzeroall
+ movq %rdx,%r13
+ subq $64,%rsp
+
+
+
+
+
+
+.byte 0x67,0x67
+ movq %rsi,%r15
+ andq $4095,%r15
+ addq $320,%r15
+ shrq $12,%r15
+ movq %rsi,%r15
+ cmovnzq %r13,%rsi
+ cmovnzq %r15,%r13
+
+ movq %rcx,%r15
+ subq $-128,%rsi
+ subq $-128,%rcx
+ subq $-128,%rdi
+
+ andq $4095,%r15
+ addq $320,%r15
+.byte 0x67,0x67
+ shrq $12,%r15
+ jz .Lmul_1024_no_n_copy
+
+
+
+
+
+ subq $320,%rsp
+ vmovdqu 0-128(%rcx),%ymm0
+ andq $-512,%rsp
+ vmovdqu 32-128(%rcx),%ymm1
+ vmovdqu 64-128(%rcx),%ymm2
+ vmovdqu 96-128(%rcx),%ymm3
+ vmovdqu 128-128(%rcx),%ymm4
+ vmovdqu 160-128(%rcx),%ymm5
+ vmovdqu 192-128(%rcx),%ymm6
+ vmovdqu 224-128(%rcx),%ymm7
+ vmovdqu 256-128(%rcx),%ymm8
+ leaq 64+128(%rsp),%rcx
+ vmovdqu %ymm0,0-128(%rcx)
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovdqu %ymm1,32-128(%rcx)
+ vpxor %ymm1,%ymm1,%ymm1
+ vmovdqu %ymm2,64-128(%rcx)
+ vpxor %ymm2,%ymm2,%ymm2
+ vmovdqu %ymm3,96-128(%rcx)
+ vpxor %ymm3,%ymm3,%ymm3
+ vmovdqu %ymm4,128-128(%rcx)
+ vpxor %ymm4,%ymm4,%ymm4
+ vmovdqu %ymm5,160-128(%rcx)
+ vpxor %ymm5,%ymm5,%ymm5
+ vmovdqu %ymm6,192-128(%rcx)
+ vpxor %ymm6,%ymm6,%ymm6
+ vmovdqu %ymm7,224-128(%rcx)
+ vpxor %ymm7,%ymm7,%ymm7
+ vmovdqu %ymm8,256-128(%rcx)
+ vmovdqa %ymm0,%ymm8
+ vmovdqu %ymm9,288-128(%rcx)
+.Lmul_1024_no_n_copy:
+ andq $-64,%rsp
+
+ movq (%r13),%rbx
+ vpbroadcastq (%r13),%ymm10
+ vmovdqu %ymm0,(%rsp)
+ xorq %r9,%r9
+.byte 0x67
+ xorq %r10,%r10
+ xorq %r11,%r11
+ xorq %r12,%r12
+
+ vmovdqu .Land_mask(%rip),%ymm15
+ movl $9,%r14d
+ vmovdqu %ymm9,288-128(%rdi)
+ jmp .Loop_mul_1024
+
+.align 32
+.Loop_mul_1024:
+ vpsrlq $29,%ymm3,%ymm9
+ movq %rbx,%rax
+ imulq -128(%rsi),%rax
+ addq %r9,%rax
+ movq %rbx,%r10
+ imulq 8-128(%rsi),%r10
+ addq 8(%rsp),%r10
+
+ movq %rax,%r9
+ imull %r8d,%eax
+ andl $0x1fffffff,%eax
+
+ movq %rbx,%r11
+ imulq 16-128(%rsi),%r11
+ addq 16(%rsp),%r11
+
+ movq %rbx,%r12
+ imulq 24-128(%rsi),%r12
+ addq 24(%rsp),%r12
+ vpmuludq 32-128(%rsi),%ymm10,%ymm0
+ vmovd %eax,%xmm11
+ vpaddq %ymm0,%ymm1,%ymm1
+ vpmuludq 64-128(%rsi),%ymm10,%ymm12
+ vpbroadcastq %xmm11,%ymm11
+ vpaddq %ymm12,%ymm2,%ymm2
+ vpmuludq 96-128(%rsi),%ymm10,%ymm13
+ vpand %ymm15,%ymm3,%ymm3
+ vpaddq %ymm13,%ymm3,%ymm3
+ vpmuludq 128-128(%rsi),%ymm10,%ymm0
+ vpaddq %ymm0,%ymm4,%ymm4
+ vpmuludq 160-128(%rsi),%ymm10,%ymm12
+ vpaddq %ymm12,%ymm5,%ymm5
+ vpmuludq 192-128(%rsi),%ymm10,%ymm13
+ vpaddq %ymm13,%ymm6,%ymm6
+ vpmuludq 224-128(%rsi),%ymm10,%ymm0
+ vpermq $0x93,%ymm9,%ymm9
+ vpaddq %ymm0,%ymm7,%ymm7
+ vpmuludq 256-128(%rsi),%ymm10,%ymm12
+ vpbroadcastq 8(%r13),%ymm10
+ vpaddq %ymm12,%ymm8,%ymm8
+
+ movq %rax,%rdx
+ imulq -128(%rcx),%rax
+ addq %rax,%r9
+ movq %rdx,%rax
+ imulq 8-128(%rcx),%rax
+ addq %rax,%r10
+ movq %rdx,%rax
+ imulq 16-128(%rcx),%rax
+ addq %rax,%r11
+ shrq $29,%r9
+ imulq 24-128(%rcx),%rdx
+ addq %rdx,%r12
+ addq %r9,%r10
+
+ vpmuludq 32-128(%rcx),%ymm11,%ymm13
+ vmovq %xmm10,%rbx
+ vpaddq %ymm13,%ymm1,%ymm1
+ vpmuludq 64-128(%rcx),%ymm11,%ymm0
+ vpaddq %ymm0,%ymm2,%ymm2
+ vpmuludq 96-128(%rcx),%ymm11,%ymm12
+ vpaddq %ymm12,%ymm3,%ymm3
+ vpmuludq 128-128(%rcx),%ymm11,%ymm13
+ vpaddq %ymm13,%ymm4,%ymm4
+ vpmuludq 160-128(%rcx),%ymm11,%ymm0
+ vpaddq %ymm0,%ymm5,%ymm5
+ vpmuludq 192-128(%rcx),%ymm11,%ymm12
+ vpaddq %ymm12,%ymm6,%ymm6
+ vpmuludq 224-128(%rcx),%ymm11,%ymm13
+ vpblendd $3,%ymm14,%ymm9,%ymm12
+ vpaddq %ymm13,%ymm7,%ymm7
+ vpmuludq 256-128(%rcx),%ymm11,%ymm0
+ vpaddq %ymm12,%ymm3,%ymm3
+ vpaddq %ymm0,%ymm8,%ymm8
+
+ movq %rbx,%rax
+ imulq -128(%rsi),%rax
+ addq %rax,%r10
+ vmovdqu -8+32-128(%rsi),%ymm12
+ movq %rbx,%rax
+ imulq 8-128(%rsi),%rax
+ addq %rax,%r11
+ vmovdqu -8+64-128(%rsi),%ymm13
+
+ movq %r10,%rax
+ vpblendd $0xfc,%ymm14,%ymm9,%ymm9
+ imull %r8d,%eax
+ vpaddq %ymm9,%ymm4,%ymm4
+ andl $0x1fffffff,%eax
+
+ imulq 16-128(%rsi),%rbx
+ addq %rbx,%r12
+ vpmuludq %ymm10,%ymm12,%ymm12
+ vmovd %eax,%xmm11
+ vmovdqu -8+96-128(%rsi),%ymm0
+ vpaddq %ymm12,%ymm1,%ymm1
+ vpmuludq %ymm10,%ymm13,%ymm13
+ vpbroadcastq %xmm11,%ymm11
+ vmovdqu -8+128-128(%rsi),%ymm12
+ vpaddq %ymm13,%ymm2,%ymm2
+ vpmuludq %ymm10,%ymm0,%ymm0
+ vmovdqu -8+160-128(%rsi),%ymm13
+ vpaddq %ymm0,%ymm3,%ymm3
+ vpmuludq %ymm10,%ymm12,%ymm12
+ vmovdqu -8+192-128(%rsi),%ymm0
+ vpaddq %ymm12,%ymm4,%ymm4
+ vpmuludq %ymm10,%ymm13,%ymm13
+ vmovdqu -8+224-128(%rsi),%ymm12
+ vpaddq %ymm13,%ymm5,%ymm5
+ vpmuludq %ymm10,%ymm0,%ymm0
+ vmovdqu -8+256-128(%rsi),%ymm13
+ vpaddq %ymm0,%ymm6,%ymm6
+ vpmuludq %ymm10,%ymm12,%ymm12
+ vmovdqu -8+288-128(%rsi),%ymm9
+ vpaddq %ymm12,%ymm7,%ymm7
+ vpmuludq %ymm10,%ymm13,%ymm13
+ vpaddq %ymm13,%ymm8,%ymm8
+ vpmuludq %ymm10,%ymm9,%ymm9
+ vpbroadcastq 16(%r13),%ymm10
+
+ movq %rax,%rdx
+ imulq -128(%rcx),%rax
+ addq %rax,%r10
+ vmovdqu -8+32-128(%rcx),%ymm0
+ movq %rdx,%rax
+ imulq 8-128(%rcx),%rax
+ addq %rax,%r11
+ vmovdqu -8+64-128(%rcx),%ymm12
+ shrq $29,%r10
+ imulq 16-128(%rcx),%rdx
+ addq %rdx,%r12
+ addq %r10,%r11
+
+ vpmuludq %ymm11,%ymm0,%ymm0
+ vmovq %xmm10,%rbx
+ vmovdqu -8+96-128(%rcx),%ymm13
+ vpaddq %ymm0,%ymm1,%ymm1
+ vpmuludq %ymm11,%ymm12,%ymm12
+ vmovdqu -8+128-128(%rcx),%ymm0
+ vpaddq %ymm12,%ymm2,%ymm2
+ vpmuludq %ymm11,%ymm13,%ymm13
+ vmovdqu -8+160-128(%rcx),%ymm12
+ vpaddq %ymm13,%ymm3,%ymm3
+ vpmuludq %ymm11,%ymm0,%ymm0
+ vmovdqu -8+192-128(%rcx),%ymm13
+ vpaddq %ymm0,%ymm4,%ymm4
+ vpmuludq %ymm11,%ymm12,%ymm12
+ vmovdqu -8+224-128(%rcx),%ymm0
+ vpaddq %ymm12,%ymm5,%ymm5
+ vpmuludq %ymm11,%ymm13,%ymm13
+ vmovdqu -8+256-128(%rcx),%ymm12
+ vpaddq %ymm13,%ymm6,%ymm6
+ vpmuludq %ymm11,%ymm0,%ymm0
+ vmovdqu -8+288-128(%rcx),%ymm13
+ vpaddq %ymm0,%ymm7,%ymm7
+ vpmuludq %ymm11,%ymm12,%ymm12
+ vpaddq %ymm12,%ymm8,%ymm8
+ vpmuludq %ymm11,%ymm13,%ymm13
+ vpaddq %ymm13,%ymm9,%ymm9
+
+ vmovdqu -16+32-128(%rsi),%ymm0
+ movq %rbx,%rax
+ imulq -128(%rsi),%rax
+ addq %r11,%rax
+
+ vmovdqu -16+64-128(%rsi),%ymm12
+ movq %rax,%r11
+ imull %r8d,%eax
+ andl $0x1fffffff,%eax
+
+ imulq 8-128(%rsi),%rbx
+ addq %rbx,%r12
+ vpmuludq %ymm10,%ymm0,%ymm0
+ vmovd %eax,%xmm11
+ vmovdqu -16+96-128(%rsi),%ymm13
+ vpaddq %ymm0,%ymm1,%ymm1
+ vpmuludq %ymm10,%ymm12,%ymm12
+ vpbroadcastq %xmm11,%ymm11
+ vmovdqu -16+128-128(%rsi),%ymm0
+ vpaddq %ymm12,%ymm2,%ymm2
+ vpmuludq %ymm10,%ymm13,%ymm13
+ vmovdqu -16+160-128(%rsi),%ymm12
+ vpaddq %ymm13,%ymm3,%ymm3
+ vpmuludq %ymm10,%ymm0,%ymm0
+ vmovdqu -16+192-128(%rsi),%ymm13
+ vpaddq %ymm0,%ymm4,%ymm4
+ vpmuludq %ymm10,%ymm12,%ymm12
+ vmovdqu -16+224-128(%rsi),%ymm0
+ vpaddq %ymm12,%ymm5,%ymm5
+ vpmuludq %ymm10,%ymm13,%ymm13
+ vmovdqu -16+256-128(%rsi),%ymm12
+ vpaddq %ymm13,%ymm6,%ymm6
+ vpmuludq %ymm10,%ymm0,%ymm0
+ vmovdqu -16+288-128(%rsi),%ymm13
+ vpaddq %ymm0,%ymm7,%ymm7
+ vpmuludq %ymm10,%ymm12,%ymm12
+ vpaddq %ymm12,%ymm8,%ymm8
+ vpmuludq %ymm10,%ymm13,%ymm13
+ vpbroadcastq 24(%r13),%ymm10
+ vpaddq %ymm13,%ymm9,%ymm9
+
+ vmovdqu -16+32-128(%rcx),%ymm0
+ movq %rax,%rdx
+ imulq -128(%rcx),%rax
+ addq %rax,%r11
+ vmovdqu -16+64-128(%rcx),%ymm12
+ imulq 8-128(%rcx),%rdx
+ addq %rdx,%r12
+ shrq $29,%r11
+
+ vpmuludq %ymm11,%ymm0,%ymm0
+ vmovq %xmm10,%rbx
+ vmovdqu -16+96-128(%rcx),%ymm13
+ vpaddq %ymm0,%ymm1,%ymm1
+ vpmuludq %ymm11,%ymm12,%ymm12
+ vmovdqu -16+128-128(%rcx),%ymm0
+ vpaddq %ymm12,%ymm2,%ymm2
+ vpmuludq %ymm11,%ymm13,%ymm13
+ vmovdqu -16+160-128(%rcx),%ymm12
+ vpaddq %ymm13,%ymm3,%ymm3
+ vpmuludq %ymm11,%ymm0,%ymm0
+ vmovdqu -16+192-128(%rcx),%ymm13
+ vpaddq %ymm0,%ymm4,%ymm4
+ vpmuludq %ymm11,%ymm12,%ymm12
+ vmovdqu -16+224-128(%rcx),%ymm0
+ vpaddq %ymm12,%ymm5,%ymm5
+ vpmuludq %ymm11,%ymm13,%ymm13
+ vmovdqu -16+256-128(%rcx),%ymm12
+ vpaddq %ymm13,%ymm6,%ymm6
+ vpmuludq %ymm11,%ymm0,%ymm0
+ vmovdqu -16+288-128(%rcx),%ymm13
+ vpaddq %ymm0,%ymm7,%ymm7
+ vpmuludq %ymm11,%ymm12,%ymm12
+ vmovdqu -24+32-128(%rsi),%ymm0
+ vpaddq %ymm12,%ymm8,%ymm8
+ vpmuludq %ymm11,%ymm13,%ymm13
+ vmovdqu -24+64-128(%rsi),%ymm12
+ vpaddq %ymm13,%ymm9,%ymm9
+
+ addq %r11,%r12
+ imulq -128(%rsi),%rbx
+ addq %rbx,%r12
+
+ movq %r12,%rax
+ imull %r8d,%eax
+ andl $0x1fffffff,%eax
+
+ vpmuludq %ymm10,%ymm0,%ymm0
+ vmovd %eax,%xmm11
+ vmovdqu -24+96-128(%rsi),%ymm13
+ vpaddq %ymm0,%ymm1,%ymm1
+ vpmuludq %ymm10,%ymm12,%ymm12
+ vpbroadcastq %xmm11,%ymm11
+ vmovdqu -24+128-128(%rsi),%ymm0
+ vpaddq %ymm12,%ymm2,%ymm2
+ vpmuludq %ymm10,%ymm13,%ymm13
+ vmovdqu -24+160-128(%rsi),%ymm12
+ vpaddq %ymm13,%ymm3,%ymm3
+ vpmuludq %ymm10,%ymm0,%ymm0
+ vmovdqu -24+192-128(%rsi),%ymm13
+ vpaddq %ymm0,%ymm4,%ymm4
+ vpmuludq %ymm10,%ymm12,%ymm12
+ vmovdqu -24+224-128(%rsi),%ymm0
+ vpaddq %ymm12,%ymm5,%ymm5
+ vpmuludq %ymm10,%ymm13,%ymm13
+ vmovdqu -24+256-128(%rsi),%ymm12
+ vpaddq %ymm13,%ymm6,%ymm6
+ vpmuludq %ymm10,%ymm0,%ymm0
+ vmovdqu -24+288-128(%rsi),%ymm13
+ vpaddq %ymm0,%ymm7,%ymm7
+ vpmuludq %ymm10,%ymm12,%ymm12
+ vpaddq %ymm12,%ymm8,%ymm8
+ vpmuludq %ymm10,%ymm13,%ymm13
+ vpbroadcastq 32(%r13),%ymm10
+ vpaddq %ymm13,%ymm9,%ymm9
+ addq $32,%r13
+
+ vmovdqu -24+32-128(%rcx),%ymm0
+ imulq -128(%rcx),%rax
+ addq %rax,%r12
+ shrq $29,%r12
+
+ vmovdqu -24+64-128(%rcx),%ymm12
+ vpmuludq %ymm11,%ymm0,%ymm0
+ vmovq %xmm10,%rbx
+ vmovdqu -24+96-128(%rcx),%ymm13
+ vpaddq %ymm0,%ymm1,%ymm0
+ vpmuludq %ymm11,%ymm12,%ymm12
+ vmovdqu %ymm0,(%rsp)
+ vpaddq %ymm12,%ymm2,%ymm1
+ vmovdqu -24+128-128(%rcx),%ymm0
+ vpmuludq %ymm11,%ymm13,%ymm13
+ vmovdqu -24+160-128(%rcx),%ymm12
+ vpaddq %ymm13,%ymm3,%ymm2
+ vpmuludq %ymm11,%ymm0,%ymm0
+ vmovdqu -24+192-128(%rcx),%ymm13
+ vpaddq %ymm0,%ymm4,%ymm3
+ vpmuludq %ymm11,%ymm12,%ymm12
+ vmovdqu -24+224-128(%rcx),%ymm0
+ vpaddq %ymm12,%ymm5,%ymm4
+ vpmuludq %ymm11,%ymm13,%ymm13
+ vmovdqu -24+256-128(%rcx),%ymm12
+ vpaddq %ymm13,%ymm6,%ymm5
+ vpmuludq %ymm11,%ymm0,%ymm0
+ vmovdqu -24+288-128(%rcx),%ymm13
+ movq %r12,%r9
+ vpaddq %ymm0,%ymm7,%ymm6
+ vpmuludq %ymm11,%ymm12,%ymm12
+ addq (%rsp),%r9
+ vpaddq %ymm12,%ymm8,%ymm7
+ vpmuludq %ymm11,%ymm13,%ymm13
+ vmovq %r12,%xmm12
+ vpaddq %ymm13,%ymm9,%ymm8
+
+ decl %r14d
+ jnz .Loop_mul_1024
+ vpaddq (%rsp),%ymm12,%ymm0
+
+ vpsrlq $29,%ymm0,%ymm12
+ vpand %ymm15,%ymm0,%ymm0
+ vpsrlq $29,%ymm1,%ymm13
+ vpand %ymm15,%ymm1,%ymm1
+ vpsrlq $29,%ymm2,%ymm10
+ vpermq $0x93,%ymm12,%ymm12
+ vpand %ymm15,%ymm2,%ymm2
+ vpsrlq $29,%ymm3,%ymm11
+ vpermq $0x93,%ymm13,%ymm13
+ vpand %ymm15,%ymm3,%ymm3
+
+ vpblendd $3,%ymm14,%ymm12,%ymm9
+ vpermq $0x93,%ymm10,%ymm10
+ vpblendd $3,%ymm12,%ymm13,%ymm12
+ vpermq $0x93,%ymm11,%ymm11
+ vpaddq %ymm9,%ymm0,%ymm0
+ vpblendd $3,%ymm13,%ymm10,%ymm13
+ vpaddq %ymm12,%ymm1,%ymm1
+ vpblendd $3,%ymm10,%ymm11,%ymm10
+ vpaddq %ymm13,%ymm2,%ymm2
+ vpblendd $3,%ymm11,%ymm14,%ymm11
+ vpaddq %ymm10,%ymm3,%ymm3
+ vpaddq %ymm11,%ymm4,%ymm4
+
+ vpsrlq $29,%ymm0,%ymm12
+ vpand %ymm15,%ymm0,%ymm0
+ vpsrlq $29,%ymm1,%ymm13
+ vpand %ymm15,%ymm1,%ymm1
+ vpsrlq $29,%ymm2,%ymm10
+ vpermq $0x93,%ymm12,%ymm12
+ vpand %ymm15,%ymm2,%ymm2
+ vpsrlq $29,%ymm3,%ymm11
+ vpermq $0x93,%ymm13,%ymm13
+ vpand %ymm15,%ymm3,%ymm3
+ vpermq $0x93,%ymm10,%ymm10
+
+ vpblendd $3,%ymm14,%ymm12,%ymm9
+ vpermq $0x93,%ymm11,%ymm11
+ vpblendd $3,%ymm12,%ymm13,%ymm12
+ vpaddq %ymm9,%ymm0,%ymm0
+ vpblendd $3,%ymm13,%ymm10,%ymm13
+ vpaddq %ymm12,%ymm1,%ymm1
+ vpblendd $3,%ymm10,%ymm11,%ymm10
+ vpaddq %ymm13,%ymm2,%ymm2
+ vpblendd $3,%ymm11,%ymm14,%ymm11
+ vpaddq %ymm10,%ymm3,%ymm3
+ vpaddq %ymm11,%ymm4,%ymm4
+
+ vmovdqu %ymm0,0-128(%rdi)
+ vmovdqu %ymm1,32-128(%rdi)
+ vmovdqu %ymm2,64-128(%rdi)
+ vmovdqu %ymm3,96-128(%rdi)
+ vpsrlq $29,%ymm4,%ymm12
+ vpand %ymm15,%ymm4,%ymm4
+ vpsrlq $29,%ymm5,%ymm13
+ vpand %ymm15,%ymm5,%ymm5
+ vpsrlq $29,%ymm6,%ymm10
+ vpermq $0x93,%ymm12,%ymm12
+ vpand %ymm15,%ymm6,%ymm6
+ vpsrlq $29,%ymm7,%ymm11
+ vpermq $0x93,%ymm13,%ymm13
+ vpand %ymm15,%ymm7,%ymm7
+ vpsrlq $29,%ymm8,%ymm0
+ vpermq $0x93,%ymm10,%ymm10
+ vpand %ymm15,%ymm8,%ymm8
+ vpermq $0x93,%ymm11,%ymm11
+
+ vpblendd $3,%ymm14,%ymm12,%ymm9
+ vpermq $0x93,%ymm0,%ymm0
+ vpblendd $3,%ymm12,%ymm13,%ymm12
+ vpaddq %ymm9,%ymm4,%ymm4
+ vpblendd $3,%ymm13,%ymm10,%ymm13
+ vpaddq %ymm12,%ymm5,%ymm5
+ vpblendd $3,%ymm10,%ymm11,%ymm10
+ vpaddq %ymm13,%ymm6,%ymm6
+ vpblendd $3,%ymm11,%ymm0,%ymm11
+ vpaddq %ymm10,%ymm7,%ymm7
+ vpaddq %ymm11,%ymm8,%ymm8
+
+ vpsrlq $29,%ymm4,%ymm12
+ vpand %ymm15,%ymm4,%ymm4
+ vpsrlq $29,%ymm5,%ymm13
+ vpand %ymm15,%ymm5,%ymm5
+ vpsrlq $29,%ymm6,%ymm10
+ vpermq $0x93,%ymm12,%ymm12
+ vpand %ymm15,%ymm6,%ymm6
+ vpsrlq $29,%ymm7,%ymm11
+ vpermq $0x93,%ymm13,%ymm13
+ vpand %ymm15,%ymm7,%ymm7
+ vpsrlq $29,%ymm8,%ymm0
+ vpermq $0x93,%ymm10,%ymm10
+ vpand %ymm15,%ymm8,%ymm8
+ vpermq $0x93,%ymm11,%ymm11
+
+ vpblendd $3,%ymm14,%ymm12,%ymm9
+ vpermq $0x93,%ymm0,%ymm0
+ vpblendd $3,%ymm12,%ymm13,%ymm12
+ vpaddq %ymm9,%ymm4,%ymm4
+ vpblendd $3,%ymm13,%ymm10,%ymm13
+ vpaddq %ymm12,%ymm5,%ymm5
+ vpblendd $3,%ymm10,%ymm11,%ymm10
+ vpaddq %ymm13,%ymm6,%ymm6
+ vpblendd $3,%ymm11,%ymm0,%ymm11
+ vpaddq %ymm10,%ymm7,%ymm7
+ vpaddq %ymm11,%ymm8,%ymm8
+
+ vmovdqu %ymm4,128-128(%rdi)
+ vmovdqu %ymm5,160-128(%rdi)
+ vmovdqu %ymm6,192-128(%rdi)
+ vmovdqu %ymm7,224-128(%rdi)
+ vmovdqu %ymm8,256-128(%rdi)
+ vzeroupper
+
+ movq %rbp,%rax
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lmul_1024_epilogue:
+ .byte 0xf3,0xc3
+.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
+.globl rsaz_1024_red2norm_avx2
+.type rsaz_1024_red2norm_avx2,@function
+.align 32
+rsaz_1024_red2norm_avx2:
+ subq $-128,%rsi
+ xorq %rax,%rax
+ movq -128(%rsi),%r8
+ movq -120(%rsi),%r9
+ movq -112(%rsi),%r10
+ shlq $0,%r8
+ shlq $29,%r9
+ movq %r10,%r11
+ shlq $58,%r10
+ shrq $6,%r11
+ addq %r8,%rax
+ addq %r9,%rax
+ addq %r10,%rax
+ adcq $0,%r11
+ movq %rax,0(%rdi)
+ movq %r11,%rax
+ movq -104(%rsi),%r8
+ movq -96(%rsi),%r9
+ shlq $23,%r8
+ movq %r9,%r10
+ shlq $52,%r9
+ shrq $12,%r10
+ addq %r8,%rax
+ addq %r9,%rax
+ adcq $0,%r10
+ movq %rax,8(%rdi)
+ movq %r10,%rax
+ movq -88(%rsi),%r11
+ movq -80(%rsi),%r8
+ shlq $17,%r11
+ movq %r8,%r9
+ shlq $46,%r8
+ shrq $18,%r9
+ addq %r11,%rax
+ addq %r8,%rax
+ adcq $0,%r9
+ movq %rax,16(%rdi)
+ movq %r9,%rax
+ movq -72(%rsi),%r10
+ movq -64(%rsi),%r11
+ shlq $11,%r10
+ movq %r11,%r8
+ shlq $40,%r11
+ shrq $24,%r8
+ addq %r10,%rax
+ addq %r11,%rax
+ adcq $0,%r8
+ movq %rax,24(%rdi)
+ movq %r8,%rax
+ movq -56(%rsi),%r9
+ movq -48(%rsi),%r10
+ movq -40(%rsi),%r11
+ shlq $5,%r9
+ shlq $34,%r10
+ movq %r11,%r8
+ shlq $63,%r11
+ shrq $1,%r8
+ addq %r9,%rax
+ addq %r10,%rax
+ addq %r11,%rax
+ adcq $0,%r8
+ movq %rax,32(%rdi)
+ movq %r8,%rax
+ movq -32(%rsi),%r9
+ movq -24(%rsi),%r10
+ shlq $28,%r9
+ movq %r10,%r11
+ shlq $57,%r10
+ shrq $7,%r11
+ addq %r9,%rax
+ addq %r10,%rax
+ adcq $0,%r11
+ movq %rax,40(%rdi)
+ movq %r11,%rax
+ movq -16(%rsi),%r8
+ movq -8(%rsi),%r9
+ shlq $22,%r8
+ movq %r9,%r10
+ shlq $51,%r9
+ shrq $13,%r10
+ addq %r8,%rax
+ addq %r9,%rax
+ adcq $0,%r10
+ movq %rax,48(%rdi)
+ movq %r10,%rax
+ movq 0(%rsi),%r11
+ movq 8(%rsi),%r8
+ shlq $16,%r11
+ movq %r8,%r9
+ shlq $45,%r8
+ shrq $19,%r9
+ addq %r11,%rax
+ addq %r8,%rax
+ adcq $0,%r9
+ movq %rax,56(%rdi)
+ movq %r9,%rax
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+ shlq $10,%r10
+ movq %r11,%r8
+ shlq $39,%r11
+ shrq $25,%r8
+ addq %r10,%rax
+ addq %r11,%rax
+ adcq $0,%r8
+ movq %rax,64(%rdi)
+ movq %r8,%rax
+ movq 32(%rsi),%r9
+ movq 40(%rsi),%r10
+ movq 48(%rsi),%r11
+ shlq $4,%r9
+ shlq $33,%r10
+ movq %r11,%r8
+ shlq $62,%r11
+ shrq $2,%r8
+ addq %r9,%rax
+ addq %r10,%rax
+ addq %r11,%rax
+ adcq $0,%r8
+ movq %rax,72(%rdi)
+ movq %r8,%rax
+ movq 56(%rsi),%r9
+ movq 64(%rsi),%r10
+ shlq $27,%r9
+ movq %r10,%r11
+ shlq $56,%r10
+ shrq $8,%r11
+ addq %r9,%rax
+ addq %r10,%rax
+ adcq $0,%r11
+ movq %rax,80(%rdi)
+ movq %r11,%rax
+ movq 72(%rsi),%r8
+ movq 80(%rsi),%r9
+ shlq $21,%r8
+ movq %r9,%r10
+ shlq $50,%r9
+ shrq $14,%r10
+ addq %r8,%rax
+ addq %r9,%rax
+ adcq $0,%r10
+ movq %rax,88(%rdi)
+ movq %r10,%rax
+ movq 88(%rsi),%r11
+ movq 96(%rsi),%r8
+ shlq $15,%r11
+ movq %r8,%r9
+ shlq $44,%r8
+ shrq $20,%r9
+ addq %r11,%rax
+ addq %r8,%rax
+ adcq $0,%r9
+ movq %rax,96(%rdi)
+ movq %r9,%rax
+ movq 104(%rsi),%r10
+ movq 112(%rsi),%r11
+ shlq $9,%r10
+ movq %r11,%r8
+ shlq $38,%r11
+ shrq $26,%r8
+ addq %r10,%rax
+ addq %r11,%rax
+ adcq $0,%r8
+ movq %rax,104(%rdi)
+ movq %r8,%rax
+ movq 120(%rsi),%r9
+ movq 128(%rsi),%r10
+ movq 136(%rsi),%r11
+ shlq $3,%r9
+ shlq $32,%r10
+ movq %r11,%r8
+ shlq $61,%r11
+ shrq $3,%r8
+ addq %r9,%rax
+ addq %r10,%rax
+ addq %r11,%rax
+ adcq $0,%r8
+ movq %rax,112(%rdi)
+ movq %r8,%rax
+ movq 144(%rsi),%r9
+ movq 152(%rsi),%r10
+ shlq $26,%r9
+ movq %r10,%r11
+ shlq $55,%r10
+ shrq $9,%r11
+ addq %r9,%rax
+ addq %r10,%rax
+ adcq $0,%r11
+ movq %rax,120(%rdi)
+ movq %r11,%rax
+ .byte 0xf3,0xc3
+.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2
+
+.globl rsaz_1024_norm2red_avx2
+.type rsaz_1024_norm2red_avx2,@function
+.align 32
+rsaz_1024_norm2red_avx2:
+ subq $-128,%rdi
+ movq (%rsi),%r8
+ movl $0x1fffffff,%eax
+ movq 8(%rsi),%r9
+ movq %r8,%r11
+ shrq $0,%r11
+ andq %rax,%r11
+ movq %r11,-128(%rdi)
+ movq %r8,%r10
+ shrq $29,%r10
+ andq %rax,%r10
+ movq %r10,-120(%rdi)
+ shrdq $58,%r9,%r8
+ andq %rax,%r8
+ movq %r8,-112(%rdi)
+ movq 16(%rsi),%r10
+ movq %r9,%r8
+ shrq $23,%r8
+ andq %rax,%r8
+ movq %r8,-104(%rdi)
+ shrdq $52,%r10,%r9
+ andq %rax,%r9
+ movq %r9,-96(%rdi)
+ movq 24(%rsi),%r11
+ movq %r10,%r9
+ shrq $17,%r9
+ andq %rax,%r9
+ movq %r9,-88(%rdi)
+ shrdq $46,%r11,%r10
+ andq %rax,%r10
+ movq %r10,-80(%rdi)
+ movq 32(%rsi),%r8
+ movq %r11,%r10
+ shrq $11,%r10
+ andq %rax,%r10
+ movq %r10,-72(%rdi)
+ shrdq $40,%r8,%r11
+ andq %rax,%r11
+ movq %r11,-64(%rdi)
+ movq 40(%rsi),%r9
+ movq %r8,%r11
+ shrq $5,%r11
+ andq %rax,%r11
+ movq %r11,-56(%rdi)
+ movq %r8,%r10
+ shrq $34,%r10
+ andq %rax,%r10
+ movq %r10,-48(%rdi)
+ shrdq $63,%r9,%r8
+ andq %rax,%r8
+ movq %r8,-40(%rdi)
+ movq 48(%rsi),%r10
+ movq %r9,%r8
+ shrq $28,%r8
+ andq %rax,%r8
+ movq %r8,-32(%rdi)
+ shrdq $57,%r10,%r9
+ andq %rax,%r9
+ movq %r9,-24(%rdi)
+ movq 56(%rsi),%r11
+ movq %r10,%r9
+ shrq $22,%r9
+ andq %rax,%r9
+ movq %r9,-16(%rdi)
+ shrdq $51,%r11,%r10
+ andq %rax,%r10
+ movq %r10,-8(%rdi)
+ movq 64(%rsi),%r8
+ movq %r11,%r10
+ shrq $16,%r10
+ andq %rax,%r10
+ movq %r10,0(%rdi)
+ shrdq $45,%r8,%r11
+ andq %rax,%r11
+ movq %r11,8(%rdi)
+ movq 72(%rsi),%r9
+ movq %r8,%r11
+ shrq $10,%r11
+ andq %rax,%r11
+ movq %r11,16(%rdi)
+ shrdq $39,%r9,%r8
+ andq %rax,%r8
+ movq %r8,24(%rdi)
+ movq 80(%rsi),%r10
+ movq %r9,%r8
+ shrq $4,%r8
+ andq %rax,%r8
+ movq %r8,32(%rdi)
+ movq %r9,%r11
+ shrq $33,%r11
+ andq %rax,%r11
+ movq %r11,40(%rdi)
+ shrdq $62,%r10,%r9
+ andq %rax,%r9
+ movq %r9,48(%rdi)
+ movq 88(%rsi),%r11
+ movq %r10,%r9
+ shrq $27,%r9
+ andq %rax,%r9
+ movq %r9,56(%rdi)
+ shrdq $56,%r11,%r10
+ andq %rax,%r10
+ movq %r10,64(%rdi)
+ movq 96(%rsi),%r8
+ movq %r11,%r10
+ shrq $21,%r10
+ andq %rax,%r10
+ movq %r10,72(%rdi)
+ shrdq $50,%r8,%r11
+ andq %rax,%r11
+ movq %r11,80(%rdi)
+ movq 104(%rsi),%r9
+ movq %r8,%r11
+ shrq $15,%r11
+ andq %rax,%r11
+ movq %r11,88(%rdi)
+ shrdq $44,%r9,%r8
+ andq %rax,%r8
+ movq %r8,96(%rdi)
+ movq 112(%rsi),%r10
+ movq %r9,%r8
+ shrq $9,%r8
+ andq %rax,%r8
+ movq %r8,104(%rdi)
+ shrdq $38,%r10,%r9
+ andq %rax,%r9
+ movq %r9,112(%rdi)
+ movq 120(%rsi),%r11
+ movq %r10,%r9
+ shrq $3,%r9
+ andq %rax,%r9
+ movq %r9,120(%rdi)
+ movq %r10,%r8
+ shrq $32,%r8
+ andq %rax,%r8
+ movq %r8,128(%rdi)
+ shrdq $61,%r11,%r10
+ andq %rax,%r10
+ movq %r10,136(%rdi)
+ xorq %r8,%r8
+ movq %r11,%r10
+ shrq $26,%r10
+ andq %rax,%r10
+ movq %r10,144(%rdi)
+ shrdq $55,%r8,%r11
+ andq %rax,%r11
+ movq %r11,152(%rdi)
+ movq %r8,160(%rdi)
+ movq %r8,168(%rdi)
+ movq %r8,176(%rdi)
+ movq %r8,184(%rdi)
+ .byte 0xf3,0xc3
+.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2
+.globl rsaz_1024_scatter5_avx2
+.type rsaz_1024_scatter5_avx2,@function
+.align 32
+rsaz_1024_scatter5_avx2:
+ vzeroupper
+ vmovdqu .Lscatter_permd(%rip),%ymm5
+ shll $4,%edx
+ leaq (%rdi,%rdx,1),%rdi
+ movl $9,%eax
+ jmp .Loop_scatter_1024
+
+.align 32
+.Loop_scatter_1024:
+ vmovdqu (%rsi),%ymm0
+ leaq 32(%rsi),%rsi
+ vpermd %ymm0,%ymm5,%ymm0
+ vmovdqu %xmm0,(%rdi)
+ leaq 512(%rdi),%rdi
+ decl %eax
+ jnz .Loop_scatter_1024
+
+ vzeroupper
+ .byte 0xf3,0xc3
+.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2
+
+.globl rsaz_1024_gather5_avx2
+.type rsaz_1024_gather5_avx2,@function
+.align 32
+rsaz_1024_gather5_avx2:
+ vzeroupper
+ movq %rsp,%r11
+ leaq -256(%rsp),%rsp
+ andq $-32,%rsp
+ leaq .Linc(%rip),%r10
+ leaq -128(%rsp),%rax
+
+ vmovd %edx,%xmm4
+ vmovdqa (%r10),%ymm0
+ vmovdqa 32(%r10),%ymm1
+ vmovdqa 64(%r10),%ymm5
+ vpbroadcastd %xmm4,%ymm4
+
+ vpaddd %ymm5,%ymm0,%ymm2
+ vpcmpeqd %ymm4,%ymm0,%ymm0
+ vpaddd %ymm5,%ymm1,%ymm3
+ vpcmpeqd %ymm4,%ymm1,%ymm1
+ vmovdqa %ymm0,0+128(%rax)
+ vpaddd %ymm5,%ymm2,%ymm0
+ vpcmpeqd %ymm4,%ymm2,%ymm2
+ vmovdqa %ymm1,32+128(%rax)
+ vpaddd %ymm5,%ymm3,%ymm1
+ vpcmpeqd %ymm4,%ymm3,%ymm3
+ vmovdqa %ymm2,64+128(%rax)
+ vpaddd %ymm5,%ymm0,%ymm2
+ vpcmpeqd %ymm4,%ymm0,%ymm0
+ vmovdqa %ymm3,96+128(%rax)
+ vpaddd %ymm5,%ymm1,%ymm3
+ vpcmpeqd %ymm4,%ymm1,%ymm1
+ vmovdqa %ymm0,128+128(%rax)
+ vpaddd %ymm5,%ymm2,%ymm8
+ vpcmpeqd %ymm4,%ymm2,%ymm2
+ vmovdqa %ymm1,160+128(%rax)
+ vpaddd %ymm5,%ymm3,%ymm9
+ vpcmpeqd %ymm4,%ymm3,%ymm3
+ vmovdqa %ymm2,192+128(%rax)
+ vpaddd %ymm5,%ymm8,%ymm10
+ vpcmpeqd %ymm4,%ymm8,%ymm8
+ vmovdqa %ymm3,224+128(%rax)
+ vpaddd %ymm5,%ymm9,%ymm11
+ vpcmpeqd %ymm4,%ymm9,%ymm9
+ vpaddd %ymm5,%ymm10,%ymm12
+ vpcmpeqd %ymm4,%ymm10,%ymm10
+ vpaddd %ymm5,%ymm11,%ymm13
+ vpcmpeqd %ymm4,%ymm11,%ymm11
+ vpaddd %ymm5,%ymm12,%ymm14
+ vpcmpeqd %ymm4,%ymm12,%ymm12
+ vpaddd %ymm5,%ymm13,%ymm15
+ vpcmpeqd %ymm4,%ymm13,%ymm13
+ vpcmpeqd %ymm4,%ymm14,%ymm14
+ vpcmpeqd %ymm4,%ymm15,%ymm15
+
+ vmovdqa -32(%r10),%ymm7
+ leaq 128(%rsi),%rsi
+ movl $9,%edx
+
+.Loop_gather_1024:
+ vmovdqa 0-128(%rsi),%ymm0
+ vmovdqa 32-128(%rsi),%ymm1
+ vmovdqa 64-128(%rsi),%ymm2
+ vmovdqa 96-128(%rsi),%ymm3
+ vpand 0+128(%rax),%ymm0,%ymm0
+ vpand 32+128(%rax),%ymm1,%ymm1
+ vpand 64+128(%rax),%ymm2,%ymm2
+ vpor %ymm0,%ymm1,%ymm4
+ vpand 96+128(%rax),%ymm3,%ymm3
+ vmovdqa 128-128(%rsi),%ymm0
+ vmovdqa 160-128(%rsi),%ymm1
+ vpor %ymm2,%ymm3,%ymm5
+ vmovdqa 192-128(%rsi),%ymm2
+ vmovdqa 224-128(%rsi),%ymm3
+ vpand 128+128(%rax),%ymm0,%ymm0
+ vpand 160+128(%rax),%ymm1,%ymm1
+ vpand 192+128(%rax),%ymm2,%ymm2
+ vpor %ymm0,%ymm4,%ymm4
+ vpand 224+128(%rax),%ymm3,%ymm3
+ vpand 256-128(%rsi),%ymm8,%ymm0
+ vpor %ymm1,%ymm5,%ymm5
+ vpand 288-128(%rsi),%ymm9,%ymm1
+ vpor %ymm2,%ymm4,%ymm4
+ vpand 320-128(%rsi),%ymm10,%ymm2
+ vpor %ymm3,%ymm5,%ymm5
+ vpand 352-128(%rsi),%ymm11,%ymm3
+ vpor %ymm0,%ymm4,%ymm4
+ vpand 384-128(%rsi),%ymm12,%ymm0
+ vpor %ymm1,%ymm5,%ymm5
+ vpand 416-128(%rsi),%ymm13,%ymm1
+ vpor %ymm2,%ymm4,%ymm4
+ vpand 448-128(%rsi),%ymm14,%ymm2
+ vpor %ymm3,%ymm5,%ymm5
+ vpand 480-128(%rsi),%ymm15,%ymm3
+ leaq 512(%rsi),%rsi
+ vpor %ymm0,%ymm4,%ymm4
+ vpor %ymm1,%ymm5,%ymm5
+ vpor %ymm2,%ymm4,%ymm4
+ vpor %ymm3,%ymm5,%ymm5
+
+ vpor %ymm5,%ymm4,%ymm4
+ vextracti128 $1,%ymm4,%xmm5
+ vpor %xmm4,%xmm5,%xmm5
+ vpermd %ymm5,%ymm7,%ymm5
+ vmovdqu %ymm5,(%rdi)
+ leaq 32(%rdi),%rdi
+ decl %edx
+ jnz .Loop_gather_1024
+
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovdqu %ymm0,(%rdi)
+ vzeroupper
+ leaq (%r11),%rsp
+ .byte 0xf3,0xc3
+.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
+
+.globl rsaz_avx2_eligible
+.type rsaz_avx2_eligible,@function
+.align 32
+rsaz_avx2_eligible:
+ movl OPENSSL_ia32cap_P+8(%rip),%eax
+ movl $524544,%ecx
+ movl $0,%edx
+ andl %eax,%ecx
+ cmpl $524544,%ecx
+ cmovel %edx,%eax
+ andl $32,%eax
+ shrl $5,%eax
+ .byte 0xf3,0xc3
+.size rsaz_avx2_eligible,.-rsaz_avx2_eligible
+
+.align 64
+.Land_mask:
+.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
+.Lscatter_permd:
+.long 0,2,4,6,7,7,7,7
+.Lgather_permd:
+.long 0,7,1,7,2,7,3,7
+.Linc:
+.long 0,0,0,0, 1,1,1,1
+.long 2,2,2,2, 3,3,3,3
+.long 4,4,4,4, 4,4,4,4
+.align 64
Property changes on: trunk/secure/lib/libcrypto/amd64/rsaz-avx2.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/amd64/rsaz-x86_64.S
===================================================================
--- trunk/secure/lib/libcrypto/amd64/rsaz-x86_64.S (rev 0)
+++ trunk/secure/lib/libcrypto/amd64/rsaz-x86_64.S 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,1876 @@
+/* $MidnightBSD$ */
+/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/rsaz-x86_64.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
+.text
+
+
+
+.globl rsaz_512_sqr
+.type rsaz_512_sqr,@function
+.align 32
+rsaz_512_sqr:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+ subq $128+24,%rsp
+.Lsqr_body:
+ movq %rdx,%rbp
+ movq (%rsi),%rdx
+ movq 8(%rsi),%rax
+ movq %rcx,128(%rsp)
+ movl $0x80100,%r11d
+ andl OPENSSL_ia32cap_P+8(%rip),%r11d
+ cmpl $0x80100,%r11d
+ je .Loop_sqrx
+ jmp .Loop_sqr
+
+.align 32
+.Loop_sqr:
+ movl %r8d,128+8(%rsp)
+
+ movq %rdx,%rbx
+ mulq %rdx
+ movq %rax,%r8
+ movq 16(%rsi),%rax
+ movq %rdx,%r9
+
+ mulq %rbx
+ addq %rax,%r9
+ movq 24(%rsi),%rax
+ movq %rdx,%r10
+ adcq $0,%r10
+
+ mulq %rbx
+ addq %rax,%r10
+ movq 32(%rsi),%rax
+ movq %rdx,%r11
+ adcq $0,%r11
+
+ mulq %rbx
+ addq %rax,%r11
+ movq 40(%rsi),%rax
+ movq %rdx,%r12
+ adcq $0,%r12
+
+ mulq %rbx
+ addq %rax,%r12
+ movq 48(%rsi),%rax
+ movq %rdx,%r13
+ adcq $0,%r13
+
+ mulq %rbx
+ addq %rax,%r13
+ movq 56(%rsi),%rax
+ movq %rdx,%r14
+ adcq $0,%r14
+
+ mulq %rbx
+ addq %rax,%r14
+ movq %rbx,%rax
+ movq %rdx,%r15
+ adcq $0,%r15
+
+ addq %r8,%r8
+ movq %r9,%rcx
+ adcq %r9,%r9
+
+ mulq %rax
+ movq %rax,(%rsp)
+ addq %rdx,%r8
+ adcq $0,%r9
+
+ movq %r8,8(%rsp)
+ shrq $63,%rcx
+
+
+ movq 8(%rsi),%r8
+ movq 16(%rsi),%rax
+ mulq %r8
+ addq %rax,%r10
+ movq 24(%rsi),%rax
+ movq %rdx,%rbx
+ adcq $0,%rbx
+
+ mulq %r8
+ addq %rax,%r11
+ movq 32(%rsi),%rax
+ adcq $0,%rdx
+ addq %rbx,%r11
+ movq %rdx,%rbx
+ adcq $0,%rbx
+
+ mulq %r8
+ addq %rax,%r12
+ movq 40(%rsi),%rax
+ adcq $0,%rdx
+ addq %rbx,%r12
+ movq %rdx,%rbx
+ adcq $0,%rbx
+
+ mulq %r8
+ addq %rax,%r13
+ movq 48(%rsi),%rax
+ adcq $0,%rdx
+ addq %rbx,%r13
+ movq %rdx,%rbx
+ adcq $0,%rbx
+
+ mulq %r8
+ addq %rax,%r14
+ movq 56(%rsi),%rax
+ adcq $0,%rdx
+ addq %rbx,%r14
+ movq %rdx,%rbx
+ adcq $0,%rbx
+
+ mulq %r8
+ addq %rax,%r15
+ movq %r8,%rax
+ adcq $0,%rdx
+ addq %rbx,%r15
+ movq %rdx,%r8
+ movq %r10,%rdx
+ adcq $0,%r8
+
+ addq %rdx,%rdx
+ leaq (%rcx,%r10,2),%r10
+ movq %r11,%rbx
+ adcq %r11,%r11
+
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+ movq %r9,16(%rsp)
+ movq %r10,24(%rsp)
+ shrq $63,%rbx
+
+
+ movq 16(%rsi),%r9
+ movq 24(%rsi),%rax
+ mulq %r9
+ addq %rax,%r12
+ movq 32(%rsi),%rax
+ movq %rdx,%rcx
+ adcq $0,%rcx
+
+ mulq %r9
+ addq %rax,%r13
+ movq 40(%rsi),%rax
+ adcq $0,%rdx
+ addq %rcx,%r13
+ movq %rdx,%rcx
+ adcq $0,%rcx
+
+ mulq %r9
+ addq %rax,%r14
+ movq 48(%rsi),%rax
+ adcq $0,%rdx
+ addq %rcx,%r14
+ movq %rdx,%rcx
+ adcq $0,%rcx
+
+ mulq %r9
+ movq %r12,%r10
+ leaq (%rbx,%r12,2),%r12
+ addq %rax,%r15
+ movq 56(%rsi),%rax
+ adcq $0,%rdx
+ addq %rcx,%r15
+ movq %rdx,%rcx
+ adcq $0,%rcx
+
+ mulq %r9
+ shrq $63,%r10
+ addq %rax,%r8
+ movq %r9,%rax
+ adcq $0,%rdx
+ addq %rcx,%r8
+ movq %rdx,%r9
+ adcq $0,%r9
+
+ movq %r13,%rcx
+ leaq (%r10,%r13,2),%r13
+
+ mulq %rax
+ addq %rax,%r11
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+ movq %r11,32(%rsp)
+ movq %r12,40(%rsp)
+ shrq $63,%rcx
+
+
+ movq 24(%rsi),%r10
+ movq 32(%rsi),%rax
+ mulq %r10
+ addq %rax,%r14
+ movq 40(%rsi),%rax
+ movq %rdx,%rbx
+ adcq $0,%rbx
+
+ mulq %r10
+ addq %rax,%r15
+ movq 48(%rsi),%rax
+ adcq $0,%rdx
+ addq %rbx,%r15
+ movq %rdx,%rbx
+ adcq $0,%rbx
+
+ mulq %r10
+ movq %r14,%r12
+ leaq (%rcx,%r14,2),%r14
+ addq %rax,%r8
+ movq 56(%rsi),%rax
+ adcq $0,%rdx
+ addq %rbx,%r8
+ movq %rdx,%rbx
+ adcq $0,%rbx
+
+ mulq %r10
+ shrq $63,%r12
+ addq %rax,%r9
+ movq %r10,%rax
+ adcq $0,%rdx
+ addq %rbx,%r9
+ movq %rdx,%r10
+ adcq $0,%r10
+
+ movq %r15,%rbx
+ leaq (%r12,%r15,2),%r15
+
+ mulq %rax
+ addq %rax,%r13
+ adcq %rdx,%r14
+ adcq $0,%r15
+
+ movq %r13,48(%rsp)
+ movq %r14,56(%rsp)
+ shrq $63,%rbx
+
+
+ movq 32(%rsi),%r11
+ movq 40(%rsi),%rax
+ mulq %r11
+ addq %rax,%r8
+ movq 48(%rsi),%rax
+ movq %rdx,%rcx
+ adcq $0,%rcx
+
+ mulq %r11
+ addq %rax,%r9
+ movq 56(%rsi),%rax
+ adcq $0,%rdx
+ movq %r8,%r12
+ leaq (%rbx,%r8,2),%r8
+ addq %rcx,%r9
+ movq %rdx,%rcx
+ adcq $0,%rcx
+
+ mulq %r11
+ shrq $63,%r12
+ addq %rax,%r10
+ movq %r11,%rax
+ adcq $0,%rdx
+ addq %rcx,%r10
+ movq %rdx,%r11
+ adcq $0,%r11
+
+ movq %r9,%rcx
+ leaq (%r12,%r9,2),%r9
+
+ mulq %rax
+ addq %rax,%r15
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+ movq %r15,64(%rsp)
+ movq %r8,72(%rsp)
+ shrq $63,%rcx
+
+
+ movq 40(%rsi),%r12
+ movq 48(%rsi),%rax
+ mulq %r12
+ addq %rax,%r10
+ movq 56(%rsi),%rax
+ movq %rdx,%rbx
+ adcq $0,%rbx
+
+ mulq %r12
+ addq %rax,%r11
+ movq %r12,%rax
+ movq %r10,%r15
+ leaq (%rcx,%r10,2),%r10
+ adcq $0,%rdx
+ shrq $63,%r15
+ addq %rbx,%r11
+ movq %rdx,%r12
+ adcq $0,%r12
+
+ movq %r11,%rbx
+ leaq (%r15,%r11,2),%r11
+
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+ movq %r9,80(%rsp)
+ movq %r10,88(%rsp)
+
+
+ movq 48(%rsi),%r13
+ movq 56(%rsi),%rax
+ mulq %r13
+ addq %rax,%r12
+ movq %r13,%rax
+ movq %rdx,%r13
+ adcq $0,%r13
+
+ xorq %r14,%r14
+ shlq $1,%rbx
+ adcq %r12,%r12
+ adcq %r13,%r13
+ adcq %r14,%r14
+
+ mulq %rax
+ addq %rax,%r11
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+ movq %r11,96(%rsp)
+ movq %r12,104(%rsp)
+
+
+ movq 56(%rsi),%rax
+ mulq %rax
+ addq %rax,%r13
+ adcq $0,%rdx
+
+ addq %rdx,%r14
+
+ movq %r13,112(%rsp)
+ movq %r14,120(%rsp)
+
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reduce
+
+ addq 64(%rsp),%r8
+ adcq 72(%rsp),%r9
+ adcq 80(%rsp),%r10
+ adcq 88(%rsp),%r11
+ adcq 96(%rsp),%r12
+ adcq 104(%rsp),%r13
+ adcq 112(%rsp),%r14
+ adcq 120(%rsp),%r15
+ sbbq %rcx,%rcx
+
+ call __rsaz_512_subtract
+
+ movq %r8,%rdx
+ movq %r9,%rax
+ movl 128+8(%rsp),%r8d
+ movq %rdi,%rsi
+
+ decl %r8d
+ jnz .Loop_sqr
+ jmp .Lsqr_tail
+
+.align 32
+.Loop_sqrx:
+ movl %r8d,128+8(%rsp)
+.byte 102,72,15,110,199
+.byte 102,72,15,110,205
+
+ mulxq %rax,%r8,%r9
+
+ mulxq 16(%rsi),%rcx,%r10
+ xorq %rbp,%rbp
+
+ mulxq 24(%rsi),%rax,%r11
+ adcxq %rcx,%r9
+
+ mulxq 32(%rsi),%rcx,%r12
+ adcxq %rax,%r10
+
+ mulxq 40(%rsi),%rax,%r13
+ adcxq %rcx,%r11
+
+.byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00
+ adcxq %rax,%r12
+ adcxq %rcx,%r13
+
+.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
+ adcxq %rax,%r14
+ adcxq %rbp,%r15
+
+ movq %r9,%rcx
+ shldq $1,%r8,%r9
+ shlq $1,%r8
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rdx,%r8
+ movq 8(%rsi),%rdx
+ adcxq %rbp,%r9
+
+ movq %rax,(%rsp)
+ movq %r8,8(%rsp)
+
+
+ mulxq 16(%rsi),%rax,%rbx
+ adoxq %rax,%r10
+ adcxq %rbx,%r11
+
+.byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00
+ adoxq %rdi,%r11
+ adcxq %r8,%r12
+
+ mulxq 32(%rsi),%rax,%rbx
+ adoxq %rax,%r12
+ adcxq %rbx,%r13
+
+ mulxq 40(%rsi),%rdi,%r8
+ adoxq %rdi,%r13
+ adcxq %r8,%r14
+
+.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
+ adoxq %rax,%r14
+ adcxq %rbx,%r15
+
+.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
+ adoxq %rdi,%r15
+ adcxq %rbp,%r8
+ adoxq %rbp,%r8
+
+ movq %r11,%rbx
+ shldq $1,%r10,%r11
+ shldq $1,%rcx,%r10
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rcx
+ movq 16(%rsi),%rdx
+ adcxq %rax,%r9
+ adcxq %rcx,%r10
+ adcxq %rbp,%r11
+
+ movq %r9,16(%rsp)
+.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
+
+
+.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00
+ adoxq %rdi,%r12
+ adcxq %r9,%r13
+
+ mulxq 32(%rsi),%rax,%rcx
+ adoxq %rax,%r13
+ adcxq %rcx,%r14
+
+ mulxq 40(%rsi),%rdi,%r9
+ adoxq %rdi,%r14
+ adcxq %r9,%r15
+
+.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
+ adoxq %rax,%r15
+ adcxq %rcx,%r8
+
+.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00
+ adoxq %rdi,%r8
+ adcxq %rbp,%r9
+ adoxq %rbp,%r9
+
+ movq %r13,%rcx
+ shldq $1,%r12,%r13
+ shldq $1,%rbx,%r12
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rax,%r11
+ adcxq %rdx,%r12
+ movq 24(%rsi),%rdx
+ adcxq %rbp,%r13
+
+ movq %r11,32(%rsp)
+.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00
+
+
+.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00
+ adoxq %rax,%r14
+ adcxq %rbx,%r15
+
+ mulxq 40(%rsi),%rdi,%r10
+ adoxq %rdi,%r15
+ adcxq %r10,%r8
+
+ mulxq 48(%rsi),%rax,%rbx
+ adoxq %rax,%r8
+ adcxq %rbx,%r9
+
+ mulxq 56(%rsi),%rdi,%r10
+ adoxq %rdi,%r9
+ adcxq %rbp,%r10
+ adoxq %rbp,%r10
+
+.byte 0x66
+ movq %r15,%rbx
+ shldq $1,%r14,%r15
+ shldq $1,%rcx,%r14
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rax,%r13
+ adcxq %rdx,%r14
+ movq 32(%rsi),%rdx
+ adcxq %rbp,%r15
+
+ movq %r13,48(%rsp)
+ movq %r14,56(%rsp)
+
+
+.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00
+ adoxq %rdi,%r8
+ adcxq %r11,%r9
+
+ mulxq 48(%rsi),%rax,%rcx
+ adoxq %rax,%r9
+ adcxq %rcx,%r10
+
+ mulxq 56(%rsi),%rdi,%r11
+ adoxq %rdi,%r10
+ adcxq %rbp,%r11
+ adoxq %rbp,%r11
+
+ movq %r9,%rcx
+ shldq $1,%r8,%r9
+ shldq $1,%rbx,%r8
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rax,%r15
+ adcxq %rdx,%r8
+ movq 40(%rsi),%rdx
+ adcxq %rbp,%r9
+
+ movq %r15,64(%rsp)
+ movq %r8,72(%rsp)
+
+
+.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
+ adoxq %rax,%r10
+ adcxq %rbx,%r11
+
+.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
+ adoxq %rdi,%r11
+ adcxq %rbp,%r12
+ adoxq %rbp,%r12
+
+ movq %r11,%rbx
+ shldq $1,%r10,%r11
+ shldq $1,%rcx,%r10
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rax,%r9
+ adcxq %rdx,%r10
+ movq 48(%rsi),%rdx
+ adcxq %rbp,%r11
+
+ movq %r9,80(%rsp)
+ movq %r10,88(%rsp)
+
+
+.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
+ adoxq %rax,%r12
+ adoxq %rbp,%r13
+
+ xorq %r14,%r14
+ shldq $1,%r13,%r14
+ shldq $1,%r12,%r13
+ shldq $1,%rbx,%r12
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rax,%r11
+ adcxq %rdx,%r12
+ movq 56(%rsi),%rdx
+ adcxq %rbp,%r13
+
+.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
+.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
+
+
+ mulxq %rdx,%rax,%rdx
+ adoxq %rax,%r13
+ adoxq %rbp,%rdx
+
+.byte 0x66
+ addq %rdx,%r14
+
+ movq %r13,112(%rsp)
+ movq %r14,120(%rsp)
+.byte 102,72,15,126,199
+.byte 102,72,15,126,205
+
+ movq 128(%rsp),%rdx
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reducex
+
+ addq 64(%rsp),%r8
+ adcq 72(%rsp),%r9
+ adcq 80(%rsp),%r10
+ adcq 88(%rsp),%r11
+ adcq 96(%rsp),%r12
+ adcq 104(%rsp),%r13
+ adcq 112(%rsp),%r14
+ adcq 120(%rsp),%r15
+ sbbq %rcx,%rcx
+
+ call __rsaz_512_subtract
+
+ movq %r8,%rdx
+ movq %r9,%rax
+ movl 128+8(%rsp),%r8d
+ movq %rdi,%rsi
+
+ decl %r8d
+ jnz .Loop_sqrx
+
+.Lsqr_tail:
+
+ leaq 128+24+48(%rsp),%rax
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lsqr_epilogue:
+ .byte 0xf3,0xc3
+.size rsaz_512_sqr,.-rsaz_512_sqr
+.globl rsaz_512_mul
+.type rsaz_512_mul,@function
+.align 32
+rsaz_512_mul:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+ subq $128+24,%rsp
+.Lmul_body:
+.byte 102,72,15,110,199
+.byte 102,72,15,110,201
+ movq %r8,128(%rsp)
+ movl $0x80100,%r11d
+ andl OPENSSL_ia32cap_P+8(%rip),%r11d
+ cmpl $0x80100,%r11d
+ je .Lmulx
+ movq (%rdx),%rbx
+ movq %rdx,%rbp
+ call __rsaz_512_mul
+
+.byte 102,72,15,126,199
+.byte 102,72,15,126,205
+
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reduce
+ jmp .Lmul_tail
+
+.align 32
+.Lmulx:
+ movq %rdx,%rbp
+ movq (%rdx),%rdx
+ call __rsaz_512_mulx
+
+.byte 102,72,15,126,199
+.byte 102,72,15,126,205
+
+ movq 128(%rsp),%rdx
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reducex
+.Lmul_tail:
+ addq 64(%rsp),%r8
+ adcq 72(%rsp),%r9
+ adcq 80(%rsp),%r10
+ adcq 88(%rsp),%r11
+ adcq 96(%rsp),%r12
+ adcq 104(%rsp),%r13
+ adcq 112(%rsp),%r14
+ adcq 120(%rsp),%r15
+ sbbq %rcx,%rcx
+
+ call __rsaz_512_subtract
+
+ leaq 128+24+48(%rsp),%rax
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lmul_epilogue:
+ .byte 0xf3,0xc3
+.size rsaz_512_mul,.-rsaz_512_mul
+.globl rsaz_512_mul_gather4
+.type rsaz_512_mul_gather4,@function
+.align 32
+rsaz_512_mul_gather4:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+ subq $152,%rsp
+.Lmul_gather4_body:
+ movd %r9d,%xmm8
+ movdqa .Linc+16(%rip),%xmm1
+ movdqa .Linc(%rip),%xmm0
+
+ pshufd $0,%xmm8,%xmm8
+ movdqa %xmm1,%xmm7
+ movdqa %xmm1,%xmm2
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm8,%xmm0
+ movdqa %xmm7,%xmm3
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm8,%xmm1
+ movdqa %xmm7,%xmm4
+ paddd %xmm2,%xmm3
+ pcmpeqd %xmm8,%xmm2
+ movdqa %xmm7,%xmm5
+ paddd %xmm3,%xmm4
+ pcmpeqd %xmm8,%xmm3
+ movdqa %xmm7,%xmm6
+ paddd %xmm4,%xmm5
+ pcmpeqd %xmm8,%xmm4
+ paddd %xmm5,%xmm6
+ pcmpeqd %xmm8,%xmm5
+ paddd %xmm6,%xmm7
+ pcmpeqd %xmm8,%xmm6
+ pcmpeqd %xmm8,%xmm7
+
+ movdqa 0(%rdx),%xmm8
+ movdqa 16(%rdx),%xmm9
+ movdqa 32(%rdx),%xmm10
+ movdqa 48(%rdx),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 64(%rdx),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 80(%rdx),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 96(%rdx),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 112(%rdx),%xmm15
+ leaq 128(%rdx),%rbp
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd $0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+ movl $0x80100,%r11d
+ andl OPENSSL_ia32cap_P+8(%rip),%r11d
+ cmpl $0x80100,%r11d
+ je .Lmulx_gather
+.byte 102,76,15,126,195
+
+ movq %r8,128(%rsp)
+ movq %rdi,128+8(%rsp)
+ movq %rcx,128+16(%rsp)
+
+ movq (%rsi),%rax
+ movq 8(%rsi),%rcx
+ mulq %rbx
+ movq %rax,(%rsp)
+ movq %rcx,%rax
+ movq %rdx,%r8
+
+ mulq %rbx
+ addq %rax,%r8
+ movq 16(%rsi),%rax
+ movq %rdx,%r9
+ adcq $0,%r9
+
+ mulq %rbx
+ addq %rax,%r9
+ movq 24(%rsi),%rax
+ movq %rdx,%r10
+ adcq $0,%r10
+
+ mulq %rbx
+ addq %rax,%r10
+ movq 32(%rsi),%rax
+ movq %rdx,%r11
+ adcq $0,%r11
+
+ mulq %rbx
+ addq %rax,%r11
+ movq 40(%rsi),%rax
+ movq %rdx,%r12
+ adcq $0,%r12
+
+ mulq %rbx
+ addq %rax,%r12
+ movq 48(%rsi),%rax
+ movq %rdx,%r13
+ adcq $0,%r13
+
+ mulq %rbx
+ addq %rax,%r13
+ movq 56(%rsi),%rax
+ movq %rdx,%r14
+ adcq $0,%r14
+
+ mulq %rbx
+ addq %rax,%r14
+ movq (%rsi),%rax
+ movq %rdx,%r15
+ adcq $0,%r15
+
+ leaq 8(%rsp),%rdi
+ movl $7,%ecx
+ jmp .Loop_mul_gather
+
+.align 32
+.Loop_mul_gather:
+ movdqa 0(%rbp),%xmm8
+ movdqa 16(%rbp),%xmm9
+ movdqa 32(%rbp),%xmm10
+ movdqa 48(%rbp),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 64(%rbp),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 80(%rbp),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 96(%rbp),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 112(%rbp),%xmm15
+ leaq 128(%rbp),%rbp
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd $0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+.byte 102,76,15,126,195
+
+ mulq %rbx
+ addq %rax,%r8
+ movq 8(%rsi),%rax
+ movq %r8,(%rdi)
+ movq %rdx,%r8
+ adcq $0,%r8
+
+ mulq %rbx
+ addq %rax,%r9
+ movq 16(%rsi),%rax
+ adcq $0,%rdx
+ addq %r9,%r8
+ movq %rdx,%r9
+ adcq $0,%r9
+
+ mulq %rbx
+ addq %rax,%r10
+ movq 24(%rsi),%rax
+ adcq $0,%rdx
+ addq %r10,%r9
+ movq %rdx,%r10
+ adcq $0,%r10
+
+ mulq %rbx
+ addq %rax,%r11
+ movq 32(%rsi),%rax
+ adcq $0,%rdx
+ addq %r11,%r10
+ movq %rdx,%r11
+ adcq $0,%r11
+
+ mulq %rbx
+ addq %rax,%r12
+ movq 40(%rsi),%rax
+ adcq $0,%rdx
+ addq %r12,%r11
+ movq %rdx,%r12
+ adcq $0,%r12
+
+ mulq %rbx
+ addq %rax,%r13
+ movq 48(%rsi),%rax
+ adcq $0,%rdx
+ addq %r13,%r12
+ movq %rdx,%r13
+ adcq $0,%r13
+
+ mulq %rbx
+ addq %rax,%r14
+ movq 56(%rsi),%rax
+ adcq $0,%rdx
+ addq %r14,%r13
+ movq %rdx,%r14
+ adcq $0,%r14
+
+ mulq %rbx
+ addq %rax,%r15
+ movq (%rsi),%rax
+ adcq $0,%rdx
+ addq %r15,%r14
+ movq %rdx,%r15
+ adcq $0,%r15
+
+ leaq 8(%rdi),%rdi
+
+ decl %ecx
+ jnz .Loop_mul_gather
+
+ movq %r8,(%rdi)
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+ movq %r12,32(%rdi)
+ movq %r13,40(%rdi)
+ movq %r14,48(%rdi)
+ movq %r15,56(%rdi)
+
+ movq 128+8(%rsp),%rdi
+ movq 128+16(%rsp),%rbp
+
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reduce
+ jmp .Lmul_gather_tail
+
+.align 32
+.Lmulx_gather:
+.byte 102,76,15,126,194
+
+ movq %r8,128(%rsp)
+ movq %rdi,128+8(%rsp)
+ movq %rcx,128+16(%rsp)
+
+ mulxq (%rsi),%rbx,%r8
+ movq %rbx,(%rsp)
+ xorl %edi,%edi
+
+ mulxq 8(%rsi),%rax,%r9
+
+ mulxq 16(%rsi),%rbx,%r10
+ adcxq %rax,%r8
+
+ mulxq 24(%rsi),%rax,%r11
+ adcxq %rbx,%r9
+
+ mulxq 32(%rsi),%rbx,%r12
+ adcxq %rax,%r10
+
+ mulxq 40(%rsi),%rax,%r13
+ adcxq %rbx,%r11
+
+ mulxq 48(%rsi),%rbx,%r14
+ adcxq %rax,%r12
+
+ mulxq 56(%rsi),%rax,%r15
+ adcxq %rbx,%r13
+ adcxq %rax,%r14
+.byte 0x67
+ movq %r8,%rbx
+ adcxq %rdi,%r15
+
+ movq $-7,%rcx
+ jmp .Loop_mulx_gather
+
+.align 32
+.Loop_mulx_gather:
+ movdqa 0(%rbp),%xmm8
+ movdqa 16(%rbp),%xmm9
+ movdqa 32(%rbp),%xmm10
+ movdqa 48(%rbp),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 64(%rbp),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 80(%rbp),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 96(%rbp),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 112(%rbp),%xmm15
+ leaq 128(%rbp),%rbp
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd $0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+.byte 102,76,15,126,194
+
+.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
+ adcxq %rax,%rbx
+ adoxq %r9,%r8
+
+ mulxq 8(%rsi),%rax,%r9
+ adcxq %rax,%r8
+ adoxq %r10,%r9
+
+ mulxq 16(%rsi),%rax,%r10
+ adcxq %rax,%r9
+ adoxq %r11,%r10
+
+.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
+ adcxq %rax,%r10
+ adoxq %r12,%r11
+
+ mulxq 32(%rsi),%rax,%r12
+ adcxq %rax,%r11
+ adoxq %r13,%r12
+
+ mulxq 40(%rsi),%rax,%r13
+ adcxq %rax,%r12
+ adoxq %r14,%r13
+
+.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
+ adcxq %rax,%r13
+.byte 0x67
+ adoxq %r15,%r14
+
+ mulxq 56(%rsi),%rax,%r15
+ movq %rbx,64(%rsp,%rcx,8)
+ adcxq %rax,%r14
+ adoxq %rdi,%r15
+ movq %r8,%rbx
+ adcxq %rdi,%r15
+
+ incq %rcx
+ jnz .Loop_mulx_gather
+
+ movq %r8,64(%rsp)
+ movq %r9,64+8(%rsp)
+ movq %r10,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+ movq %r12,64+32(%rsp)
+ movq %r13,64+40(%rsp)
+ movq %r14,64+48(%rsp)
+ movq %r15,64+56(%rsp)
+
+ movq 128(%rsp),%rdx
+ movq 128+8(%rsp),%rdi
+ movq 128+16(%rsp),%rbp
+
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reducex
+
+.Lmul_gather_tail:
+ addq 64(%rsp),%r8
+ adcq 72(%rsp),%r9
+ adcq 80(%rsp),%r10
+ adcq 88(%rsp),%r11
+ adcq 96(%rsp),%r12
+ adcq 104(%rsp),%r13
+ adcq 112(%rsp),%r14
+ adcq 120(%rsp),%r15
+ sbbq %rcx,%rcx
+
+ call __rsaz_512_subtract
+
+ leaq 128+24+48(%rsp),%rax
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lmul_gather4_epilogue:
+ .byte 0xf3,0xc3
+.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
+.globl rsaz_512_mul_scatter4
+.type rsaz_512_mul_scatter4,@function
+.align 32
+rsaz_512_mul_scatter4:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+ movl %r9d,%r9d
+ subq $128+24,%rsp
+.Lmul_scatter4_body:
+ leaq (%r8,%r9,8),%r8
+.byte 102,72,15,110,199
+.byte 102,72,15,110,202
+.byte 102,73,15,110,208
+ movq %rcx,128(%rsp)
+
+ movq %rdi,%rbp
+ movl $0x80100,%r11d
+ andl OPENSSL_ia32cap_P+8(%rip),%r11d
+ cmpl $0x80100,%r11d
+ je .Lmulx_scatter
+ movq (%rdi),%rbx
+ call __rsaz_512_mul
+
+.byte 102,72,15,126,199
+.byte 102,72,15,126,205
+
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reduce
+ jmp .Lmul_scatter_tail
+
+.align 32
+.Lmulx_scatter:
+ movq (%rdi),%rdx
+ call __rsaz_512_mulx
+
+.byte 102,72,15,126,199
+.byte 102,72,15,126,205
+
+ movq 128(%rsp),%rdx
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reducex
+
+.Lmul_scatter_tail:
+ addq 64(%rsp),%r8
+ adcq 72(%rsp),%r9
+ adcq 80(%rsp),%r10
+ adcq 88(%rsp),%r11
+ adcq 96(%rsp),%r12
+ adcq 104(%rsp),%r13
+ adcq 112(%rsp),%r14
+ adcq 120(%rsp),%r15
+.byte 102,72,15,126,214
+ sbbq %rcx,%rcx
+
+ call __rsaz_512_subtract
+
+ movq %r8,0(%rsi)
+ movq %r9,128(%rsi)
+ movq %r10,256(%rsi)
+ movq %r11,384(%rsi)
+ movq %r12,512(%rsi)
+ movq %r13,640(%rsi)
+ movq %r14,768(%rsi)
+ movq %r15,896(%rsi)
+
+ leaq 128+24+48(%rsp),%rax
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lmul_scatter4_epilogue:
+ .byte 0xf3,0xc3
+.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
+.globl rsaz_512_mul_by_one
+.type rsaz_512_mul_by_one,@function
+.align 32
+rsaz_512_mul_by_one:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+ subq $128+24,%rsp
+.Lmul_by_one_body:
+ movl OPENSSL_ia32cap_P+8(%rip),%eax
+ movq %rdx,%rbp
+ movq %rcx,128(%rsp)
+
+ movq (%rsi),%r8
+ pxor %xmm0,%xmm0
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+ movq 32(%rsi),%r12
+ movq 40(%rsi),%r13
+ movq 48(%rsi),%r14
+ movq 56(%rsi),%r15
+
+ movdqa %xmm0,(%rsp)
+ movdqa %xmm0,16(%rsp)
+ movdqa %xmm0,32(%rsp)
+ movdqa %xmm0,48(%rsp)
+ movdqa %xmm0,64(%rsp)
+ movdqa %xmm0,80(%rsp)
+ movdqa %xmm0,96(%rsp)
+ andl $0x80100,%eax
+ cmpl $0x80100,%eax
+ je .Lby_one_callx
+ call __rsaz_512_reduce
+ jmp .Lby_one_tail
+.align 32
+.Lby_one_callx:
+ movq 128(%rsp),%rdx
+ call __rsaz_512_reducex
+.Lby_one_tail:
+ movq %r8,(%rdi)
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+ movq %r12,32(%rdi)
+ movq %r13,40(%rdi)
+ movq %r14,48(%rdi)
+ movq %r15,56(%rdi)
+
+ leaq 128+24+48(%rsp),%rax
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lmul_by_one_epilogue:
+ .byte 0xf3,0xc3
+.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
+.type __rsaz_512_reduce,@function
+.align 32
+__rsaz_512_reduce:
+ movq %r8,%rbx
+ imulq 128+8(%rsp),%rbx
+ movq 0(%rbp),%rax
+ movl $8,%ecx
+ jmp .Lreduction_loop
+
+.align 32
+.Lreduction_loop:
+ mulq %rbx
+ movq 8(%rbp),%rax
+ negq %r8
+ movq %rdx,%r8
+ adcq $0,%r8
+
+ mulq %rbx
+ addq %rax,%r9
+ movq 16(%rbp),%rax
+ adcq $0,%rdx
+ addq %r9,%r8
+ movq %rdx,%r9
+ adcq $0,%r9
+
+ mulq %rbx
+ addq %rax,%r10
+ movq 24(%rbp),%rax
+ adcq $0,%rdx
+ addq %r10,%r9
+ movq %rdx,%r10
+ adcq $0,%r10
+
+ mulq %rbx
+ addq %rax,%r11
+ movq 32(%rbp),%rax
+ adcq $0,%rdx
+ addq %r11,%r10
+ movq 128+8(%rsp),%rsi
+
+
+ adcq $0,%rdx
+ movq %rdx,%r11
+
+ mulq %rbx
+ addq %rax,%r12
+ movq 40(%rbp),%rax
+ adcq $0,%rdx
+ imulq %r8,%rsi
+ addq %r12,%r11
+ movq %rdx,%r12
+ adcq $0,%r12
+
+ mulq %rbx
+ addq %rax,%r13
+ movq 48(%rbp),%rax
+ adcq $0,%rdx
+ addq %r13,%r12
+ movq %rdx,%r13
+ adcq $0,%r13
+
+ mulq %rbx
+ addq %rax,%r14
+ movq 56(%rbp),%rax
+ adcq $0,%rdx
+ addq %r14,%r13
+ movq %rdx,%r14
+ adcq $0,%r14
+
+ mulq %rbx
+ movq %rsi,%rbx
+ addq %rax,%r15
+ movq 0(%rbp),%rax
+ adcq $0,%rdx
+ addq %r15,%r14
+ movq %rdx,%r15
+ adcq $0,%r15
+
+ decl %ecx
+ jne .Lreduction_loop
+
+ .byte 0xf3,0xc3
+.size __rsaz_512_reduce,.-__rsaz_512_reduce
+.type __rsaz_512_reducex,@function
+.align 32
+__rsaz_512_reducex:
+
+ imulq %r8,%rdx
+ xorq %rsi,%rsi
+ movl $8,%ecx
+ jmp .Lreduction_loopx
+
+.align 32
+.Lreduction_loopx:
+ movq %r8,%rbx
+ mulxq 0(%rbp),%rax,%r8
+ adcxq %rbx,%rax
+ adoxq %r9,%r8
+
+ mulxq 8(%rbp),%rax,%r9
+ adcxq %rax,%r8
+ adoxq %r10,%r9
+
+ mulxq 16(%rbp),%rbx,%r10
+ adcxq %rbx,%r9
+ adoxq %r11,%r10
+
+ mulxq 24(%rbp),%rbx,%r11
+ adcxq %rbx,%r10
+ adoxq %r12,%r11
+
+.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
+ movq %rdx,%rax
+ movq %r8,%rdx
+ adcxq %rbx,%r11
+ adoxq %r13,%r12
+
+ mulxq 128+8(%rsp),%rbx,%rdx
+ movq %rax,%rdx
+
+ mulxq 40(%rbp),%rax,%r13
+ adcxq %rax,%r12
+ adoxq %r14,%r13
+
+.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
+ adcxq %rax,%r13
+ adoxq %r15,%r14
+
+ mulxq 56(%rbp),%rax,%r15
+ movq %rbx,%rdx
+ adcxq %rax,%r14
+ adoxq %rsi,%r15
+ adcxq %rsi,%r15
+
+ decl %ecx
+ jne .Lreduction_loopx
+
+ .byte 0xf3,0xc3
+.size __rsaz_512_reducex,.-__rsaz_512_reducex
+.type __rsaz_512_subtract,@function
+.align 32
+__rsaz_512_subtract:
+ movq %r8,(%rdi)
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+ movq %r12,32(%rdi)
+ movq %r13,40(%rdi)
+ movq %r14,48(%rdi)
+ movq %r15,56(%rdi)
+
+ movq 0(%rbp),%r8
+ movq 8(%rbp),%r9
+ negq %r8
+ notq %r9
+ andq %rcx,%r8
+ movq 16(%rbp),%r10
+ andq %rcx,%r9
+ notq %r10
+ movq 24(%rbp),%r11
+ andq %rcx,%r10
+ notq %r11
+ movq 32(%rbp),%r12
+ andq %rcx,%r11
+ notq %r12
+ movq 40(%rbp),%r13
+ andq %rcx,%r12
+ notq %r13
+ movq 48(%rbp),%r14
+ andq %rcx,%r13
+ notq %r14
+ movq 56(%rbp),%r15
+ andq %rcx,%r14
+ notq %r15
+ andq %rcx,%r15
+
+ addq (%rdi),%r8
+ adcq 8(%rdi),%r9
+ adcq 16(%rdi),%r10
+ adcq 24(%rdi),%r11
+ adcq 32(%rdi),%r12
+ adcq 40(%rdi),%r13
+ adcq 48(%rdi),%r14
+ adcq 56(%rdi),%r15
+
+ movq %r8,(%rdi)
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+ movq %r12,32(%rdi)
+ movq %r13,40(%rdi)
+ movq %r14,48(%rdi)
+ movq %r15,56(%rdi)
+
+ .byte 0xf3,0xc3
+.size __rsaz_512_subtract,.-__rsaz_512_subtract
+.type __rsaz_512_mul,@function
+.align 32
+__rsaz_512_mul:
+ leaq 8(%rsp),%rdi
+
+ movq (%rsi),%rax
+ mulq %rbx
+ movq %rax,(%rdi)
+ movq 8(%rsi),%rax
+ movq %rdx,%r8
+
+ mulq %rbx
+ addq %rax,%r8
+ movq 16(%rsi),%rax
+ movq %rdx,%r9
+ adcq $0,%r9
+
+ mulq %rbx
+ addq %rax,%r9
+ movq 24(%rsi),%rax
+ movq %rdx,%r10
+ adcq $0,%r10
+
+ mulq %rbx
+ addq %rax,%r10
+ movq 32(%rsi),%rax
+ movq %rdx,%r11
+ adcq $0,%r11
+
+ mulq %rbx
+ addq %rax,%r11
+ movq 40(%rsi),%rax
+ movq %rdx,%r12
+ adcq $0,%r12
+
+ mulq %rbx
+ addq %rax,%r12
+ movq 48(%rsi),%rax
+ movq %rdx,%r13
+ adcq $0,%r13
+
+ mulq %rbx
+ addq %rax,%r13
+ movq 56(%rsi),%rax
+ movq %rdx,%r14
+ adcq $0,%r14
+
+ mulq %rbx
+ addq %rax,%r14
+ movq (%rsi),%rax
+ movq %rdx,%r15
+ adcq $0,%r15
+
+ leaq 8(%rbp),%rbp
+ leaq 8(%rdi),%rdi
+
+ movl $7,%ecx
+ jmp .Loop_mul
+
+.align 32
+.Loop_mul:
+ movq (%rbp),%rbx
+ mulq %rbx
+ addq %rax,%r8
+ movq 8(%rsi),%rax
+ movq %r8,(%rdi)
+ movq %rdx,%r8
+ adcq $0,%r8
+
+ mulq %rbx
+ addq %rax,%r9
+ movq 16(%rsi),%rax
+ adcq $0,%rdx
+ addq %r9,%r8
+ movq %rdx,%r9
+ adcq $0,%r9
+
+ mulq %rbx
+ addq %rax,%r10
+ movq 24(%rsi),%rax
+ adcq $0,%rdx
+ addq %r10,%r9
+ movq %rdx,%r10
+ adcq $0,%r10
+
+ mulq %rbx
+ addq %rax,%r11
+ movq 32(%rsi),%rax
+ adcq $0,%rdx
+ addq %r11,%r10
+ movq %rdx,%r11
+ adcq $0,%r11
+
+ mulq %rbx
+ addq %rax,%r12
+ movq 40(%rsi),%rax
+ adcq $0,%rdx
+ addq %r12,%r11
+ movq %rdx,%r12
+ adcq $0,%r12
+
+ mulq %rbx
+ addq %rax,%r13
+ movq 48(%rsi),%rax
+ adcq $0,%rdx
+ addq %r13,%r12
+ movq %rdx,%r13
+ adcq $0,%r13
+
+ mulq %rbx
+ addq %rax,%r14
+ movq 56(%rsi),%rax
+ adcq $0,%rdx
+ addq %r14,%r13
+ movq %rdx,%r14
+ leaq 8(%rbp),%rbp
+ adcq $0,%r14
+
+ mulq %rbx
+ addq %rax,%r15
+ movq (%rsi),%rax
+ adcq $0,%rdx
+ addq %r15,%r14
+ movq %rdx,%r15
+ adcq $0,%r15
+
+ leaq 8(%rdi),%rdi
+
+ decl %ecx
+ jnz .Loop_mul
+
+ movq %r8,(%rdi)
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+ movq %r12,32(%rdi)
+ movq %r13,40(%rdi)
+ movq %r14,48(%rdi)
+ movq %r15,56(%rdi)
+
+ .byte 0xf3,0xc3
+.size __rsaz_512_mul,.-__rsaz_512_mul
+.type __rsaz_512_mulx,@function
+.align 32
+__rsaz_512_mulx:
+ mulxq (%rsi),%rbx,%r8
+ movq $-6,%rcx
+
+ mulxq 8(%rsi),%rax,%r9
+ movq %rbx,8(%rsp)
+
+ mulxq 16(%rsi),%rbx,%r10
+ adcq %rax,%r8
+
+ mulxq 24(%rsi),%rax,%r11
+ adcq %rbx,%r9
+
+ mulxq 32(%rsi),%rbx,%r12
+ adcq %rax,%r10
+
+ mulxq 40(%rsi),%rax,%r13
+ adcq %rbx,%r11
+
+ mulxq 48(%rsi),%rbx,%r14
+ adcq %rax,%r12
+
+ mulxq 56(%rsi),%rax,%r15
+ movq 8(%rbp),%rdx
+ adcq %rbx,%r13
+ adcq %rax,%r14
+ adcq $0,%r15
+
+ xorq %rdi,%rdi
+ jmp .Loop_mulx
+
+.align 32
+.Loop_mulx:
+ movq %r8,%rbx
+ mulxq (%rsi),%rax,%r8
+ adcxq %rax,%rbx
+ adoxq %r9,%r8
+
+ mulxq 8(%rsi),%rax,%r9
+ adcxq %rax,%r8
+ adoxq %r10,%r9
+
+ mulxq 16(%rsi),%rax,%r10
+ adcxq %rax,%r9
+ adoxq %r11,%r10
+
+ mulxq 24(%rsi),%rax,%r11
+ adcxq %rax,%r10
+ adoxq %r12,%r11
+
+.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
+ adcxq %rax,%r11
+ adoxq %r13,%r12
+
+ mulxq 40(%rsi),%rax,%r13
+ adcxq %rax,%r12
+ adoxq %r14,%r13
+
+ mulxq 48(%rsi),%rax,%r14
+ adcxq %rax,%r13
+ adoxq %r15,%r14
+
+ mulxq 56(%rsi),%rax,%r15
+ movq 64(%rbp,%rcx,8),%rdx
+ movq %rbx,8+64-8(%rsp,%rcx,8)
+ adcxq %rax,%r14
+ adoxq %rdi,%r15
+ adcxq %rdi,%r15
+
+ incq %rcx
+ jnz .Loop_mulx
+
+ movq %r8,%rbx
+ mulxq (%rsi),%rax,%r8
+ adcxq %rax,%rbx
+ adoxq %r9,%r8
+
+.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
+ adcxq %rax,%r8
+ adoxq %r10,%r9
+
+.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
+ adcxq %rax,%r9
+ adoxq %r11,%r10
+
+ mulxq 24(%rsi),%rax,%r11
+ adcxq %rax,%r10
+ adoxq %r12,%r11
+
+ mulxq 32(%rsi),%rax,%r12
+ adcxq %rax,%r11
+ adoxq %r13,%r12
+
+ mulxq 40(%rsi),%rax,%r13
+ adcxq %rax,%r12
+ adoxq %r14,%r13
+
+.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
+ adcxq %rax,%r13
+ adoxq %r15,%r14
+
+.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
+ adcxq %rax,%r14
+ adoxq %rdi,%r15
+ adcxq %rdi,%r15
+
+ movq %rbx,8+64-8(%rsp)
+ movq %r8,8+64(%rsp)
+ movq %r9,8+64+8(%rsp)
+ movq %r10,8+64+16(%rsp)
+ movq %r11,8+64+24(%rsp)
+ movq %r12,8+64+32(%rsp)
+ movq %r13,8+64+40(%rsp)
+ movq %r14,8+64+48(%rsp)
+ movq %r15,8+64+56(%rsp)
+
+ .byte 0xf3,0xc3
+.size __rsaz_512_mulx,.-__rsaz_512_mulx
+.globl rsaz_512_scatter4
+.type rsaz_512_scatter4,@function
+.align 16
+rsaz_512_scatter4:
+ leaq (%rdi,%rdx,8),%rdi
+ movl $8,%r9d
+ jmp .Loop_scatter
+.align 16
+.Loop_scatter:
+ movq (%rsi),%rax
+ leaq 8(%rsi),%rsi
+ movq %rax,(%rdi)
+ leaq 128(%rdi),%rdi
+ decl %r9d
+ jnz .Loop_scatter
+ .byte 0xf3,0xc3
+.size rsaz_512_scatter4,.-rsaz_512_scatter4
+
+.globl rsaz_512_gather4
+.type rsaz_512_gather4,@function
+.align 16
+rsaz_512_gather4:
+ movd %edx,%xmm8
+ movdqa .Linc+16(%rip),%xmm1
+ movdqa .Linc(%rip),%xmm0
+
+ pshufd $0,%xmm8,%xmm8
+ movdqa %xmm1,%xmm7
+ movdqa %xmm1,%xmm2
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm8,%xmm0
+ movdqa %xmm7,%xmm3
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm8,%xmm1
+ movdqa %xmm7,%xmm4
+ paddd %xmm2,%xmm3
+ pcmpeqd %xmm8,%xmm2
+ movdqa %xmm7,%xmm5
+ paddd %xmm3,%xmm4
+ pcmpeqd %xmm8,%xmm3
+ movdqa %xmm7,%xmm6
+ paddd %xmm4,%xmm5
+ pcmpeqd %xmm8,%xmm4
+ paddd %xmm5,%xmm6
+ pcmpeqd %xmm8,%xmm5
+ paddd %xmm6,%xmm7
+ pcmpeqd %xmm8,%xmm6
+ pcmpeqd %xmm8,%xmm7
+ movl $8,%r9d
+ jmp .Loop_gather
+.align 16
+.Loop_gather:
+ movdqa 0(%rsi),%xmm8
+ movdqa 16(%rsi),%xmm9
+ movdqa 32(%rsi),%xmm10
+ movdqa 48(%rsi),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 64(%rsi),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 80(%rsi),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 96(%rsi),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 112(%rsi),%xmm15
+ leaq 128(%rsi),%rsi
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd $0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+ movq %xmm8,(%rdi)
+ leaq 8(%rdi),%rdi
+ decl %r9d
+ jnz .Loop_gather
+ .byte 0xf3,0xc3
+.LSEH_end_rsaz_512_gather4:
+.size rsaz_512_gather4,.-rsaz_512_gather4
+
+.align 64
+.Linc:
+.long 0,0, 1,1
+.long 2,2, 2,2
Property changes on: trunk/secure/lib/libcrypto/amd64/rsaz-x86_64.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S
===================================================================
--- trunk/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S (rev 0)
+++ trunk/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,7224 @@
+/* $MidnightBSD$ */
+/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from sha1-mb-x86_64.pl. */
+.text
+
+
+
+.globl sha1_multi_block
+.type sha1_multi_block,@function
+.align 32
+sha1_multi_block:
+ movq OPENSSL_ia32cap_P+4(%rip),%rcx
+ btq $61,%rcx
+ jc _shaext_shortcut
+ testl $268435456,%ecx
+ jnz _avx_shortcut
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ subq $288,%rsp
+ andq $-256,%rsp
+ movq %rax,272(%rsp)
+.Lbody:
+ leaq K_XX_XX(%rip),%rbp
+ leaq 256(%rsp),%rbx
+
+.Loop_grande:
+ movl %edx,280(%rsp)
+ xorl %edx,%edx
+ movq 0(%rsi),%r8
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rbp,%r8
+ movq 16(%rsi),%r9
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rbp,%r9
+ movq 32(%rsi),%r10
+ movl 40(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,8(%rbx)
+ cmovleq %rbp,%r10
+ movq 48(%rsi),%r11
+ movl 56(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,12(%rbx)
+ cmovleq %rbp,%r11
+ testl %edx,%edx
+ jz .Ldone
+
+ movdqu 0(%rdi),%xmm10
+ leaq 128(%rsp),%rax
+ movdqu 32(%rdi),%xmm11
+ movdqu 64(%rdi),%xmm12
+ movdqu 96(%rdi),%xmm13
+ movdqu 128(%rdi),%xmm14
+ movdqa 96(%rbp),%xmm5
+ movdqa -32(%rbp),%xmm15
+ jmp .Loop
+
+.align 32
+.Loop:
+ movd (%r8),%xmm0
+ leaq 64(%r8),%r8
+ movd (%r9),%xmm2
+ leaq 64(%r9),%r9
+ movd (%r10),%xmm3
+ leaq 64(%r10),%r10
+ movd (%r11),%xmm4
+ leaq 64(%r11),%r11
+ punpckldq %xmm3,%xmm0
+ movd -60(%r8),%xmm1
+ punpckldq %xmm4,%xmm2
+ movd -60(%r9),%xmm9
+ punpckldq %xmm2,%xmm0
+ movd -60(%r10),%xmm8
+.byte 102,15,56,0,197
+ movd -60(%r11),%xmm7
+ punpckldq %xmm8,%xmm1
+ movdqa %xmm10,%xmm8
+ paddd %xmm15,%xmm14
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm13,%xmm7
+ pand %xmm12,%xmm6
+ punpckldq %xmm9,%xmm1
+ movdqa %xmm10,%xmm9
+
+ movdqa %xmm0,0-128(%rax)
+ paddd %xmm0,%xmm14
+ movd -56(%r8),%xmm2
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm11,%xmm7
+
+ por %xmm9,%xmm8
+ movd -56(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm14
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+.byte 102,15,56,0,205
+ movd -56(%r10),%xmm8
+ por %xmm7,%xmm11
+ movd -56(%r11),%xmm7
+ punpckldq %xmm8,%xmm2
+ movdqa %xmm14,%xmm8
+ paddd %xmm15,%xmm13
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm12,%xmm7
+ pand %xmm11,%xmm6
+ punpckldq %xmm9,%xmm2
+ movdqa %xmm14,%xmm9
+
+ movdqa %xmm1,16-128(%rax)
+ paddd %xmm1,%xmm13
+ movd -52(%r8),%xmm3
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm10,%xmm7
+
+ por %xmm9,%xmm8
+ movd -52(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm13
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+.byte 102,15,56,0,213
+ movd -52(%r10),%xmm8
+ por %xmm7,%xmm10
+ movd -52(%r11),%xmm7
+ punpckldq %xmm8,%xmm3
+ movdqa %xmm13,%xmm8
+ paddd %xmm15,%xmm12
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm11,%xmm7
+ pand %xmm10,%xmm6
+ punpckldq %xmm9,%xmm3
+ movdqa %xmm13,%xmm9
+
+ movdqa %xmm2,32-128(%rax)
+ paddd %xmm2,%xmm12
+ movd -48(%r8),%xmm4
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm14,%xmm7
+
+ por %xmm9,%xmm8
+ movd -48(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm12
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+.byte 102,15,56,0,221
+ movd -48(%r10),%xmm8
+ por %xmm7,%xmm14
+ movd -48(%r11),%xmm7
+ punpckldq %xmm8,%xmm4
+ movdqa %xmm12,%xmm8
+ paddd %xmm15,%xmm11
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm10,%xmm7
+ pand %xmm14,%xmm6
+ punpckldq %xmm9,%xmm4
+ movdqa %xmm12,%xmm9
+
+ movdqa %xmm3,48-128(%rax)
+ paddd %xmm3,%xmm11
+ movd -44(%r8),%xmm0
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm13,%xmm7
+
+ por %xmm9,%xmm8
+ movd -44(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm11
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+.byte 102,15,56,0,229
+ movd -44(%r10),%xmm8
+ por %xmm7,%xmm13
+ movd -44(%r11),%xmm7
+ punpckldq %xmm8,%xmm0
+ movdqa %xmm11,%xmm8
+ paddd %xmm15,%xmm10
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm14,%xmm7
+ pand %xmm13,%xmm6
+ punpckldq %xmm9,%xmm0
+ movdqa %xmm11,%xmm9
+
+ movdqa %xmm4,64-128(%rax)
+ paddd %xmm4,%xmm10
+ movd -40(%r8),%xmm1
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm12,%xmm7
+
+ por %xmm9,%xmm8
+ movd -40(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm10
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+.byte 102,15,56,0,197
+ movd -40(%r10),%xmm8
+ por %xmm7,%xmm12
+ movd -40(%r11),%xmm7
+ punpckldq %xmm8,%xmm1
+ movdqa %xmm10,%xmm8
+ paddd %xmm15,%xmm14
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm13,%xmm7
+ pand %xmm12,%xmm6
+ punpckldq %xmm9,%xmm1
+ movdqa %xmm10,%xmm9
+
+ movdqa %xmm0,80-128(%rax)
+ paddd %xmm0,%xmm14
+ movd -36(%r8),%xmm2
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm11,%xmm7
+
+ por %xmm9,%xmm8
+ movd -36(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm14
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+.byte 102,15,56,0,205
+ movd -36(%r10),%xmm8
+ por %xmm7,%xmm11
+ movd -36(%r11),%xmm7
+ punpckldq %xmm8,%xmm2
+ movdqa %xmm14,%xmm8
+ paddd %xmm15,%xmm13
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm12,%xmm7
+ pand %xmm11,%xmm6
+ punpckldq %xmm9,%xmm2
+ movdqa %xmm14,%xmm9
+
+ movdqa %xmm1,96-128(%rax)
+ paddd %xmm1,%xmm13
+ movd -32(%r8),%xmm3
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm10,%xmm7
+
+ por %xmm9,%xmm8
+ movd -32(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm13
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+.byte 102,15,56,0,213
+ movd -32(%r10),%xmm8
+ por %xmm7,%xmm10
+ movd -32(%r11),%xmm7
+ punpckldq %xmm8,%xmm3
+ movdqa %xmm13,%xmm8
+ paddd %xmm15,%xmm12
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm11,%xmm7
+ pand %xmm10,%xmm6
+ punpckldq %xmm9,%xmm3
+ movdqa %xmm13,%xmm9
+
+ movdqa %xmm2,112-128(%rax)
+ paddd %xmm2,%xmm12
+ movd -28(%r8),%xmm4
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm14,%xmm7
+
+ por %xmm9,%xmm8
+ movd -28(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm12
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+.byte 102,15,56,0,221
+ movd -28(%r10),%xmm8
+ por %xmm7,%xmm14
+ movd -28(%r11),%xmm7
+ punpckldq %xmm8,%xmm4
+ movdqa %xmm12,%xmm8
+ paddd %xmm15,%xmm11
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm10,%xmm7
+ pand %xmm14,%xmm6
+ punpckldq %xmm9,%xmm4
+ movdqa %xmm12,%xmm9
+
+ movdqa %xmm3,128-128(%rax)
+ paddd %xmm3,%xmm11
+ movd -24(%r8),%xmm0
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm13,%xmm7
+
+ por %xmm9,%xmm8
+ movd -24(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm11
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+.byte 102,15,56,0,229
+ movd -24(%r10),%xmm8
+ por %xmm7,%xmm13
+ movd -24(%r11),%xmm7
+ punpckldq %xmm8,%xmm0
+ movdqa %xmm11,%xmm8
+ paddd %xmm15,%xmm10
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm14,%xmm7
+ pand %xmm13,%xmm6
+ punpckldq %xmm9,%xmm0
+ movdqa %xmm11,%xmm9
+
+ movdqa %xmm4,144-128(%rax)
+ paddd %xmm4,%xmm10
+ movd -20(%r8),%xmm1
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm12,%xmm7
+
+ por %xmm9,%xmm8
+ movd -20(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm10
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+.byte 102,15,56,0,197
+ movd -20(%r10),%xmm8
+ por %xmm7,%xmm12
+ movd -20(%r11),%xmm7
+ punpckldq %xmm8,%xmm1
+ movdqa %xmm10,%xmm8
+ paddd %xmm15,%xmm14
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm13,%xmm7
+ pand %xmm12,%xmm6
+ punpckldq %xmm9,%xmm1
+ movdqa %xmm10,%xmm9
+
+ movdqa %xmm0,160-128(%rax)
+ paddd %xmm0,%xmm14
+ movd -16(%r8),%xmm2
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm11,%xmm7
+
+ por %xmm9,%xmm8
+ movd -16(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm14
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+.byte 102,15,56,0,205
+ movd -16(%r10),%xmm8
+ por %xmm7,%xmm11
+ movd -16(%r11),%xmm7
+ punpckldq %xmm8,%xmm2
+ movdqa %xmm14,%xmm8
+ paddd %xmm15,%xmm13
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm12,%xmm7
+ pand %xmm11,%xmm6
+ punpckldq %xmm9,%xmm2
+ movdqa %xmm14,%xmm9
+
+ movdqa %xmm1,176-128(%rax)
+ paddd %xmm1,%xmm13
+ movd -12(%r8),%xmm3
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm10,%xmm7
+
+ por %xmm9,%xmm8
+ movd -12(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm13
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+.byte 102,15,56,0,213
+ movd -12(%r10),%xmm8
+ por %xmm7,%xmm10
+ movd -12(%r11),%xmm7
+ punpckldq %xmm8,%xmm3
+ movdqa %xmm13,%xmm8
+ paddd %xmm15,%xmm12
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm11,%xmm7
+ pand %xmm10,%xmm6
+ punpckldq %xmm9,%xmm3
+ movdqa %xmm13,%xmm9
+
+ movdqa %xmm2,192-128(%rax)
+ paddd %xmm2,%xmm12
+ movd -8(%r8),%xmm4
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm14,%xmm7
+
+ por %xmm9,%xmm8
+ movd -8(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm12
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+.byte 102,15,56,0,221
+ movd -8(%r10),%xmm8
+ por %xmm7,%xmm14
+ movd -8(%r11),%xmm7
+ punpckldq %xmm8,%xmm4
+ movdqa %xmm12,%xmm8
+ paddd %xmm15,%xmm11
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm6
+ pslld $5,%xmm8
+ pandn %xmm10,%xmm7
+ pand %xmm14,%xmm6
+ punpckldq %xmm9,%xmm4
+ movdqa %xmm12,%xmm9
+
+ movdqa %xmm3,208-128(%rax)
+ paddd %xmm3,%xmm11
+ movd -4(%r8),%xmm0
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm13,%xmm7
+
+ por %xmm9,%xmm8
+ movd -4(%r9),%xmm9
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm11
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+.byte 102,15,56,0,229
+ movd -4(%r10),%xmm8
+ por %xmm7,%xmm13
+ movdqa 0-128(%rax),%xmm1
+ movd -4(%r11),%xmm7
+ punpckldq %xmm8,%xmm0
+ movdqa %xmm11,%xmm8
+ paddd %xmm15,%xmm10
+ punpckldq %xmm7,%xmm9
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm6
+ pslld $5,%xmm8
+ prefetcht0 63(%r8)
+ pandn %xmm14,%xmm7
+ pand %xmm13,%xmm6
+ punpckldq %xmm9,%xmm0
+ movdqa %xmm11,%xmm9
+
+ movdqa %xmm4,224-128(%rax)
+ paddd %xmm4,%xmm10
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+ movdqa %xmm12,%xmm7
+ prefetcht0 63(%r9)
+
+ por %xmm9,%xmm8
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm10
+ prefetcht0 63(%r10)
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+.byte 102,15,56,0,197
+ prefetcht0 63(%r11)
+ por %xmm7,%xmm12
+ movdqa 16-128(%rax),%xmm2
+ pxor %xmm3,%xmm1
+ movdqa 32-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ pxor 128-128(%rax),%xmm1
+ paddd %xmm15,%xmm14
+ movdqa %xmm11,%xmm7
+ pslld $5,%xmm8
+ pxor %xmm3,%xmm1
+ movdqa %xmm11,%xmm6
+ pandn %xmm13,%xmm7
+ movdqa %xmm1,%xmm5
+ pand %xmm12,%xmm6
+ movdqa %xmm10,%xmm9
+ psrld $31,%xmm5
+ paddd %xmm1,%xmm1
+
+ movdqa %xmm0,240-128(%rax)
+ paddd %xmm0,%xmm14
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+
+ movdqa %xmm11,%xmm7
+ por %xmm9,%xmm8
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm14
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 48-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ pxor 144-128(%rax),%xmm2
+ paddd %xmm15,%xmm13
+ movdqa %xmm10,%xmm7
+ pslld $5,%xmm8
+ pxor %xmm4,%xmm2
+ movdqa %xmm10,%xmm6
+ pandn %xmm12,%xmm7
+ movdqa %xmm2,%xmm5
+ pand %xmm11,%xmm6
+ movdqa %xmm14,%xmm9
+ psrld $31,%xmm5
+ paddd %xmm2,%xmm2
+
+ movdqa %xmm1,0-128(%rax)
+ paddd %xmm1,%xmm13
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+
+ movdqa %xmm10,%xmm7
+ por %xmm9,%xmm8
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm13
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 64-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ pxor 160-128(%rax),%xmm3
+ paddd %xmm15,%xmm12
+ movdqa %xmm14,%xmm7
+ pslld $5,%xmm8
+ pxor %xmm0,%xmm3
+ movdqa %xmm14,%xmm6
+ pandn %xmm11,%xmm7
+ movdqa %xmm3,%xmm5
+ pand %xmm10,%xmm6
+ movdqa %xmm13,%xmm9
+ psrld $31,%xmm5
+ paddd %xmm3,%xmm3
+
+ movdqa %xmm2,16-128(%rax)
+ paddd %xmm2,%xmm12
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+
+ movdqa %xmm14,%xmm7
+ por %xmm9,%xmm8
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm12
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 80-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ pxor 176-128(%rax),%xmm4
+ paddd %xmm15,%xmm11
+ movdqa %xmm13,%xmm7
+ pslld $5,%xmm8
+ pxor %xmm1,%xmm4
+ movdqa %xmm13,%xmm6
+ pandn %xmm10,%xmm7
+ movdqa %xmm4,%xmm5
+ pand %xmm14,%xmm6
+ movdqa %xmm12,%xmm9
+ psrld $31,%xmm5
+ paddd %xmm4,%xmm4
+
+ movdqa %xmm3,32-128(%rax)
+ paddd %xmm3,%xmm11
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+
+ movdqa %xmm13,%xmm7
+ por %xmm9,%xmm8
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm11
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 96-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ pxor 192-128(%rax),%xmm0
+ paddd %xmm15,%xmm10
+ movdqa %xmm12,%xmm7
+ pslld $5,%xmm8
+ pxor %xmm2,%xmm0
+ movdqa %xmm12,%xmm6
+ pandn %xmm14,%xmm7
+ movdqa %xmm0,%xmm5
+ pand %xmm13,%xmm6
+ movdqa %xmm11,%xmm9
+ psrld $31,%xmm5
+ paddd %xmm0,%xmm0
+
+ movdqa %xmm4,48-128(%rax)
+ paddd %xmm4,%xmm10
+ psrld $27,%xmm9
+ pxor %xmm7,%xmm6
+
+ movdqa %xmm12,%xmm7
+ por %xmm9,%xmm8
+ pslld $30,%xmm7
+ paddd %xmm6,%xmm10
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ movdqa 0(%rbp),%xmm15
+ pxor %xmm3,%xmm1
+ movdqa 112-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm6
+ pxor 208-128(%rax),%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm10,%xmm9
+ movdqa %xmm0,64-128(%rax)
+ paddd %xmm0,%xmm14
+ pxor %xmm3,%xmm1
+ psrld $27,%xmm9
+ pxor %xmm12,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm1,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm14
+ paddd %xmm1,%xmm1
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 128-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm6
+ pxor 224-128(%rax),%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm14,%xmm9
+ movdqa %xmm1,80-128(%rax)
+ paddd %xmm1,%xmm13
+ pxor %xmm4,%xmm2
+ psrld $27,%xmm9
+ pxor %xmm11,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm2,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm13
+ paddd %xmm2,%xmm2
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 144-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm6
+ pxor 240-128(%rax),%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm13,%xmm9
+ movdqa %xmm2,96-128(%rax)
+ paddd %xmm2,%xmm12
+ pxor %xmm0,%xmm3
+ psrld $27,%xmm9
+ pxor %xmm10,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm3,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm12
+ paddd %xmm3,%xmm3
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 160-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm6
+ pxor 0-128(%rax),%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm12,%xmm9
+ movdqa %xmm3,112-128(%rax)
+ paddd %xmm3,%xmm11
+ pxor %xmm1,%xmm4
+ psrld $27,%xmm9
+ pxor %xmm14,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm4,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm11
+ paddd %xmm4,%xmm4
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 176-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm6
+ pxor 16-128(%rax),%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm11,%xmm9
+ movdqa %xmm4,128-128(%rax)
+ paddd %xmm4,%xmm10
+ pxor %xmm2,%xmm0
+ psrld $27,%xmm9
+ pxor %xmm13,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm0,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm10
+ paddd %xmm0,%xmm0
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ pxor %xmm3,%xmm1
+ movdqa 192-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm6
+ pxor 32-128(%rax),%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm10,%xmm9
+ movdqa %xmm0,144-128(%rax)
+ paddd %xmm0,%xmm14
+ pxor %xmm3,%xmm1
+ psrld $27,%xmm9
+ pxor %xmm12,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm1,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm14
+ paddd %xmm1,%xmm1
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 208-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm6
+ pxor 48-128(%rax),%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm14,%xmm9
+ movdqa %xmm1,160-128(%rax)
+ paddd %xmm1,%xmm13
+ pxor %xmm4,%xmm2
+ psrld $27,%xmm9
+ pxor %xmm11,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm2,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm13
+ paddd %xmm2,%xmm2
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 224-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm6
+ pxor 64-128(%rax),%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm13,%xmm9
+ movdqa %xmm2,176-128(%rax)
+ paddd %xmm2,%xmm12
+ pxor %xmm0,%xmm3
+ psrld $27,%xmm9
+ pxor %xmm10,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm3,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm12
+ paddd %xmm3,%xmm3
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 240-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm6
+ pxor 80-128(%rax),%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm12,%xmm9
+ movdqa %xmm3,192-128(%rax)
+ paddd %xmm3,%xmm11
+ pxor %xmm1,%xmm4
+ psrld $27,%xmm9
+ pxor %xmm14,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm4,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm11
+ paddd %xmm4,%xmm4
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 0-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm6
+ pxor 96-128(%rax),%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm11,%xmm9
+ movdqa %xmm4,208-128(%rax)
+ paddd %xmm4,%xmm10
+ pxor %xmm2,%xmm0
+ psrld $27,%xmm9
+ pxor %xmm13,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm0,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm10
+ paddd %xmm0,%xmm0
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ pxor %xmm3,%xmm1
+ movdqa 16-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm6
+ pxor 112-128(%rax),%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm10,%xmm9
+ movdqa %xmm0,224-128(%rax)
+ paddd %xmm0,%xmm14
+ pxor %xmm3,%xmm1
+ psrld $27,%xmm9
+ pxor %xmm12,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm1,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm14
+ paddd %xmm1,%xmm1
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 32-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm6
+ pxor 128-128(%rax),%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm14,%xmm9
+ movdqa %xmm1,240-128(%rax)
+ paddd %xmm1,%xmm13
+ pxor %xmm4,%xmm2
+ psrld $27,%xmm9
+ pxor %xmm11,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm2,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm13
+ paddd %xmm2,%xmm2
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 48-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm6
+ pxor 144-128(%rax),%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm13,%xmm9
+ movdqa %xmm2,0-128(%rax)
+ paddd %xmm2,%xmm12
+ pxor %xmm0,%xmm3
+ psrld $27,%xmm9
+ pxor %xmm10,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm3,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm12
+ paddd %xmm3,%xmm3
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 64-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm6
+ pxor 160-128(%rax),%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm12,%xmm9
+ movdqa %xmm3,16-128(%rax)
+ paddd %xmm3,%xmm11
+ pxor %xmm1,%xmm4
+ psrld $27,%xmm9
+ pxor %xmm14,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm4,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm11
+ paddd %xmm4,%xmm4
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 80-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm6
+ pxor 176-128(%rax),%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm11,%xmm9
+ movdqa %xmm4,32-128(%rax)
+ paddd %xmm4,%xmm10
+ pxor %xmm2,%xmm0
+ psrld $27,%xmm9
+ pxor %xmm13,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm0,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm10
+ paddd %xmm0,%xmm0
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ pxor %xmm3,%xmm1
+ movdqa 96-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm6
+ pxor 192-128(%rax),%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm10,%xmm9
+ movdqa %xmm0,48-128(%rax)
+ paddd %xmm0,%xmm14
+ pxor %xmm3,%xmm1
+ psrld $27,%xmm9
+ pxor %xmm12,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm1,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm14
+ paddd %xmm1,%xmm1
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 112-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm6
+ pxor 208-128(%rax),%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm14,%xmm9
+ movdqa %xmm1,64-128(%rax)
+ paddd %xmm1,%xmm13
+ pxor %xmm4,%xmm2
+ psrld $27,%xmm9
+ pxor %xmm11,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm2,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm13
+ paddd %xmm2,%xmm2
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 128-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm6
+ pxor 224-128(%rax),%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm13,%xmm9
+ movdqa %xmm2,80-128(%rax)
+ paddd %xmm2,%xmm12
+ pxor %xmm0,%xmm3
+ psrld $27,%xmm9
+ pxor %xmm10,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm3,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm12
+ paddd %xmm3,%xmm3
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 144-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm6
+ pxor 240-128(%rax),%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm12,%xmm9
+ movdqa %xmm3,96-128(%rax)
+ paddd %xmm3,%xmm11
+ pxor %xmm1,%xmm4
+ psrld $27,%xmm9
+ pxor %xmm14,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm4,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm11
+ paddd %xmm4,%xmm4
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 160-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm6
+ pxor 0-128(%rax),%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm11,%xmm9
+ movdqa %xmm4,112-128(%rax)
+ paddd %xmm4,%xmm10
+ pxor %xmm2,%xmm0
+ psrld $27,%xmm9
+ pxor %xmm13,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm0,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm10
+ paddd %xmm0,%xmm0
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ movdqa 32(%rbp),%xmm15
+ pxor %xmm3,%xmm1
+ movdqa 176-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm7
+ pxor 16-128(%rax),%xmm1
+ pxor %xmm3,%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ movdqa %xmm10,%xmm9
+ pand %xmm12,%xmm7
+
+ movdqa %xmm13,%xmm6
+ movdqa %xmm1,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm14
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm0,128-128(%rax)
+ paddd %xmm0,%xmm14
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm11,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm1,%xmm1
+ paddd %xmm6,%xmm14
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 192-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm7
+ pxor 32-128(%rax),%xmm2
+ pxor %xmm4,%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ movdqa %xmm14,%xmm9
+ pand %xmm11,%xmm7
+
+ movdqa %xmm12,%xmm6
+ movdqa %xmm2,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm13
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm1,144-128(%rax)
+ paddd %xmm1,%xmm13
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm10,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm2,%xmm2
+ paddd %xmm6,%xmm13
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 208-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm7
+ pxor 48-128(%rax),%xmm3
+ pxor %xmm0,%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ movdqa %xmm13,%xmm9
+ pand %xmm10,%xmm7
+
+ movdqa %xmm11,%xmm6
+ movdqa %xmm3,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm12
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm2,160-128(%rax)
+ paddd %xmm2,%xmm12
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm14,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm3,%xmm3
+ paddd %xmm6,%xmm12
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 224-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm7
+ pxor 64-128(%rax),%xmm4
+ pxor %xmm1,%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ movdqa %xmm12,%xmm9
+ pand %xmm14,%xmm7
+
+ movdqa %xmm10,%xmm6
+ movdqa %xmm4,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm11
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm3,176-128(%rax)
+ paddd %xmm3,%xmm11
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm13,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm4,%xmm4
+ paddd %xmm6,%xmm11
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 240-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm7
+ pxor 80-128(%rax),%xmm0
+ pxor %xmm2,%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ movdqa %xmm11,%xmm9
+ pand %xmm13,%xmm7
+
+ movdqa %xmm14,%xmm6
+ movdqa %xmm0,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm10
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm4,192-128(%rax)
+ paddd %xmm4,%xmm10
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm12,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm0,%xmm0
+ paddd %xmm6,%xmm10
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ pxor %xmm3,%xmm1
+ movdqa 0-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm7
+ pxor 96-128(%rax),%xmm1
+ pxor %xmm3,%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ movdqa %xmm10,%xmm9
+ pand %xmm12,%xmm7
+
+ movdqa %xmm13,%xmm6
+ movdqa %xmm1,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm14
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm0,208-128(%rax)
+ paddd %xmm0,%xmm14
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm11,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm1,%xmm1
+ paddd %xmm6,%xmm14
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 16-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm7
+ pxor 112-128(%rax),%xmm2
+ pxor %xmm4,%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ movdqa %xmm14,%xmm9
+ pand %xmm11,%xmm7
+
+ movdqa %xmm12,%xmm6
+ movdqa %xmm2,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm13
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm1,224-128(%rax)
+ paddd %xmm1,%xmm13
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm10,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm2,%xmm2
+ paddd %xmm6,%xmm13
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 32-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm7
+ pxor 128-128(%rax),%xmm3
+ pxor %xmm0,%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ movdqa %xmm13,%xmm9
+ pand %xmm10,%xmm7
+
+ movdqa %xmm11,%xmm6
+ movdqa %xmm3,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm12
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm2,240-128(%rax)
+ paddd %xmm2,%xmm12
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm14,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm3,%xmm3
+ paddd %xmm6,%xmm12
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 48-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm7
+ pxor 144-128(%rax),%xmm4
+ pxor %xmm1,%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ movdqa %xmm12,%xmm9
+ pand %xmm14,%xmm7
+
+ movdqa %xmm10,%xmm6
+ movdqa %xmm4,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm11
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm3,0-128(%rax)
+ paddd %xmm3,%xmm11
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm13,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm4,%xmm4
+ paddd %xmm6,%xmm11
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 64-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm7
+ pxor 160-128(%rax),%xmm0
+ pxor %xmm2,%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ movdqa %xmm11,%xmm9
+ pand %xmm13,%xmm7
+
+ movdqa %xmm14,%xmm6
+ movdqa %xmm0,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm10
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm4,16-128(%rax)
+ paddd %xmm4,%xmm10
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm12,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm0,%xmm0
+ paddd %xmm6,%xmm10
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ pxor %xmm3,%xmm1
+ movdqa 80-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm7
+ pxor 176-128(%rax),%xmm1
+ pxor %xmm3,%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ movdqa %xmm10,%xmm9
+ pand %xmm12,%xmm7
+
+ movdqa %xmm13,%xmm6
+ movdqa %xmm1,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm14
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm0,32-128(%rax)
+ paddd %xmm0,%xmm14
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm11,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm1,%xmm1
+ paddd %xmm6,%xmm14
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 96-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm7
+ pxor 192-128(%rax),%xmm2
+ pxor %xmm4,%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ movdqa %xmm14,%xmm9
+ pand %xmm11,%xmm7
+
+ movdqa %xmm12,%xmm6
+ movdqa %xmm2,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm13
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm1,48-128(%rax)
+ paddd %xmm1,%xmm13
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm10,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm2,%xmm2
+ paddd %xmm6,%xmm13
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 112-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm7
+ pxor 208-128(%rax),%xmm3
+ pxor %xmm0,%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ movdqa %xmm13,%xmm9
+ pand %xmm10,%xmm7
+
+ movdqa %xmm11,%xmm6
+ movdqa %xmm3,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm12
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm2,64-128(%rax)
+ paddd %xmm2,%xmm12
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm14,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm3,%xmm3
+ paddd %xmm6,%xmm12
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 128-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm7
+ pxor 224-128(%rax),%xmm4
+ pxor %xmm1,%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ movdqa %xmm12,%xmm9
+ pand %xmm14,%xmm7
+
+ movdqa %xmm10,%xmm6
+ movdqa %xmm4,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm11
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm3,80-128(%rax)
+ paddd %xmm3,%xmm11
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm13,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm4,%xmm4
+ paddd %xmm6,%xmm11
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 144-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm7
+ pxor 240-128(%rax),%xmm0
+ pxor %xmm2,%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ movdqa %xmm11,%xmm9
+ pand %xmm13,%xmm7
+
+ movdqa %xmm14,%xmm6
+ movdqa %xmm0,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm10
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm4,96-128(%rax)
+ paddd %xmm4,%xmm10
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm12,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm0,%xmm0
+ paddd %xmm6,%xmm10
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ pxor %xmm3,%xmm1
+ movdqa 160-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm7
+ pxor 0-128(%rax),%xmm1
+ pxor %xmm3,%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ movdqa %xmm10,%xmm9
+ pand %xmm12,%xmm7
+
+ movdqa %xmm13,%xmm6
+ movdqa %xmm1,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm14
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm0,112-128(%rax)
+ paddd %xmm0,%xmm14
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm11,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm1,%xmm1
+ paddd %xmm6,%xmm14
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 176-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm7
+ pxor 16-128(%rax),%xmm2
+ pxor %xmm4,%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ movdqa %xmm14,%xmm9
+ pand %xmm11,%xmm7
+
+ movdqa %xmm12,%xmm6
+ movdqa %xmm2,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm13
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm1,128-128(%rax)
+ paddd %xmm1,%xmm13
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm10,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm2,%xmm2
+ paddd %xmm6,%xmm13
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 192-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm7
+ pxor 32-128(%rax),%xmm3
+ pxor %xmm0,%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ movdqa %xmm13,%xmm9
+ pand %xmm10,%xmm7
+
+ movdqa %xmm11,%xmm6
+ movdqa %xmm3,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm12
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm2,144-128(%rax)
+ paddd %xmm2,%xmm12
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm14,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm3,%xmm3
+ paddd %xmm6,%xmm12
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 208-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm7
+ pxor 48-128(%rax),%xmm4
+ pxor %xmm1,%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ movdqa %xmm12,%xmm9
+ pand %xmm14,%xmm7
+
+ movdqa %xmm10,%xmm6
+ movdqa %xmm4,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm11
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm3,160-128(%rax)
+ paddd %xmm3,%xmm11
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm13,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm4,%xmm4
+ paddd %xmm6,%xmm11
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 224-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm7
+ pxor 64-128(%rax),%xmm0
+ pxor %xmm2,%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ movdqa %xmm11,%xmm9
+ pand %xmm13,%xmm7
+
+ movdqa %xmm14,%xmm6
+ movdqa %xmm0,%xmm5
+ psrld $27,%xmm9
+ paddd %xmm7,%xmm10
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm4,176-128(%rax)
+ paddd %xmm4,%xmm10
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ pand %xmm12,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ paddd %xmm0,%xmm0
+ paddd %xmm6,%xmm10
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ movdqa 64(%rbp),%xmm15
+ pxor %xmm3,%xmm1
+ movdqa 240-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm6
+ pxor 80-128(%rax),%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm10,%xmm9
+ movdqa %xmm0,192-128(%rax)
+ paddd %xmm0,%xmm14
+ pxor %xmm3,%xmm1
+ psrld $27,%xmm9
+ pxor %xmm12,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm1,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm14
+ paddd %xmm1,%xmm1
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 0-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm6
+ pxor 96-128(%rax),%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm14,%xmm9
+ movdqa %xmm1,208-128(%rax)
+ paddd %xmm1,%xmm13
+ pxor %xmm4,%xmm2
+ psrld $27,%xmm9
+ pxor %xmm11,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm2,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm13
+ paddd %xmm2,%xmm2
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 16-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm6
+ pxor 112-128(%rax),%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm13,%xmm9
+ movdqa %xmm2,224-128(%rax)
+ paddd %xmm2,%xmm12
+ pxor %xmm0,%xmm3
+ psrld $27,%xmm9
+ pxor %xmm10,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm3,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm12
+ paddd %xmm3,%xmm3
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 32-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm6
+ pxor 128-128(%rax),%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm12,%xmm9
+ movdqa %xmm3,240-128(%rax)
+ paddd %xmm3,%xmm11
+ pxor %xmm1,%xmm4
+ psrld $27,%xmm9
+ pxor %xmm14,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm4,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm11
+ paddd %xmm4,%xmm4
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 48-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm6
+ pxor 144-128(%rax),%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm11,%xmm9
+ movdqa %xmm4,0-128(%rax)
+ paddd %xmm4,%xmm10
+ pxor %xmm2,%xmm0
+ psrld $27,%xmm9
+ pxor %xmm13,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm0,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm10
+ paddd %xmm0,%xmm0
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ pxor %xmm3,%xmm1
+ movdqa 64-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm6
+ pxor 160-128(%rax),%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm10,%xmm9
+ movdqa %xmm0,16-128(%rax)
+ paddd %xmm0,%xmm14
+ pxor %xmm3,%xmm1
+ psrld $27,%xmm9
+ pxor %xmm12,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm1,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm14
+ paddd %xmm1,%xmm1
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 80-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm6
+ pxor 176-128(%rax),%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm14,%xmm9
+ movdqa %xmm1,32-128(%rax)
+ paddd %xmm1,%xmm13
+ pxor %xmm4,%xmm2
+ psrld $27,%xmm9
+ pxor %xmm11,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm2,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm13
+ paddd %xmm2,%xmm2
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 96-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm6
+ pxor 192-128(%rax),%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm13,%xmm9
+ movdqa %xmm2,48-128(%rax)
+ paddd %xmm2,%xmm12
+ pxor %xmm0,%xmm3
+ psrld $27,%xmm9
+ pxor %xmm10,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm3,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm12
+ paddd %xmm3,%xmm3
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 112-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm6
+ pxor 208-128(%rax),%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm12,%xmm9
+ movdqa %xmm3,64-128(%rax)
+ paddd %xmm3,%xmm11
+ pxor %xmm1,%xmm4
+ psrld $27,%xmm9
+ pxor %xmm14,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm4,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm11
+ paddd %xmm4,%xmm4
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 128-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm6
+ pxor 224-128(%rax),%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm11,%xmm9
+ movdqa %xmm4,80-128(%rax)
+ paddd %xmm4,%xmm10
+ pxor %xmm2,%xmm0
+ psrld $27,%xmm9
+ pxor %xmm13,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm0,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm10
+ paddd %xmm0,%xmm0
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ pxor %xmm3,%xmm1
+ movdqa 144-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm6
+ pxor 240-128(%rax),%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm10,%xmm9
+ movdqa %xmm0,96-128(%rax)
+ paddd %xmm0,%xmm14
+ pxor %xmm3,%xmm1
+ psrld $27,%xmm9
+ pxor %xmm12,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm1,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm14
+ paddd %xmm1,%xmm1
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 160-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm6
+ pxor 0-128(%rax),%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm14,%xmm9
+ movdqa %xmm1,112-128(%rax)
+ paddd %xmm1,%xmm13
+ pxor %xmm4,%xmm2
+ psrld $27,%xmm9
+ pxor %xmm11,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm2,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm13
+ paddd %xmm2,%xmm2
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 176-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm6
+ pxor 16-128(%rax),%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm13,%xmm9
+ paddd %xmm2,%xmm12
+ pxor %xmm0,%xmm3
+ psrld $27,%xmm9
+ pxor %xmm10,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm3,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm12
+ paddd %xmm3,%xmm3
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 192-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm6
+ pxor 32-128(%rax),%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm12,%xmm9
+ paddd %xmm3,%xmm11
+ pxor %xmm1,%xmm4
+ psrld $27,%xmm9
+ pxor %xmm14,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm4,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm11
+ paddd %xmm4,%xmm4
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ pxor %xmm2,%xmm0
+ movdqa 208-128(%rax),%xmm2
+
+ movdqa %xmm11,%xmm8
+ movdqa %xmm14,%xmm6
+ pxor 48-128(%rax),%xmm0
+ paddd %xmm15,%xmm10
+ pslld $5,%xmm8
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm11,%xmm9
+ paddd %xmm4,%xmm10
+ pxor %xmm2,%xmm0
+ psrld $27,%xmm9
+ pxor %xmm13,%xmm6
+ movdqa %xmm12,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm0,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm10
+ paddd %xmm0,%xmm0
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm5,%xmm0
+ por %xmm7,%xmm12
+ pxor %xmm3,%xmm1
+ movdqa 224-128(%rax),%xmm3
+
+ movdqa %xmm10,%xmm8
+ movdqa %xmm13,%xmm6
+ pxor 64-128(%rax),%xmm1
+ paddd %xmm15,%xmm14
+ pslld $5,%xmm8
+ pxor %xmm11,%xmm6
+
+ movdqa %xmm10,%xmm9
+ paddd %xmm0,%xmm14
+ pxor %xmm3,%xmm1
+ psrld $27,%xmm9
+ pxor %xmm12,%xmm6
+ movdqa %xmm11,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm1,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm14
+ paddd %xmm1,%xmm1
+
+ psrld $2,%xmm11
+ paddd %xmm8,%xmm14
+ por %xmm5,%xmm1
+ por %xmm7,%xmm11
+ pxor %xmm4,%xmm2
+ movdqa 240-128(%rax),%xmm4
+
+ movdqa %xmm14,%xmm8
+ movdqa %xmm12,%xmm6
+ pxor 80-128(%rax),%xmm2
+ paddd %xmm15,%xmm13
+ pslld $5,%xmm8
+ pxor %xmm10,%xmm6
+
+ movdqa %xmm14,%xmm9
+ paddd %xmm1,%xmm13
+ pxor %xmm4,%xmm2
+ psrld $27,%xmm9
+ pxor %xmm11,%xmm6
+ movdqa %xmm10,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm2,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm13
+ paddd %xmm2,%xmm2
+
+ psrld $2,%xmm10
+ paddd %xmm8,%xmm13
+ por %xmm5,%xmm2
+ por %xmm7,%xmm10
+ pxor %xmm0,%xmm3
+ movdqa 0-128(%rax),%xmm0
+
+ movdqa %xmm13,%xmm8
+ movdqa %xmm11,%xmm6
+ pxor 96-128(%rax),%xmm3
+ paddd %xmm15,%xmm12
+ pslld $5,%xmm8
+ pxor %xmm14,%xmm6
+
+ movdqa %xmm13,%xmm9
+ paddd %xmm2,%xmm12
+ pxor %xmm0,%xmm3
+ psrld $27,%xmm9
+ pxor %xmm10,%xmm6
+ movdqa %xmm14,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm3,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm12
+ paddd %xmm3,%xmm3
+
+ psrld $2,%xmm14
+ paddd %xmm8,%xmm12
+ por %xmm5,%xmm3
+ por %xmm7,%xmm14
+ pxor %xmm1,%xmm4
+ movdqa 16-128(%rax),%xmm1
+
+ movdqa %xmm12,%xmm8
+ movdqa %xmm10,%xmm6
+ pxor 112-128(%rax),%xmm4
+ paddd %xmm15,%xmm11
+ pslld $5,%xmm8
+ pxor %xmm13,%xmm6
+
+ movdqa %xmm12,%xmm9
+ paddd %xmm3,%xmm11
+ pxor %xmm1,%xmm4
+ psrld $27,%xmm9
+ pxor %xmm14,%xmm6
+ movdqa %xmm13,%xmm7
+
+ pslld $30,%xmm7
+ movdqa %xmm4,%xmm5
+ por %xmm9,%xmm8
+ psrld $31,%xmm5
+ paddd %xmm6,%xmm11
+ paddd %xmm4,%xmm4
+
+ psrld $2,%xmm13
+ paddd %xmm8,%xmm11
+ por %xmm5,%xmm4
+ por %xmm7,%xmm13
+ movdqa %xmm11,%xmm8
+ paddd %xmm15,%xmm10
+ movdqa %xmm14,%xmm6
+ pslld $5,%xmm8
+ pxor %xmm12,%xmm6
+
+ movdqa %xmm11,%xmm9
+ paddd %xmm4,%xmm10
+ psrld $27,%xmm9
+ movdqa %xmm12,%xmm7
+ pxor %xmm13,%xmm6
+
+ pslld $30,%xmm7
+ por %xmm9,%xmm8
+ paddd %xmm6,%xmm10
+
+ psrld $2,%xmm12
+ paddd %xmm8,%xmm10
+ por %xmm7,%xmm12
+ movdqa (%rbx),%xmm0
+ movl $1,%ecx
+ cmpl 0(%rbx),%ecx
+ pxor %xmm8,%xmm8
+ cmovgeq %rbp,%r8
+ cmpl 4(%rbx),%ecx
+ movdqa %xmm0,%xmm1
+ cmovgeq %rbp,%r9
+ cmpl 8(%rbx),%ecx
+ pcmpgtd %xmm8,%xmm1
+ cmovgeq %rbp,%r10
+ cmpl 12(%rbx),%ecx
+ paddd %xmm1,%xmm0
+ cmovgeq %rbp,%r11
+
+ movdqu 0(%rdi),%xmm6
+ pand %xmm1,%xmm10
+ movdqu 32(%rdi),%xmm7
+ pand %xmm1,%xmm11
+ paddd %xmm6,%xmm10
+ movdqu 64(%rdi),%xmm8
+ pand %xmm1,%xmm12
+ paddd %xmm7,%xmm11
+ movdqu 96(%rdi),%xmm9
+ pand %xmm1,%xmm13
+ paddd %xmm8,%xmm12
+ movdqu 128(%rdi),%xmm5
+ pand %xmm1,%xmm14
+ movdqu %xmm10,0(%rdi)
+ paddd %xmm9,%xmm13
+ movdqu %xmm11,32(%rdi)
+ paddd %xmm5,%xmm14
+ movdqu %xmm12,64(%rdi)
+ movdqu %xmm13,96(%rdi)
+ movdqu %xmm14,128(%rdi)
+
+ movdqa %xmm0,(%rbx)
+ movdqa 96(%rbp),%xmm5
+ movdqa -32(%rbp),%xmm15
+ decl %edx
+ jnz .Loop
+
+ movl 280(%rsp),%edx
+ leaq 16(%rdi),%rdi
+ leaq 64(%rsi),%rsi
+ decl %edx
+ jnz .Loop_grande
+
+.Ldone:
+ movq 272(%rsp),%rax
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size sha1_multi_block,.-sha1_multi_block
+.type sha1_multi_block_shaext,@function
+.align 32
+sha1_multi_block_shaext:
+_shaext_shortcut:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ subq $288,%rsp
+ shll $1,%edx
+ andq $-256,%rsp
+ leaq 64(%rdi),%rdi
+ movq %rax,272(%rsp)
+.Lbody_shaext:
+ leaq 256(%rsp),%rbx
+ movdqa K_XX_XX+128(%rip),%xmm3
+
+.Loop_grande_shaext:
+ movl %edx,280(%rsp)
+ xorl %edx,%edx
+ movq 0(%rsi),%r8
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rsp,%r8
+ movq 16(%rsi),%r9
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rsp,%r9
+ testl %edx,%edx
+ jz .Ldone_shaext
+
+ movq 0-64(%rdi),%xmm0
+ movq 32-64(%rdi),%xmm4
+ movq 64-64(%rdi),%xmm5
+ movq 96-64(%rdi),%xmm6
+ movq 128-64(%rdi),%xmm7
+
+ punpckldq %xmm4,%xmm0
+ punpckldq %xmm6,%xmm5
+
+ movdqa %xmm0,%xmm8
+ punpcklqdq %xmm5,%xmm0
+ punpckhqdq %xmm5,%xmm8
+
+ pshufd $63,%xmm7,%xmm1
+ pshufd $127,%xmm7,%xmm9
+ pshufd $27,%xmm0,%xmm0
+ pshufd $27,%xmm8,%xmm8
+ jmp .Loop_shaext
+
+.align 32
+.Loop_shaext:
+ movdqu 0(%r8),%xmm4
+ movdqu 0(%r9),%xmm11
+ movdqu 16(%r8),%xmm5
+ movdqu 16(%r9),%xmm12
+ movdqu 32(%r8),%xmm6
+.byte 102,15,56,0,227
+ movdqu 32(%r9),%xmm13
+.byte 102,68,15,56,0,219
+ movdqu 48(%r8),%xmm7
+ leaq 64(%r8),%r8
+.byte 102,15,56,0,235
+ movdqu 48(%r9),%xmm14
+ leaq 64(%r9),%r9
+.byte 102,68,15,56,0,227
+
+ movdqa %xmm1,80(%rsp)
+ paddd %xmm4,%xmm1
+ movdqa %xmm9,112(%rsp)
+ paddd %xmm11,%xmm9
+ movdqa %xmm0,64(%rsp)
+ movdqa %xmm0,%xmm2
+ movdqa %xmm8,96(%rsp)
+ movdqa %xmm8,%xmm10
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+.byte 69,15,58,204,193,0
+.byte 69,15,56,200,212
+.byte 102,15,56,0,243
+ prefetcht0 127(%r8)
+.byte 15,56,201,229
+.byte 102,68,15,56,0,235
+ prefetcht0 127(%r9)
+.byte 69,15,56,201,220
+
+.byte 102,15,56,0,251
+ movdqa %xmm0,%xmm1
+.byte 102,68,15,56,0,243
+ movdqa %xmm8,%xmm9
+.byte 15,58,204,194,0
+.byte 15,56,200,206
+.byte 69,15,58,204,194,0
+.byte 69,15,56,200,205
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+ pxor %xmm13,%xmm11
+.byte 69,15,56,201,229
+ movdqa %xmm0,%xmm2
+ movdqa %xmm8,%xmm10
+.byte 15,58,204,193,0
+.byte 15,56,200,215
+.byte 69,15,58,204,193,0
+.byte 69,15,56,200,214
+.byte 15,56,202,231
+.byte 69,15,56,202,222
+ pxor %xmm7,%xmm5
+.byte 15,56,201,247
+ pxor %xmm14,%xmm12
+.byte 69,15,56,201,238
+ movdqa %xmm0,%xmm1
+ movdqa %xmm8,%xmm9
+.byte 15,58,204,194,0
+.byte 15,56,200,204
+.byte 69,15,58,204,194,0
+.byte 69,15,56,200,203
+.byte 15,56,202,236
+.byte 69,15,56,202,227
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+ pxor %xmm11,%xmm13
+.byte 69,15,56,201,243
+ movdqa %xmm0,%xmm2
+ movdqa %xmm8,%xmm10
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+.byte 69,15,58,204,193,0
+.byte 69,15,56,200,212
+.byte 15,56,202,245
+.byte 69,15,56,202,236
+ pxor %xmm5,%xmm7
+.byte 15,56,201,229
+ pxor %xmm12,%xmm14
+.byte 69,15,56,201,220
+ movdqa %xmm0,%xmm1
+ movdqa %xmm8,%xmm9
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+.byte 69,15,58,204,194,1
+.byte 69,15,56,200,205
+.byte 15,56,202,254
+.byte 69,15,56,202,245
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+ pxor %xmm13,%xmm11
+.byte 69,15,56,201,229
+ movdqa %xmm0,%xmm2
+ movdqa %xmm8,%xmm10
+.byte 15,58,204,193,1
+.byte 15,56,200,215
+.byte 69,15,58,204,193,1
+.byte 69,15,56,200,214
+.byte 15,56,202,231
+.byte 69,15,56,202,222
+ pxor %xmm7,%xmm5
+.byte 15,56,201,247
+ pxor %xmm14,%xmm12
+.byte 69,15,56,201,238
+ movdqa %xmm0,%xmm1
+ movdqa %xmm8,%xmm9
+.byte 15,58,204,194,1
+.byte 15,56,200,204
+.byte 69,15,58,204,194,1
+.byte 69,15,56,200,203
+.byte 15,56,202,236
+.byte 69,15,56,202,227
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+ pxor %xmm11,%xmm13
+.byte 69,15,56,201,243
+ movdqa %xmm0,%xmm2
+ movdqa %xmm8,%xmm10
+.byte 15,58,204,193,1
+.byte 15,56,200,213
+.byte 69,15,58,204,193,1
+.byte 69,15,56,200,212
+.byte 15,56,202,245
+.byte 69,15,56,202,236
+ pxor %xmm5,%xmm7
+.byte 15,56,201,229
+ pxor %xmm12,%xmm14
+.byte 69,15,56,201,220
+ movdqa %xmm0,%xmm1
+ movdqa %xmm8,%xmm9
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+.byte 69,15,58,204,194,1
+.byte 69,15,56,200,205
+.byte 15,56,202,254
+.byte 69,15,56,202,245
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+ pxor %xmm13,%xmm11
+.byte 69,15,56,201,229
+ movdqa %xmm0,%xmm2
+ movdqa %xmm8,%xmm10
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+.byte 69,15,58,204,193,2
+.byte 69,15,56,200,214
+.byte 15,56,202,231
+.byte 69,15,56,202,222
+ pxor %xmm7,%xmm5
+.byte 15,56,201,247
+ pxor %xmm14,%xmm12
+.byte 69,15,56,201,238
+ movdqa %xmm0,%xmm1
+ movdqa %xmm8,%xmm9
+.byte 15,58,204,194,2
+.byte 15,56,200,204
+.byte 69,15,58,204,194,2
+.byte 69,15,56,200,203
+.byte 15,56,202,236
+.byte 69,15,56,202,227
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+ pxor %xmm11,%xmm13
+.byte 69,15,56,201,243
+ movdqa %xmm0,%xmm2
+ movdqa %xmm8,%xmm10
+.byte 15,58,204,193,2
+.byte 15,56,200,213
+.byte 69,15,58,204,193,2
+.byte 69,15,56,200,212
+.byte 15,56,202,245
+.byte 69,15,56,202,236
+ pxor %xmm5,%xmm7
+.byte 15,56,201,229
+ pxor %xmm12,%xmm14
+.byte 69,15,56,201,220
+ movdqa %xmm0,%xmm1
+ movdqa %xmm8,%xmm9
+.byte 15,58,204,194,2
+.byte 15,56,200,206
+.byte 69,15,58,204,194,2
+.byte 69,15,56,200,205
+.byte 15,56,202,254
+.byte 69,15,56,202,245
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+ pxor %xmm13,%xmm11
+.byte 69,15,56,201,229
+ movdqa %xmm0,%xmm2
+ movdqa %xmm8,%xmm10
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+.byte 69,15,58,204,193,2
+.byte 69,15,56,200,214
+.byte 15,56,202,231
+.byte 69,15,56,202,222
+ pxor %xmm7,%xmm5
+.byte 15,56,201,247
+ pxor %xmm14,%xmm12
+.byte 69,15,56,201,238
+ movdqa %xmm0,%xmm1
+ movdqa %xmm8,%xmm9
+.byte 15,58,204,194,3
+.byte 15,56,200,204
+.byte 69,15,58,204,194,3
+.byte 69,15,56,200,203
+.byte 15,56,202,236
+.byte 69,15,56,202,227
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+ pxor %xmm11,%xmm13
+.byte 69,15,56,201,243
+ movdqa %xmm0,%xmm2
+ movdqa %xmm8,%xmm10
+.byte 15,58,204,193,3
+.byte 15,56,200,213
+.byte 69,15,58,204,193,3
+.byte 69,15,56,200,212
+.byte 15,56,202,245
+.byte 69,15,56,202,236
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+
+ movl $1,%ecx
+ pxor %xmm4,%xmm4
+ cmpl 0(%rbx),%ecx
+ cmovgeq %rsp,%r8
+
+ movdqa %xmm0,%xmm1
+ movdqa %xmm8,%xmm9
+.byte 15,58,204,194,3
+.byte 15,56,200,206
+.byte 69,15,58,204,194,3
+.byte 69,15,56,200,205
+.byte 15,56,202,254
+.byte 69,15,56,202,245
+
+ cmpl 4(%rbx),%ecx
+ cmovgeq %rsp,%r9
+ movq (%rbx),%xmm6
+
+ movdqa %xmm0,%xmm2
+ movdqa %xmm8,%xmm10
+.byte 15,58,204,193,3
+.byte 15,56,200,215
+.byte 69,15,58,204,193,3
+.byte 69,15,56,200,214
+
+ pshufd $0x00,%xmm6,%xmm11
+ pshufd $0x55,%xmm6,%xmm12
+ movdqa %xmm6,%xmm7
+ pcmpgtd %xmm4,%xmm11
+ pcmpgtd %xmm4,%xmm12
+
+ movdqa %xmm0,%xmm1
+ movdqa %xmm8,%xmm9
+.byte 15,58,204,194,3
+.byte 15,56,200,204
+.byte 69,15,58,204,194,3
+.byte 68,15,56,200,204
+
+ pcmpgtd %xmm4,%xmm7
+ pand %xmm11,%xmm0
+ pand %xmm11,%xmm1
+ pand %xmm12,%xmm8
+ pand %xmm12,%xmm9
+ paddd %xmm7,%xmm6
+
+ paddd 64(%rsp),%xmm0
+ paddd 80(%rsp),%xmm1
+ paddd 96(%rsp),%xmm8
+ paddd 112(%rsp),%xmm9
+
+ movq %xmm6,(%rbx)
+ decl %edx
+ jnz .Loop_shaext
+
+ movl 280(%rsp),%edx
+
+ pshufd $27,%xmm0,%xmm0
+ pshufd $27,%xmm8,%xmm8
+
+ movdqa %xmm0,%xmm6
+ punpckldq %xmm8,%xmm0
+ punpckhdq %xmm8,%xmm6
+ punpckhdq %xmm9,%xmm1
+ movq %xmm0,0-64(%rdi)
+ psrldq $8,%xmm0
+ movq %xmm6,64-64(%rdi)
+ psrldq $8,%xmm6
+ movq %xmm0,32-64(%rdi)
+ psrldq $8,%xmm1
+ movq %xmm6,96-64(%rdi)
+ movq %xmm1,128-64(%rdi)
+
+ leaq 8(%rdi),%rdi
+ leaq 32(%rsi),%rsi
+ decl %edx
+ jnz .Loop_grande_shaext
+
+.Ldone_shaext:
+
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lepilogue_shaext:
+ .byte 0xf3,0xc3
+.size sha1_multi_block_shaext,.-sha1_multi_block_shaext
+.type sha1_multi_block_avx,@function
+.align 32
+sha1_multi_block_avx:
+_avx_shortcut:
+ shrq $32,%rcx
+ cmpl $2,%edx
+ jb .Lavx
+ testl $32,%ecx
+ jnz _avx2_shortcut
+ jmp .Lavx
+.align 32
+.Lavx:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ subq $288,%rsp
+ andq $-256,%rsp
+ movq %rax,272(%rsp)
+.Lbody_avx:
+ leaq K_XX_XX(%rip),%rbp
+ leaq 256(%rsp),%rbx
+
+ vzeroupper
+.Loop_grande_avx:
+ movl %edx,280(%rsp)
+ xorl %edx,%edx
+ movq 0(%rsi),%r8
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rbp,%r8
+ movq 16(%rsi),%r9
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rbp,%r9
+ movq 32(%rsi),%r10
+ movl 40(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,8(%rbx)
+ cmovleq %rbp,%r10
+ movq 48(%rsi),%r11
+ movl 56(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,12(%rbx)
+ cmovleq %rbp,%r11
+ testl %edx,%edx
+ jz .Ldone_avx
+
+ vmovdqu 0(%rdi),%xmm10
+ leaq 128(%rsp),%rax
+ vmovdqu 32(%rdi),%xmm11
+ vmovdqu 64(%rdi),%xmm12
+ vmovdqu 96(%rdi),%xmm13
+ vmovdqu 128(%rdi),%xmm14
+ vmovdqu 96(%rbp),%xmm5
+ jmp .Loop_avx
+
+.align 32
+.Loop_avx:
+ vmovdqa -32(%rbp),%xmm15
+ vmovd (%r8),%xmm0
+ leaq 64(%r8),%r8
+ vmovd (%r9),%xmm2
+ leaq 64(%r9),%r9
+ vpinsrd $1,(%r10),%xmm0,%xmm0
+ leaq 64(%r10),%r10
+ vpinsrd $1,(%r11),%xmm2,%xmm2
+ leaq 64(%r11),%r11
+ vmovd -60(%r8),%xmm1
+ vpunpckldq %xmm2,%xmm0,%xmm0
+ vmovd -60(%r9),%xmm9
+ vpshufb %xmm5,%xmm0,%xmm0
+ vpinsrd $1,-60(%r10),%xmm1,%xmm1
+ vpinsrd $1,-60(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpslld $5,%xmm10,%xmm8
+ vpandn %xmm13,%xmm11,%xmm7
+ vpand %xmm12,%xmm11,%xmm6
+
+ vmovdqa %xmm0,0-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpunpckldq %xmm9,%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -56(%r8),%xmm2
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -56(%r9),%xmm9
+ vpaddd %xmm6,%xmm14,%xmm14
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpshufb %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpinsrd $1,-56(%r10),%xmm2,%xmm2
+ vpinsrd $1,-56(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpslld $5,%xmm14,%xmm8
+ vpandn %xmm12,%xmm10,%xmm7
+ vpand %xmm11,%xmm10,%xmm6
+
+ vmovdqa %xmm1,16-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpunpckldq %xmm9,%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -52(%r8),%xmm3
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -52(%r9),%xmm9
+ vpaddd %xmm6,%xmm13,%xmm13
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpshufb %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpinsrd $1,-52(%r10),%xmm3,%xmm3
+ vpinsrd $1,-52(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpslld $5,%xmm13,%xmm8
+ vpandn %xmm11,%xmm14,%xmm7
+ vpand %xmm10,%xmm14,%xmm6
+
+ vmovdqa %xmm2,32-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpunpckldq %xmm9,%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -48(%r8),%xmm4
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -48(%r9),%xmm9
+ vpaddd %xmm6,%xmm12,%xmm12
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpshufb %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpinsrd $1,-48(%r10),%xmm4,%xmm4
+ vpinsrd $1,-48(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpslld $5,%xmm12,%xmm8
+ vpandn %xmm10,%xmm13,%xmm7
+ vpand %xmm14,%xmm13,%xmm6
+
+ vmovdqa %xmm3,48-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpunpckldq %xmm9,%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -44(%r8),%xmm0
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -44(%r9),%xmm9
+ vpaddd %xmm6,%xmm11,%xmm11
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpshufb %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpinsrd $1,-44(%r10),%xmm0,%xmm0
+ vpinsrd $1,-44(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpslld $5,%xmm11,%xmm8
+ vpandn %xmm14,%xmm12,%xmm7
+ vpand %xmm13,%xmm12,%xmm6
+
+ vmovdqa %xmm4,64-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpunpckldq %xmm9,%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -40(%r8),%xmm1
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -40(%r9),%xmm9
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpshufb %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpinsrd $1,-40(%r10),%xmm1,%xmm1
+ vpinsrd $1,-40(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpslld $5,%xmm10,%xmm8
+ vpandn %xmm13,%xmm11,%xmm7
+ vpand %xmm12,%xmm11,%xmm6
+
+ vmovdqa %xmm0,80-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpunpckldq %xmm9,%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -36(%r8),%xmm2
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -36(%r9),%xmm9
+ vpaddd %xmm6,%xmm14,%xmm14
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpshufb %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpinsrd $1,-36(%r10),%xmm2,%xmm2
+ vpinsrd $1,-36(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpslld $5,%xmm14,%xmm8
+ vpandn %xmm12,%xmm10,%xmm7
+ vpand %xmm11,%xmm10,%xmm6
+
+ vmovdqa %xmm1,96-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpunpckldq %xmm9,%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -32(%r8),%xmm3
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -32(%r9),%xmm9
+ vpaddd %xmm6,%xmm13,%xmm13
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpshufb %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpinsrd $1,-32(%r10),%xmm3,%xmm3
+ vpinsrd $1,-32(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpslld $5,%xmm13,%xmm8
+ vpandn %xmm11,%xmm14,%xmm7
+ vpand %xmm10,%xmm14,%xmm6
+
+ vmovdqa %xmm2,112-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpunpckldq %xmm9,%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -28(%r8),%xmm4
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -28(%r9),%xmm9
+ vpaddd %xmm6,%xmm12,%xmm12
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpshufb %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpinsrd $1,-28(%r10),%xmm4,%xmm4
+ vpinsrd $1,-28(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpslld $5,%xmm12,%xmm8
+ vpandn %xmm10,%xmm13,%xmm7
+ vpand %xmm14,%xmm13,%xmm6
+
+ vmovdqa %xmm3,128-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpunpckldq %xmm9,%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -24(%r8),%xmm0
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -24(%r9),%xmm9
+ vpaddd %xmm6,%xmm11,%xmm11
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpshufb %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpinsrd $1,-24(%r10),%xmm0,%xmm0
+ vpinsrd $1,-24(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpslld $5,%xmm11,%xmm8
+ vpandn %xmm14,%xmm12,%xmm7
+ vpand %xmm13,%xmm12,%xmm6
+
+ vmovdqa %xmm4,144-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpunpckldq %xmm9,%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -20(%r8),%xmm1
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -20(%r9),%xmm9
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpshufb %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpinsrd $1,-20(%r10),%xmm1,%xmm1
+ vpinsrd $1,-20(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpslld $5,%xmm10,%xmm8
+ vpandn %xmm13,%xmm11,%xmm7
+ vpand %xmm12,%xmm11,%xmm6
+
+ vmovdqa %xmm0,160-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpunpckldq %xmm9,%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -16(%r8),%xmm2
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -16(%r9),%xmm9
+ vpaddd %xmm6,%xmm14,%xmm14
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpshufb %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpinsrd $1,-16(%r10),%xmm2,%xmm2
+ vpinsrd $1,-16(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpslld $5,%xmm14,%xmm8
+ vpandn %xmm12,%xmm10,%xmm7
+ vpand %xmm11,%xmm10,%xmm6
+
+ vmovdqa %xmm1,176-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpunpckldq %xmm9,%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -12(%r8),%xmm3
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -12(%r9),%xmm9
+ vpaddd %xmm6,%xmm13,%xmm13
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpshufb %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpinsrd $1,-12(%r10),%xmm3,%xmm3
+ vpinsrd $1,-12(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpslld $5,%xmm13,%xmm8
+ vpandn %xmm11,%xmm14,%xmm7
+ vpand %xmm10,%xmm14,%xmm6
+
+ vmovdqa %xmm2,192-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpunpckldq %xmm9,%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -8(%r8),%xmm4
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -8(%r9),%xmm9
+ vpaddd %xmm6,%xmm12,%xmm12
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpshufb %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpinsrd $1,-8(%r10),%xmm4,%xmm4
+ vpinsrd $1,-8(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpslld $5,%xmm12,%xmm8
+ vpandn %xmm10,%xmm13,%xmm7
+ vpand %xmm14,%xmm13,%xmm6
+
+ vmovdqa %xmm3,208-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpunpckldq %xmm9,%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vmovd -4(%r8),%xmm0
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vmovd -4(%r9),%xmm9
+ vpaddd %xmm6,%xmm11,%xmm11
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpshufb %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vmovdqa 0-128(%rax),%xmm1
+ vpinsrd $1,-4(%r10),%xmm0,%xmm0
+ vpinsrd $1,-4(%r11),%xmm9,%xmm9
+ vpaddd %xmm15,%xmm10,%xmm10
+ prefetcht0 63(%r8)
+ vpslld $5,%xmm11,%xmm8
+ vpandn %xmm14,%xmm12,%xmm7
+ vpand %xmm13,%xmm12,%xmm6
+
+ vmovdqa %xmm4,224-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpunpckldq %xmm9,%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ prefetcht0 63(%r9)
+ vpxor %xmm7,%xmm6,%xmm6
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ prefetcht0 63(%r10)
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ prefetcht0 63(%r11)
+ vpshufb %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vmovdqa 16-128(%rax),%xmm2
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 32-128(%rax),%xmm3
+
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpslld $5,%xmm10,%xmm8
+ vpandn %xmm13,%xmm11,%xmm7
+
+ vpand %xmm12,%xmm11,%xmm6
+
+ vmovdqa %xmm0,240-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpxor 128-128(%rax),%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm14,%xmm14
+
+ vpsrld $31,%xmm1,%xmm5
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpsrld $2,%xmm11,%xmm11
+
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 48-128(%rax),%xmm4
+
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpslld $5,%xmm14,%xmm8
+ vpandn %xmm12,%xmm10,%xmm7
+
+ vpand %xmm11,%xmm10,%xmm6
+
+ vmovdqa %xmm1,0-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpxor 144-128(%rax),%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm13,%xmm13
+
+ vpsrld $31,%xmm2,%xmm5
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpsrld $2,%xmm10,%xmm10
+
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 64-128(%rax),%xmm0
+
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpslld $5,%xmm13,%xmm8
+ vpandn %xmm11,%xmm14,%xmm7
+
+ vpand %xmm10,%xmm14,%xmm6
+
+ vmovdqa %xmm2,16-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpxor 160-128(%rax),%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+
+ vpsrld $31,%xmm3,%xmm5
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpsrld $2,%xmm14,%xmm14
+
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 80-128(%rax),%xmm1
+
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpslld $5,%xmm12,%xmm8
+ vpandn %xmm10,%xmm13,%xmm7
+
+ vpand %xmm14,%xmm13,%xmm6
+
+ vmovdqa %xmm3,32-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpxor 176-128(%rax),%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm11,%xmm11
+
+ vpsrld $31,%xmm4,%xmm5
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpsrld $2,%xmm13,%xmm13
+
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 96-128(%rax),%xmm2
+
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpslld $5,%xmm11,%xmm8
+ vpandn %xmm14,%xmm12,%xmm7
+
+ vpand %xmm13,%xmm12,%xmm6
+
+ vmovdqa %xmm4,48-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpxor 192-128(%rax),%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm7,%xmm6,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpsrld $31,%xmm0,%xmm5
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpsrld $2,%xmm12,%xmm12
+
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vmovdqa 0(%rbp),%xmm15
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 112-128(%rax),%xmm3
+
+ vpslld $5,%xmm10,%xmm8
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpxor %xmm11,%xmm13,%xmm6
+ vmovdqa %xmm0,64-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpxor 208-128(%rax),%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm6,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm14,%xmm14
+ vpsrld $31,%xmm1,%xmm5
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 128-128(%rax),%xmm4
+
+ vpslld $5,%xmm14,%xmm8
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpxor %xmm10,%xmm12,%xmm6
+ vmovdqa %xmm1,80-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpxor 224-128(%rax),%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm6,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm13,%xmm13
+ vpsrld $31,%xmm2,%xmm5
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 144-128(%rax),%xmm0
+
+ vpslld $5,%xmm13,%xmm8
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpxor %xmm14,%xmm11,%xmm6
+ vmovdqa %xmm2,96-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpxor 240-128(%rax),%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm6,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+ vpsrld $31,%xmm3,%xmm5
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 160-128(%rax),%xmm1
+
+ vpslld $5,%xmm12,%xmm8
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpxor %xmm13,%xmm10,%xmm6
+ vmovdqa %xmm3,112-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpxor 0-128(%rax),%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm11,%xmm11
+ vpsrld $31,%xmm4,%xmm5
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 176-128(%rax),%xmm2
+
+ vpslld $5,%xmm11,%xmm8
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpxor %xmm12,%xmm14,%xmm6
+ vmovdqa %xmm4,128-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpxor 16-128(%rax),%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm6,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm10,%xmm10
+ vpsrld $31,%xmm0,%xmm5
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 192-128(%rax),%xmm3
+
+ vpslld $5,%xmm10,%xmm8
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpxor %xmm11,%xmm13,%xmm6
+ vmovdqa %xmm0,144-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpxor 32-128(%rax),%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm6,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm14,%xmm14
+ vpsrld $31,%xmm1,%xmm5
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 208-128(%rax),%xmm4
+
+ vpslld $5,%xmm14,%xmm8
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpxor %xmm10,%xmm12,%xmm6
+ vmovdqa %xmm1,160-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpxor 48-128(%rax),%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm6,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm13,%xmm13
+ vpsrld $31,%xmm2,%xmm5
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 224-128(%rax),%xmm0
+
+ vpslld $5,%xmm13,%xmm8
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpxor %xmm14,%xmm11,%xmm6
+ vmovdqa %xmm2,176-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpxor 64-128(%rax),%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm6,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+ vpsrld $31,%xmm3,%xmm5
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 240-128(%rax),%xmm1
+
+ vpslld $5,%xmm12,%xmm8
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpxor %xmm13,%xmm10,%xmm6
+ vmovdqa %xmm3,192-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpxor 80-128(%rax),%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm11,%xmm11
+ vpsrld $31,%xmm4,%xmm5
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 0-128(%rax),%xmm2
+
+ vpslld $5,%xmm11,%xmm8
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpxor %xmm12,%xmm14,%xmm6
+ vmovdqa %xmm4,208-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpxor 96-128(%rax),%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm6,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm10,%xmm10
+ vpsrld $31,%xmm0,%xmm5
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 16-128(%rax),%xmm3
+
+ vpslld $5,%xmm10,%xmm8
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpxor %xmm11,%xmm13,%xmm6
+ vmovdqa %xmm0,224-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpxor 112-128(%rax),%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm6,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm14,%xmm14
+ vpsrld $31,%xmm1,%xmm5
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 32-128(%rax),%xmm4
+
+ vpslld $5,%xmm14,%xmm8
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpxor %xmm10,%xmm12,%xmm6
+ vmovdqa %xmm1,240-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpxor 128-128(%rax),%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm6,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm13,%xmm13
+ vpsrld $31,%xmm2,%xmm5
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 48-128(%rax),%xmm0
+
+ vpslld $5,%xmm13,%xmm8
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpxor %xmm14,%xmm11,%xmm6
+ vmovdqa %xmm2,0-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpxor 144-128(%rax),%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm6,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+ vpsrld $31,%xmm3,%xmm5
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 64-128(%rax),%xmm1
+
+ vpslld $5,%xmm12,%xmm8
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpxor %xmm13,%xmm10,%xmm6
+ vmovdqa %xmm3,16-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpxor 160-128(%rax),%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm11,%xmm11
+ vpsrld $31,%xmm4,%xmm5
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 80-128(%rax),%xmm2
+
+ vpslld $5,%xmm11,%xmm8
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpxor %xmm12,%xmm14,%xmm6
+ vmovdqa %xmm4,32-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpxor 176-128(%rax),%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm6,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm10,%xmm10
+ vpsrld $31,%xmm0,%xmm5
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 96-128(%rax),%xmm3
+
+ vpslld $5,%xmm10,%xmm8
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpxor %xmm11,%xmm13,%xmm6
+ vmovdqa %xmm0,48-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpxor 192-128(%rax),%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm6,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm14,%xmm14
+ vpsrld $31,%xmm1,%xmm5
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 112-128(%rax),%xmm4
+
+ vpslld $5,%xmm14,%xmm8
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpxor %xmm10,%xmm12,%xmm6
+ vmovdqa %xmm1,64-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpxor 208-128(%rax),%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm6,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm13,%xmm13
+ vpsrld $31,%xmm2,%xmm5
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 128-128(%rax),%xmm0
+
+ vpslld $5,%xmm13,%xmm8
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpxor %xmm14,%xmm11,%xmm6
+ vmovdqa %xmm2,80-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpxor 224-128(%rax),%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm6,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+ vpsrld $31,%xmm3,%xmm5
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 144-128(%rax),%xmm1
+
+ vpslld $5,%xmm12,%xmm8
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpxor %xmm13,%xmm10,%xmm6
+ vmovdqa %xmm3,96-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpxor 240-128(%rax),%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm11,%xmm11
+ vpsrld $31,%xmm4,%xmm5
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 160-128(%rax),%xmm2
+
+ vpslld $5,%xmm11,%xmm8
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpxor %xmm12,%xmm14,%xmm6
+ vmovdqa %xmm4,112-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpxor 0-128(%rax),%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm6,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm10,%xmm10
+ vpsrld $31,%xmm0,%xmm5
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vmovdqa 32(%rbp),%xmm15
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 176-128(%rax),%xmm3
+
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpslld $5,%xmm10,%xmm8
+ vpand %xmm12,%xmm13,%xmm7
+ vpxor 16-128(%rax),%xmm1,%xmm1
+
+ vpaddd %xmm7,%xmm14,%xmm14
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm13,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vmovdqu %xmm0,128-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm1,%xmm5
+ vpand %xmm11,%xmm6,%xmm6
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpaddd %xmm6,%xmm14,%xmm14
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 192-128(%rax),%xmm4
+
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpslld $5,%xmm14,%xmm8
+ vpand %xmm11,%xmm12,%xmm7
+ vpxor 32-128(%rax),%xmm2,%xmm2
+
+ vpaddd %xmm7,%xmm13,%xmm13
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm12,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vmovdqu %xmm1,144-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm2,%xmm5
+ vpand %xmm10,%xmm6,%xmm6
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpaddd %xmm6,%xmm13,%xmm13
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 208-128(%rax),%xmm0
+
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpslld $5,%xmm13,%xmm8
+ vpand %xmm10,%xmm11,%xmm7
+ vpxor 48-128(%rax),%xmm3,%xmm3
+
+ vpaddd %xmm7,%xmm12,%xmm12
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm11,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vmovdqu %xmm2,160-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm3,%xmm5
+ vpand %xmm14,%xmm6,%xmm6
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpaddd %xmm6,%xmm12,%xmm12
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 224-128(%rax),%xmm1
+
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpslld $5,%xmm12,%xmm8
+ vpand %xmm14,%xmm10,%xmm7
+ vpxor 64-128(%rax),%xmm4,%xmm4
+
+ vpaddd %xmm7,%xmm11,%xmm11
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm10,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vmovdqu %xmm3,176-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm4,%xmm5
+ vpand %xmm13,%xmm6,%xmm6
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpaddd %xmm6,%xmm11,%xmm11
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 240-128(%rax),%xmm2
+
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpslld $5,%xmm11,%xmm8
+ vpand %xmm13,%xmm14,%xmm7
+ vpxor 80-128(%rax),%xmm0,%xmm0
+
+ vpaddd %xmm7,%xmm10,%xmm10
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm14,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vmovdqu %xmm4,192-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm0,%xmm5
+ vpand %xmm12,%xmm6,%xmm6
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 0-128(%rax),%xmm3
+
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpslld $5,%xmm10,%xmm8
+ vpand %xmm12,%xmm13,%xmm7
+ vpxor 96-128(%rax),%xmm1,%xmm1
+
+ vpaddd %xmm7,%xmm14,%xmm14
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm13,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vmovdqu %xmm0,208-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm1,%xmm5
+ vpand %xmm11,%xmm6,%xmm6
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpaddd %xmm6,%xmm14,%xmm14
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 16-128(%rax),%xmm4
+
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpslld $5,%xmm14,%xmm8
+ vpand %xmm11,%xmm12,%xmm7
+ vpxor 112-128(%rax),%xmm2,%xmm2
+
+ vpaddd %xmm7,%xmm13,%xmm13
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm12,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vmovdqu %xmm1,224-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm2,%xmm5
+ vpand %xmm10,%xmm6,%xmm6
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpaddd %xmm6,%xmm13,%xmm13
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 32-128(%rax),%xmm0
+
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpslld $5,%xmm13,%xmm8
+ vpand %xmm10,%xmm11,%xmm7
+ vpxor 128-128(%rax),%xmm3,%xmm3
+
+ vpaddd %xmm7,%xmm12,%xmm12
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm11,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vmovdqu %xmm2,240-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm3,%xmm5
+ vpand %xmm14,%xmm6,%xmm6
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpaddd %xmm6,%xmm12,%xmm12
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 48-128(%rax),%xmm1
+
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpslld $5,%xmm12,%xmm8
+ vpand %xmm14,%xmm10,%xmm7
+ vpxor 144-128(%rax),%xmm4,%xmm4
+
+ vpaddd %xmm7,%xmm11,%xmm11
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm10,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vmovdqu %xmm3,0-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm4,%xmm5
+ vpand %xmm13,%xmm6,%xmm6
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpaddd %xmm6,%xmm11,%xmm11
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 64-128(%rax),%xmm2
+
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpslld $5,%xmm11,%xmm8
+ vpand %xmm13,%xmm14,%xmm7
+ vpxor 160-128(%rax),%xmm0,%xmm0
+
+ vpaddd %xmm7,%xmm10,%xmm10
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm14,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vmovdqu %xmm4,16-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm0,%xmm5
+ vpand %xmm12,%xmm6,%xmm6
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 80-128(%rax),%xmm3
+
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpslld $5,%xmm10,%xmm8
+ vpand %xmm12,%xmm13,%xmm7
+ vpxor 176-128(%rax),%xmm1,%xmm1
+
+ vpaddd %xmm7,%xmm14,%xmm14
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm13,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vmovdqu %xmm0,32-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm1,%xmm5
+ vpand %xmm11,%xmm6,%xmm6
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpaddd %xmm6,%xmm14,%xmm14
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 96-128(%rax),%xmm4
+
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpslld $5,%xmm14,%xmm8
+ vpand %xmm11,%xmm12,%xmm7
+ vpxor 192-128(%rax),%xmm2,%xmm2
+
+ vpaddd %xmm7,%xmm13,%xmm13
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm12,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vmovdqu %xmm1,48-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm2,%xmm5
+ vpand %xmm10,%xmm6,%xmm6
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpaddd %xmm6,%xmm13,%xmm13
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 112-128(%rax),%xmm0
+
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpslld $5,%xmm13,%xmm8
+ vpand %xmm10,%xmm11,%xmm7
+ vpxor 208-128(%rax),%xmm3,%xmm3
+
+ vpaddd %xmm7,%xmm12,%xmm12
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm11,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vmovdqu %xmm2,64-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm3,%xmm5
+ vpand %xmm14,%xmm6,%xmm6
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpaddd %xmm6,%xmm12,%xmm12
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 128-128(%rax),%xmm1
+
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpslld $5,%xmm12,%xmm8
+ vpand %xmm14,%xmm10,%xmm7
+ vpxor 224-128(%rax),%xmm4,%xmm4
+
+ vpaddd %xmm7,%xmm11,%xmm11
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm10,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vmovdqu %xmm3,80-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm4,%xmm5
+ vpand %xmm13,%xmm6,%xmm6
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpaddd %xmm6,%xmm11,%xmm11
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 144-128(%rax),%xmm2
+
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpslld $5,%xmm11,%xmm8
+ vpand %xmm13,%xmm14,%xmm7
+ vpxor 240-128(%rax),%xmm0,%xmm0
+
+ vpaddd %xmm7,%xmm10,%xmm10
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm14,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vmovdqu %xmm4,96-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm0,%xmm5
+ vpand %xmm12,%xmm6,%xmm6
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 160-128(%rax),%xmm3
+
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpslld $5,%xmm10,%xmm8
+ vpand %xmm12,%xmm13,%xmm7
+ vpxor 0-128(%rax),%xmm1,%xmm1
+
+ vpaddd %xmm7,%xmm14,%xmm14
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm13,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vmovdqu %xmm0,112-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm1,%xmm5
+ vpand %xmm11,%xmm6,%xmm6
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpaddd %xmm6,%xmm14,%xmm14
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 176-128(%rax),%xmm4
+
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpslld $5,%xmm14,%xmm8
+ vpand %xmm11,%xmm12,%xmm7
+ vpxor 16-128(%rax),%xmm2,%xmm2
+
+ vpaddd %xmm7,%xmm13,%xmm13
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm12,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vmovdqu %xmm1,128-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm2,%xmm5
+ vpand %xmm10,%xmm6,%xmm6
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpaddd %xmm6,%xmm13,%xmm13
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 192-128(%rax),%xmm0
+
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpslld $5,%xmm13,%xmm8
+ vpand %xmm10,%xmm11,%xmm7
+ vpxor 32-128(%rax),%xmm3,%xmm3
+
+ vpaddd %xmm7,%xmm12,%xmm12
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm11,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vmovdqu %xmm2,144-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm3,%xmm5
+ vpand %xmm14,%xmm6,%xmm6
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpaddd %xmm6,%xmm12,%xmm12
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 208-128(%rax),%xmm1
+
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpslld $5,%xmm12,%xmm8
+ vpand %xmm14,%xmm10,%xmm7
+ vpxor 48-128(%rax),%xmm4,%xmm4
+
+ vpaddd %xmm7,%xmm11,%xmm11
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm10,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vmovdqu %xmm3,160-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm4,%xmm5
+ vpand %xmm13,%xmm6,%xmm6
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpaddd %xmm6,%xmm11,%xmm11
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 224-128(%rax),%xmm2
+
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpslld $5,%xmm11,%xmm8
+ vpand %xmm13,%xmm14,%xmm7
+ vpxor 64-128(%rax),%xmm0,%xmm0
+
+ vpaddd %xmm7,%xmm10,%xmm10
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm14,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vmovdqu %xmm4,176-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpor %xmm9,%xmm8,%xmm8
+ vpsrld $31,%xmm0,%xmm5
+ vpand %xmm12,%xmm6,%xmm6
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vmovdqa 64(%rbp),%xmm15
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 240-128(%rax),%xmm3
+
+ vpslld $5,%xmm10,%xmm8
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpxor %xmm11,%xmm13,%xmm6
+ vmovdqa %xmm0,192-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpxor 80-128(%rax),%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm6,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm14,%xmm14
+ vpsrld $31,%xmm1,%xmm5
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 0-128(%rax),%xmm4
+
+ vpslld $5,%xmm14,%xmm8
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpxor %xmm10,%xmm12,%xmm6
+ vmovdqa %xmm1,208-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpxor 96-128(%rax),%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm6,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm13,%xmm13
+ vpsrld $31,%xmm2,%xmm5
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 16-128(%rax),%xmm0
+
+ vpslld $5,%xmm13,%xmm8
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpxor %xmm14,%xmm11,%xmm6
+ vmovdqa %xmm2,224-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpxor 112-128(%rax),%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm6,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+ vpsrld $31,%xmm3,%xmm5
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 32-128(%rax),%xmm1
+
+ vpslld $5,%xmm12,%xmm8
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpxor %xmm13,%xmm10,%xmm6
+ vmovdqa %xmm3,240-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpxor 128-128(%rax),%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm11,%xmm11
+ vpsrld $31,%xmm4,%xmm5
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 48-128(%rax),%xmm2
+
+ vpslld $5,%xmm11,%xmm8
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpxor %xmm12,%xmm14,%xmm6
+ vmovdqa %xmm4,0-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpxor 144-128(%rax),%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm6,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm10,%xmm10
+ vpsrld $31,%xmm0,%xmm5
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 64-128(%rax),%xmm3
+
+ vpslld $5,%xmm10,%xmm8
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpxor %xmm11,%xmm13,%xmm6
+ vmovdqa %xmm0,16-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpxor 160-128(%rax),%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm6,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm14,%xmm14
+ vpsrld $31,%xmm1,%xmm5
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 80-128(%rax),%xmm4
+
+ vpslld $5,%xmm14,%xmm8
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpxor %xmm10,%xmm12,%xmm6
+ vmovdqa %xmm1,32-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpxor 176-128(%rax),%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm6,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm13,%xmm13
+ vpsrld $31,%xmm2,%xmm5
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 96-128(%rax),%xmm0
+
+ vpslld $5,%xmm13,%xmm8
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpxor %xmm14,%xmm11,%xmm6
+ vmovdqa %xmm2,48-128(%rax)
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpxor 192-128(%rax),%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm6,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+ vpsrld $31,%xmm3,%xmm5
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 112-128(%rax),%xmm1
+
+ vpslld $5,%xmm12,%xmm8
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpxor %xmm13,%xmm10,%xmm6
+ vmovdqa %xmm3,64-128(%rax)
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpxor 208-128(%rax),%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm11,%xmm11
+ vpsrld $31,%xmm4,%xmm5
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 128-128(%rax),%xmm2
+
+ vpslld $5,%xmm11,%xmm8
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpxor %xmm12,%xmm14,%xmm6
+ vmovdqa %xmm4,80-128(%rax)
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpxor 224-128(%rax),%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm6,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm10,%xmm10
+ vpsrld $31,%xmm0,%xmm5
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 144-128(%rax),%xmm3
+
+ vpslld $5,%xmm10,%xmm8
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpxor %xmm11,%xmm13,%xmm6
+ vmovdqa %xmm0,96-128(%rax)
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpxor 240-128(%rax),%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm6,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm14,%xmm14
+ vpsrld $31,%xmm1,%xmm5
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 160-128(%rax),%xmm4
+
+ vpslld $5,%xmm14,%xmm8
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpxor %xmm10,%xmm12,%xmm6
+ vmovdqa %xmm1,112-128(%rax)
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpxor 0-128(%rax),%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm6,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm13,%xmm13
+ vpsrld $31,%xmm2,%xmm5
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 176-128(%rax),%xmm0
+
+ vpslld $5,%xmm13,%xmm8
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpxor %xmm14,%xmm11,%xmm6
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpxor 16-128(%rax),%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm6,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+ vpsrld $31,%xmm3,%xmm5
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 192-128(%rax),%xmm1
+
+ vpslld $5,%xmm12,%xmm8
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpxor %xmm13,%xmm10,%xmm6
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpxor 32-128(%rax),%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm11,%xmm11
+ vpsrld $31,%xmm4,%xmm5
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpxor %xmm2,%xmm0,%xmm0
+ vmovdqa 208-128(%rax),%xmm2
+
+ vpslld $5,%xmm11,%xmm8
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpxor %xmm12,%xmm14,%xmm6
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpxor 48-128(%rax),%xmm0,%xmm0
+ vpsrld $27,%xmm11,%xmm9
+ vpxor %xmm13,%xmm6,%xmm6
+ vpxor %xmm2,%xmm0,%xmm0
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm10,%xmm10
+ vpsrld $31,%xmm0,%xmm5
+ vpaddd %xmm0,%xmm0,%xmm0
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm5,%xmm0,%xmm0
+ vpor %xmm7,%xmm12,%xmm12
+ vpxor %xmm3,%xmm1,%xmm1
+ vmovdqa 224-128(%rax),%xmm3
+
+ vpslld $5,%xmm10,%xmm8
+ vpaddd %xmm15,%xmm14,%xmm14
+ vpxor %xmm11,%xmm13,%xmm6
+ vpaddd %xmm0,%xmm14,%xmm14
+ vpxor 64-128(%rax),%xmm1,%xmm1
+ vpsrld $27,%xmm10,%xmm9
+ vpxor %xmm12,%xmm6,%xmm6
+ vpxor %xmm3,%xmm1,%xmm1
+
+ vpslld $30,%xmm11,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm14,%xmm14
+ vpsrld $31,%xmm1,%xmm5
+ vpaddd %xmm1,%xmm1,%xmm1
+
+ vpsrld $2,%xmm11,%xmm11
+ vpaddd %xmm8,%xmm14,%xmm14
+ vpor %xmm5,%xmm1,%xmm1
+ vpor %xmm7,%xmm11,%xmm11
+ vpxor %xmm4,%xmm2,%xmm2
+ vmovdqa 240-128(%rax),%xmm4
+
+ vpslld $5,%xmm14,%xmm8
+ vpaddd %xmm15,%xmm13,%xmm13
+ vpxor %xmm10,%xmm12,%xmm6
+ vpaddd %xmm1,%xmm13,%xmm13
+ vpxor 80-128(%rax),%xmm2,%xmm2
+ vpsrld $27,%xmm14,%xmm9
+ vpxor %xmm11,%xmm6,%xmm6
+ vpxor %xmm4,%xmm2,%xmm2
+
+ vpslld $30,%xmm10,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm13,%xmm13
+ vpsrld $31,%xmm2,%xmm5
+ vpaddd %xmm2,%xmm2,%xmm2
+
+ vpsrld $2,%xmm10,%xmm10
+ vpaddd %xmm8,%xmm13,%xmm13
+ vpor %xmm5,%xmm2,%xmm2
+ vpor %xmm7,%xmm10,%xmm10
+ vpxor %xmm0,%xmm3,%xmm3
+ vmovdqa 0-128(%rax),%xmm0
+
+ vpslld $5,%xmm13,%xmm8
+ vpaddd %xmm15,%xmm12,%xmm12
+ vpxor %xmm14,%xmm11,%xmm6
+ vpaddd %xmm2,%xmm12,%xmm12
+ vpxor 96-128(%rax),%xmm3,%xmm3
+ vpsrld $27,%xmm13,%xmm9
+ vpxor %xmm10,%xmm6,%xmm6
+ vpxor %xmm0,%xmm3,%xmm3
+
+ vpslld $30,%xmm14,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+ vpsrld $31,%xmm3,%xmm5
+ vpaddd %xmm3,%xmm3,%xmm3
+
+ vpsrld $2,%xmm14,%xmm14
+ vpaddd %xmm8,%xmm12,%xmm12
+ vpor %xmm5,%xmm3,%xmm3
+ vpor %xmm7,%xmm14,%xmm14
+ vpxor %xmm1,%xmm4,%xmm4
+ vmovdqa 16-128(%rax),%xmm1
+
+ vpslld $5,%xmm12,%xmm8
+ vpaddd %xmm15,%xmm11,%xmm11
+ vpxor %xmm13,%xmm10,%xmm6
+ vpaddd %xmm3,%xmm11,%xmm11
+ vpxor 112-128(%rax),%xmm4,%xmm4
+ vpsrld $27,%xmm12,%xmm9
+ vpxor %xmm14,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm4
+
+ vpslld $30,%xmm13,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm11,%xmm11
+ vpsrld $31,%xmm4,%xmm5
+ vpaddd %xmm4,%xmm4,%xmm4
+
+ vpsrld $2,%xmm13,%xmm13
+ vpaddd %xmm8,%xmm11,%xmm11
+ vpor %xmm5,%xmm4,%xmm4
+ vpor %xmm7,%xmm13,%xmm13
+ vpslld $5,%xmm11,%xmm8
+ vpaddd %xmm15,%xmm10,%xmm10
+ vpxor %xmm12,%xmm14,%xmm6
+
+ vpsrld $27,%xmm11,%xmm9
+ vpaddd %xmm4,%xmm10,%xmm10
+ vpxor %xmm13,%xmm6,%xmm6
+
+ vpslld $30,%xmm12,%xmm7
+ vpor %xmm9,%xmm8,%xmm8
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpsrld $2,%xmm12,%xmm12
+ vpaddd %xmm8,%xmm10,%xmm10
+ vpor %xmm7,%xmm12,%xmm12
+ movl $1,%ecx
+ cmpl 0(%rbx),%ecx
+ cmovgeq %rbp,%r8
+ cmpl 4(%rbx),%ecx
+ cmovgeq %rbp,%r9
+ cmpl 8(%rbx),%ecx
+ cmovgeq %rbp,%r10
+ cmpl 12(%rbx),%ecx
+ cmovgeq %rbp,%r11
+ vmovdqu (%rbx),%xmm6
+ vpxor %xmm8,%xmm8,%xmm8
+ vmovdqa %xmm6,%xmm7
+ vpcmpgtd %xmm8,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm6,%xmm6
+
+ vpand %xmm7,%xmm10,%xmm10
+ vpand %xmm7,%xmm11,%xmm11
+ vpaddd 0(%rdi),%xmm10,%xmm10
+ vpand %xmm7,%xmm12,%xmm12
+ vpaddd 32(%rdi),%xmm11,%xmm11
+ vpand %xmm7,%xmm13,%xmm13
+ vpaddd 64(%rdi),%xmm12,%xmm12
+ vpand %xmm7,%xmm14,%xmm14
+ vpaddd 96(%rdi),%xmm13,%xmm13
+ vpaddd 128(%rdi),%xmm14,%xmm14
+ vmovdqu %xmm10,0(%rdi)
+ vmovdqu %xmm11,32(%rdi)
+ vmovdqu %xmm12,64(%rdi)
+ vmovdqu %xmm13,96(%rdi)
+ vmovdqu %xmm14,128(%rdi)
+
+ vmovdqu %xmm6,(%rbx)
+ vmovdqu 96(%rbp),%xmm5
+ decl %edx
+ jnz .Loop_avx
+
+ movl 280(%rsp),%edx
+ leaq 16(%rdi),%rdi
+ leaq 64(%rsi),%rsi
+ decl %edx
+ jnz .Loop_grande_avx
+
+.Ldone_avx:
+ movq 272(%rsp),%rax
+ vzeroupper
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lepilogue_avx:
+ .byte 0xf3,0xc3
+.size sha1_multi_block_avx,.-sha1_multi_block_avx
+.type sha1_multi_block_avx2,@function
+.align 32
+sha1_multi_block_avx2:
+_avx2_shortcut:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $576,%rsp
+ andq $-256,%rsp
+ movq %rax,544(%rsp)
+.Lbody_avx2:
+ leaq K_XX_XX(%rip),%rbp
+ shrl $1,%edx
+
+ vzeroupper
+.Loop_grande_avx2:
+ movl %edx,552(%rsp)
+ xorl %edx,%edx
+ leaq 512(%rsp),%rbx
+ movq 0(%rsi),%r12
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rbp,%r12
+ movq 16(%rsi),%r13
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rbp,%r13
+ movq 32(%rsi),%r14
+ movl 40(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,8(%rbx)
+ cmovleq %rbp,%r14
+ movq 48(%rsi),%r15
+ movl 56(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,12(%rbx)
+ cmovleq %rbp,%r15
+ movq 64(%rsi),%r8
+ movl 72(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,16(%rbx)
+ cmovleq %rbp,%r8
+ movq 80(%rsi),%r9
+ movl 88(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,20(%rbx)
+ cmovleq %rbp,%r9
+ movq 96(%rsi),%r10
+ movl 104(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,24(%rbx)
+ cmovleq %rbp,%r10
+ movq 112(%rsi),%r11
+ movl 120(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,28(%rbx)
+ cmovleq %rbp,%r11
+ vmovdqu 0(%rdi),%ymm0
+ leaq 128(%rsp),%rax
+ vmovdqu 32(%rdi),%ymm1
+ leaq 256+128(%rsp),%rbx
+ vmovdqu 64(%rdi),%ymm2
+ vmovdqu 96(%rdi),%ymm3
+ vmovdqu 128(%rdi),%ymm4
+ vmovdqu 96(%rbp),%ymm9
+ jmp .Loop_avx2
+
+.align 32
+.Loop_avx2:
+ vmovdqa -32(%rbp),%ymm15
+ vmovd (%r12),%xmm10
+ leaq 64(%r12),%r12
+ vmovd (%r8),%xmm12
+ leaq 64(%r8),%r8
+ vmovd (%r13),%xmm7
+ leaq 64(%r13),%r13
+ vmovd (%r9),%xmm6
+ leaq 64(%r9),%r9
+ vpinsrd $1,(%r14),%xmm10,%xmm10
+ leaq 64(%r14),%r14
+ vpinsrd $1,(%r10),%xmm12,%xmm12
+ leaq 64(%r10),%r10
+ vpinsrd $1,(%r15),%xmm7,%xmm7
+ leaq 64(%r15),%r15
+ vpunpckldq %ymm7,%ymm10,%ymm10
+ vpinsrd $1,(%r11),%xmm6,%xmm6
+ leaq 64(%r11),%r11
+ vpunpckldq %ymm6,%ymm12,%ymm12
+ vmovd -60(%r12),%xmm11
+ vinserti128 $1,%xmm12,%ymm10,%ymm10
+ vmovd -60(%r8),%xmm8
+ vpshufb %ymm9,%ymm10,%ymm10
+ vmovd -60(%r13),%xmm7
+ vmovd -60(%r9),%xmm6
+ vpinsrd $1,-60(%r14),%xmm11,%xmm11
+ vpinsrd $1,-60(%r10),%xmm8,%xmm8
+ vpinsrd $1,-60(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm11,%ymm11
+ vpinsrd $1,-60(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpslld $5,%ymm0,%ymm7
+ vpandn %ymm3,%ymm1,%ymm6
+ vpand %ymm2,%ymm1,%ymm5
+
+ vmovdqa %ymm10,0-128(%rax)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vinserti128 $1,%xmm8,%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -56(%r12),%xmm12
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -56(%r8),%xmm8
+ vpaddd %ymm5,%ymm4,%ymm4
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpshufb %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vmovd -56(%r13),%xmm7
+ vmovd -56(%r9),%xmm6
+ vpinsrd $1,-56(%r14),%xmm12,%xmm12
+ vpinsrd $1,-56(%r10),%xmm8,%xmm8
+ vpinsrd $1,-56(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm12,%ymm12
+ vpinsrd $1,-56(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpslld $5,%ymm4,%ymm7
+ vpandn %ymm2,%ymm0,%ymm6
+ vpand %ymm1,%ymm0,%ymm5
+
+ vmovdqa %ymm11,32-128(%rax)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vinserti128 $1,%xmm8,%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -52(%r12),%xmm13
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -52(%r8),%xmm8
+ vpaddd %ymm5,%ymm3,%ymm3
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpshufb %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vmovd -52(%r13),%xmm7
+ vmovd -52(%r9),%xmm6
+ vpinsrd $1,-52(%r14),%xmm13,%xmm13
+ vpinsrd $1,-52(%r10),%xmm8,%xmm8
+ vpinsrd $1,-52(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm13,%ymm13
+ vpinsrd $1,-52(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpslld $5,%ymm3,%ymm7
+ vpandn %ymm1,%ymm4,%ymm6
+ vpand %ymm0,%ymm4,%ymm5
+
+ vmovdqa %ymm12,64-128(%rax)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vinserti128 $1,%xmm8,%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -48(%r12),%xmm14
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -48(%r8),%xmm8
+ vpaddd %ymm5,%ymm2,%ymm2
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpshufb %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vmovd -48(%r13),%xmm7
+ vmovd -48(%r9),%xmm6
+ vpinsrd $1,-48(%r14),%xmm14,%xmm14
+ vpinsrd $1,-48(%r10),%xmm8,%xmm8
+ vpinsrd $1,-48(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm14,%ymm14
+ vpinsrd $1,-48(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpslld $5,%ymm2,%ymm7
+ vpandn %ymm0,%ymm3,%ymm6
+ vpand %ymm4,%ymm3,%ymm5
+
+ vmovdqa %ymm13,96-128(%rax)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vinserti128 $1,%xmm8,%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -44(%r12),%xmm10
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -44(%r8),%xmm8
+ vpaddd %ymm5,%ymm1,%ymm1
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpshufb %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vmovd -44(%r13),%xmm7
+ vmovd -44(%r9),%xmm6
+ vpinsrd $1,-44(%r14),%xmm10,%xmm10
+ vpinsrd $1,-44(%r10),%xmm8,%xmm8
+ vpinsrd $1,-44(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm10,%ymm10
+ vpinsrd $1,-44(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpslld $5,%ymm1,%ymm7
+ vpandn %ymm4,%ymm2,%ymm6
+ vpand %ymm3,%ymm2,%ymm5
+
+ vmovdqa %ymm14,128-128(%rax)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vinserti128 $1,%xmm8,%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -40(%r12),%xmm11
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -40(%r8),%xmm8
+ vpaddd %ymm5,%ymm0,%ymm0
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpshufb %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vmovd -40(%r13),%xmm7
+ vmovd -40(%r9),%xmm6
+ vpinsrd $1,-40(%r14),%xmm11,%xmm11
+ vpinsrd $1,-40(%r10),%xmm8,%xmm8
+ vpinsrd $1,-40(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm11,%ymm11
+ vpinsrd $1,-40(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpslld $5,%ymm0,%ymm7
+ vpandn %ymm3,%ymm1,%ymm6
+ vpand %ymm2,%ymm1,%ymm5
+
+ vmovdqa %ymm10,160-128(%rax)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vinserti128 $1,%xmm8,%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -36(%r12),%xmm12
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -36(%r8),%xmm8
+ vpaddd %ymm5,%ymm4,%ymm4
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpshufb %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vmovd -36(%r13),%xmm7
+ vmovd -36(%r9),%xmm6
+ vpinsrd $1,-36(%r14),%xmm12,%xmm12
+ vpinsrd $1,-36(%r10),%xmm8,%xmm8
+ vpinsrd $1,-36(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm12,%ymm12
+ vpinsrd $1,-36(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpslld $5,%ymm4,%ymm7
+ vpandn %ymm2,%ymm0,%ymm6
+ vpand %ymm1,%ymm0,%ymm5
+
+ vmovdqa %ymm11,192-128(%rax)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vinserti128 $1,%xmm8,%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -32(%r12),%xmm13
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -32(%r8),%xmm8
+ vpaddd %ymm5,%ymm3,%ymm3
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpshufb %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vmovd -32(%r13),%xmm7
+ vmovd -32(%r9),%xmm6
+ vpinsrd $1,-32(%r14),%xmm13,%xmm13
+ vpinsrd $1,-32(%r10),%xmm8,%xmm8
+ vpinsrd $1,-32(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm13,%ymm13
+ vpinsrd $1,-32(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpslld $5,%ymm3,%ymm7
+ vpandn %ymm1,%ymm4,%ymm6
+ vpand %ymm0,%ymm4,%ymm5
+
+ vmovdqa %ymm12,224-128(%rax)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vinserti128 $1,%xmm8,%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -28(%r12),%xmm14
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -28(%r8),%xmm8
+ vpaddd %ymm5,%ymm2,%ymm2
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpshufb %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vmovd -28(%r13),%xmm7
+ vmovd -28(%r9),%xmm6
+ vpinsrd $1,-28(%r14),%xmm14,%xmm14
+ vpinsrd $1,-28(%r10),%xmm8,%xmm8
+ vpinsrd $1,-28(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm14,%ymm14
+ vpinsrd $1,-28(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpslld $5,%ymm2,%ymm7
+ vpandn %ymm0,%ymm3,%ymm6
+ vpand %ymm4,%ymm3,%ymm5
+
+ vmovdqa %ymm13,256-256-128(%rbx)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vinserti128 $1,%xmm8,%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -24(%r12),%xmm10
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -24(%r8),%xmm8
+ vpaddd %ymm5,%ymm1,%ymm1
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpshufb %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vmovd -24(%r13),%xmm7
+ vmovd -24(%r9),%xmm6
+ vpinsrd $1,-24(%r14),%xmm10,%xmm10
+ vpinsrd $1,-24(%r10),%xmm8,%xmm8
+ vpinsrd $1,-24(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm10,%ymm10
+ vpinsrd $1,-24(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpslld $5,%ymm1,%ymm7
+ vpandn %ymm4,%ymm2,%ymm6
+ vpand %ymm3,%ymm2,%ymm5
+
+ vmovdqa %ymm14,288-256-128(%rbx)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vinserti128 $1,%xmm8,%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -20(%r12),%xmm11
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -20(%r8),%xmm8
+ vpaddd %ymm5,%ymm0,%ymm0
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpshufb %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vmovd -20(%r13),%xmm7
+ vmovd -20(%r9),%xmm6
+ vpinsrd $1,-20(%r14),%xmm11,%xmm11
+ vpinsrd $1,-20(%r10),%xmm8,%xmm8
+ vpinsrd $1,-20(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm11,%ymm11
+ vpinsrd $1,-20(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpslld $5,%ymm0,%ymm7
+ vpandn %ymm3,%ymm1,%ymm6
+ vpand %ymm2,%ymm1,%ymm5
+
+ vmovdqa %ymm10,320-256-128(%rbx)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vinserti128 $1,%xmm8,%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -16(%r12),%xmm12
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -16(%r8),%xmm8
+ vpaddd %ymm5,%ymm4,%ymm4
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpshufb %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vmovd -16(%r13),%xmm7
+ vmovd -16(%r9),%xmm6
+ vpinsrd $1,-16(%r14),%xmm12,%xmm12
+ vpinsrd $1,-16(%r10),%xmm8,%xmm8
+ vpinsrd $1,-16(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm12,%ymm12
+ vpinsrd $1,-16(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpslld $5,%ymm4,%ymm7
+ vpandn %ymm2,%ymm0,%ymm6
+ vpand %ymm1,%ymm0,%ymm5
+
+ vmovdqa %ymm11,352-256-128(%rbx)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vinserti128 $1,%xmm8,%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -12(%r12),%xmm13
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -12(%r8),%xmm8
+ vpaddd %ymm5,%ymm3,%ymm3
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpshufb %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vmovd -12(%r13),%xmm7
+ vmovd -12(%r9),%xmm6
+ vpinsrd $1,-12(%r14),%xmm13,%xmm13
+ vpinsrd $1,-12(%r10),%xmm8,%xmm8
+ vpinsrd $1,-12(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm13,%ymm13
+ vpinsrd $1,-12(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpslld $5,%ymm3,%ymm7
+ vpandn %ymm1,%ymm4,%ymm6
+ vpand %ymm0,%ymm4,%ymm5
+
+ vmovdqa %ymm12,384-256-128(%rbx)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vinserti128 $1,%xmm8,%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -8(%r12),%xmm14
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -8(%r8),%xmm8
+ vpaddd %ymm5,%ymm2,%ymm2
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpshufb %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vmovd -8(%r13),%xmm7
+ vmovd -8(%r9),%xmm6
+ vpinsrd $1,-8(%r14),%xmm14,%xmm14
+ vpinsrd $1,-8(%r10),%xmm8,%xmm8
+ vpinsrd $1,-8(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm14,%ymm14
+ vpinsrd $1,-8(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpslld $5,%ymm2,%ymm7
+ vpandn %ymm0,%ymm3,%ymm6
+ vpand %ymm4,%ymm3,%ymm5
+
+ vmovdqa %ymm13,416-256-128(%rbx)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vinserti128 $1,%xmm8,%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovd -4(%r12),%xmm10
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vmovd -4(%r8),%xmm8
+ vpaddd %ymm5,%ymm1,%ymm1
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpshufb %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vmovdqa 0-128(%rax),%ymm11
+ vmovd -4(%r13),%xmm7
+ vmovd -4(%r9),%xmm6
+ vpinsrd $1,-4(%r14),%xmm10,%xmm10
+ vpinsrd $1,-4(%r10),%xmm8,%xmm8
+ vpinsrd $1,-4(%r15),%xmm7,%xmm7
+ vpunpckldq %ymm7,%ymm10,%ymm10
+ vpinsrd $1,-4(%r11),%xmm6,%xmm6
+ vpunpckldq %ymm6,%ymm8,%ymm8
+ vpaddd %ymm15,%ymm0,%ymm0
+ prefetcht0 63(%r12)
+ vpslld $5,%ymm1,%ymm7
+ vpandn %ymm4,%ymm2,%ymm6
+ vpand %ymm3,%ymm2,%ymm5
+
+ vmovdqa %ymm14,448-256-128(%rbx)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vinserti128 $1,%xmm8,%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ prefetcht0 63(%r13)
+ vpxor %ymm6,%ymm5,%ymm5
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ prefetcht0 63(%r14)
+ vpaddd %ymm5,%ymm0,%ymm0
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ prefetcht0 63(%r15)
+ vpshufb %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vmovdqa 32-128(%rax),%ymm12
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 64-128(%rax),%ymm13
+
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpslld $5,%ymm0,%ymm7
+ vpandn %ymm3,%ymm1,%ymm6
+ prefetcht0 63(%r8)
+ vpand %ymm2,%ymm1,%ymm5
+
+ vmovdqa %ymm10,480-256-128(%rbx)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpxor 256-256-128(%rbx),%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+ prefetcht0 63(%r9)
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm4,%ymm4
+ prefetcht0 63(%r10)
+ vpsrld $31,%ymm11,%ymm9
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpsrld $2,%ymm1,%ymm1
+ prefetcht0 63(%r11)
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 96-128(%rax),%ymm14
+
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpslld $5,%ymm4,%ymm7
+ vpandn %ymm2,%ymm0,%ymm6
+
+ vpand %ymm1,%ymm0,%ymm5
+
+ vmovdqa %ymm11,0-128(%rax)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpxor 288-256-128(%rbx),%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm3,%ymm3
+
+ vpsrld $31,%ymm12,%ymm9
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpsrld $2,%ymm0,%ymm0
+
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 128-128(%rax),%ymm10
+
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpslld $5,%ymm3,%ymm7
+ vpandn %ymm1,%ymm4,%ymm6
+
+ vpand %ymm0,%ymm4,%ymm5
+
+ vmovdqa %ymm12,32-128(%rax)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpxor 320-256-128(%rbx),%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm2,%ymm2
+
+ vpsrld $31,%ymm13,%ymm9
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpsrld $2,%ymm4,%ymm4
+
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 160-128(%rax),%ymm11
+
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpslld $5,%ymm2,%ymm7
+ vpandn %ymm0,%ymm3,%ymm6
+
+ vpand %ymm4,%ymm3,%ymm5
+
+ vmovdqa %ymm13,64-128(%rax)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpxor 352-256-128(%rbx),%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm1,%ymm1
+
+ vpsrld $31,%ymm14,%ymm9
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpsrld $2,%ymm3,%ymm3
+
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 192-128(%rax),%ymm12
+
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpslld $5,%ymm1,%ymm7
+ vpandn %ymm4,%ymm2,%ymm6
+
+ vpand %ymm3,%ymm2,%ymm5
+
+ vmovdqa %ymm14,96-128(%rax)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpxor 384-256-128(%rbx),%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm6,%ymm5,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm0,%ymm0
+
+ vpsrld $31,%ymm10,%ymm9
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpsrld $2,%ymm2,%ymm2
+
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vmovdqa 0(%rbp),%ymm15
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 224-128(%rax),%ymm13
+
+ vpslld $5,%ymm0,%ymm7
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpxor %ymm1,%ymm3,%ymm5
+ vmovdqa %ymm10,128-128(%rax)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpxor 416-256-128(%rbx),%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm5,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpsrld $31,%ymm11,%ymm9
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 256-256-128(%rbx),%ymm14
+
+ vpslld $5,%ymm4,%ymm7
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpxor %ymm0,%ymm2,%ymm5
+ vmovdqa %ymm11,160-128(%rax)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpxor 448-256-128(%rbx),%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm5,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm3,%ymm3
+ vpsrld $31,%ymm12,%ymm9
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 288-256-128(%rbx),%ymm10
+
+ vpslld $5,%ymm3,%ymm7
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpxor %ymm4,%ymm1,%ymm5
+ vmovdqa %ymm12,192-128(%rax)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpxor 480-256-128(%rbx),%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm5,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm2,%ymm2
+ vpsrld $31,%ymm13,%ymm9
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 320-256-128(%rbx),%ymm11
+
+ vpslld $5,%ymm2,%ymm7
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpxor %ymm3,%ymm0,%ymm5
+ vmovdqa %ymm13,224-128(%rax)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpxor 0-128(%rax),%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm5,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm1,%ymm1
+ vpsrld $31,%ymm14,%ymm9
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 352-256-128(%rbx),%ymm12
+
+ vpslld $5,%ymm1,%ymm7
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpxor %ymm2,%ymm4,%ymm5
+ vmovdqa %ymm14,256-256-128(%rbx)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpxor 32-128(%rax),%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm5,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm0,%ymm0
+ vpsrld $31,%ymm10,%ymm9
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 384-256-128(%rbx),%ymm13
+
+ vpslld $5,%ymm0,%ymm7
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpxor %ymm1,%ymm3,%ymm5
+ vmovdqa %ymm10,288-256-128(%rbx)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpxor 64-128(%rax),%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm5,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpsrld $31,%ymm11,%ymm9
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 416-256-128(%rbx),%ymm14
+
+ vpslld $5,%ymm4,%ymm7
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpxor %ymm0,%ymm2,%ymm5
+ vmovdqa %ymm11,320-256-128(%rbx)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpxor 96-128(%rax),%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm5,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm3,%ymm3
+ vpsrld $31,%ymm12,%ymm9
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 448-256-128(%rbx),%ymm10
+
+ vpslld $5,%ymm3,%ymm7
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpxor %ymm4,%ymm1,%ymm5
+ vmovdqa %ymm12,352-256-128(%rbx)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpxor 128-128(%rax),%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm5,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm2,%ymm2
+ vpsrld $31,%ymm13,%ymm9
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 480-256-128(%rbx),%ymm11
+
+ vpslld $5,%ymm2,%ymm7
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpxor %ymm3,%ymm0,%ymm5
+ vmovdqa %ymm13,384-256-128(%rbx)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpxor 160-128(%rax),%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm5,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm1,%ymm1
+ vpsrld $31,%ymm14,%ymm9
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 0-128(%rax),%ymm12
+
+ vpslld $5,%ymm1,%ymm7
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpxor %ymm2,%ymm4,%ymm5
+ vmovdqa %ymm14,416-256-128(%rbx)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpxor 192-128(%rax),%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm5,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm0,%ymm0
+ vpsrld $31,%ymm10,%ymm9
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 32-128(%rax),%ymm13
+
+ vpslld $5,%ymm0,%ymm7
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpxor %ymm1,%ymm3,%ymm5
+ vmovdqa %ymm10,448-256-128(%rbx)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpxor 224-128(%rax),%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm5,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpsrld $31,%ymm11,%ymm9
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 64-128(%rax),%ymm14
+
+ vpslld $5,%ymm4,%ymm7
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpxor %ymm0,%ymm2,%ymm5
+ vmovdqa %ymm11,480-256-128(%rbx)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpxor 256-256-128(%rbx),%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm5,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm3,%ymm3
+ vpsrld $31,%ymm12,%ymm9
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 96-128(%rax),%ymm10
+
+ vpslld $5,%ymm3,%ymm7
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpxor %ymm4,%ymm1,%ymm5
+ vmovdqa %ymm12,0-128(%rax)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpxor 288-256-128(%rbx),%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm5,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm2,%ymm2
+ vpsrld $31,%ymm13,%ymm9
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 128-128(%rax),%ymm11
+
+ vpslld $5,%ymm2,%ymm7
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpxor %ymm3,%ymm0,%ymm5
+ vmovdqa %ymm13,32-128(%rax)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpxor 320-256-128(%rbx),%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm5,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm1,%ymm1
+ vpsrld $31,%ymm14,%ymm9
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 160-128(%rax),%ymm12
+
+ vpslld $5,%ymm1,%ymm7
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpxor %ymm2,%ymm4,%ymm5
+ vmovdqa %ymm14,64-128(%rax)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpxor 352-256-128(%rbx),%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm5,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm0,%ymm0
+ vpsrld $31,%ymm10,%ymm9
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 192-128(%rax),%ymm13
+
+ vpslld $5,%ymm0,%ymm7
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpxor %ymm1,%ymm3,%ymm5
+ vmovdqa %ymm10,96-128(%rax)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpxor 384-256-128(%rbx),%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm5,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpsrld $31,%ymm11,%ymm9
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 224-128(%rax),%ymm14
+
+ vpslld $5,%ymm4,%ymm7
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpxor %ymm0,%ymm2,%ymm5
+ vmovdqa %ymm11,128-128(%rax)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpxor 416-256-128(%rbx),%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm5,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm3,%ymm3
+ vpsrld $31,%ymm12,%ymm9
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 256-256-128(%rbx),%ymm10
+
+ vpslld $5,%ymm3,%ymm7
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpxor %ymm4,%ymm1,%ymm5
+ vmovdqa %ymm12,160-128(%rax)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpxor 448-256-128(%rbx),%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm5,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm2,%ymm2
+ vpsrld $31,%ymm13,%ymm9
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 288-256-128(%rbx),%ymm11
+
+ vpslld $5,%ymm2,%ymm7
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpxor %ymm3,%ymm0,%ymm5
+ vmovdqa %ymm13,192-128(%rax)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpxor 480-256-128(%rbx),%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm5,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm1,%ymm1
+ vpsrld $31,%ymm14,%ymm9
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 320-256-128(%rbx),%ymm12
+
+ vpslld $5,%ymm1,%ymm7
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpxor %ymm2,%ymm4,%ymm5
+ vmovdqa %ymm14,224-128(%rax)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpxor 0-128(%rax),%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm5,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm0,%ymm0
+ vpsrld $31,%ymm10,%ymm9
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vmovdqa 32(%rbp),%ymm15
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 352-256-128(%rbx),%ymm13
+
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpslld $5,%ymm0,%ymm7
+ vpand %ymm2,%ymm3,%ymm6
+ vpxor 32-128(%rax),%ymm11,%ymm11
+
+ vpaddd %ymm6,%ymm4,%ymm4
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm3,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vmovdqu %ymm10,256-256-128(%rbx)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm11,%ymm9
+ vpand %ymm1,%ymm5,%ymm5
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpaddd %ymm5,%ymm4,%ymm4
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 384-256-128(%rbx),%ymm14
+
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpslld $5,%ymm4,%ymm7
+ vpand %ymm1,%ymm2,%ymm6
+ vpxor 64-128(%rax),%ymm12,%ymm12
+
+ vpaddd %ymm6,%ymm3,%ymm3
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm2,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vmovdqu %ymm11,288-256-128(%rbx)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm12,%ymm9
+ vpand %ymm0,%ymm5,%ymm5
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpaddd %ymm5,%ymm3,%ymm3
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 416-256-128(%rbx),%ymm10
+
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpslld $5,%ymm3,%ymm7
+ vpand %ymm0,%ymm1,%ymm6
+ vpxor 96-128(%rax),%ymm13,%ymm13
+
+ vpaddd %ymm6,%ymm2,%ymm2
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm1,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vmovdqu %ymm12,320-256-128(%rbx)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm13,%ymm9
+ vpand %ymm4,%ymm5,%ymm5
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpaddd %ymm5,%ymm2,%ymm2
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 448-256-128(%rbx),%ymm11
+
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpslld $5,%ymm2,%ymm7
+ vpand %ymm4,%ymm0,%ymm6
+ vpxor 128-128(%rax),%ymm14,%ymm14
+
+ vpaddd %ymm6,%ymm1,%ymm1
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm0,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vmovdqu %ymm13,352-256-128(%rbx)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm14,%ymm9
+ vpand %ymm3,%ymm5,%ymm5
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpaddd %ymm5,%ymm1,%ymm1
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 480-256-128(%rbx),%ymm12
+
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpslld $5,%ymm1,%ymm7
+ vpand %ymm3,%ymm4,%ymm6
+ vpxor 160-128(%rax),%ymm10,%ymm10
+
+ vpaddd %ymm6,%ymm0,%ymm0
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm4,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vmovdqu %ymm14,384-256-128(%rbx)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm10,%ymm9
+ vpand %ymm2,%ymm5,%ymm5
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpaddd %ymm5,%ymm0,%ymm0
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 0-128(%rax),%ymm13
+
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpslld $5,%ymm0,%ymm7
+ vpand %ymm2,%ymm3,%ymm6
+ vpxor 192-128(%rax),%ymm11,%ymm11
+
+ vpaddd %ymm6,%ymm4,%ymm4
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm3,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vmovdqu %ymm10,416-256-128(%rbx)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm11,%ymm9
+ vpand %ymm1,%ymm5,%ymm5
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpaddd %ymm5,%ymm4,%ymm4
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 32-128(%rax),%ymm14
+
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpslld $5,%ymm4,%ymm7
+ vpand %ymm1,%ymm2,%ymm6
+ vpxor 224-128(%rax),%ymm12,%ymm12
+
+ vpaddd %ymm6,%ymm3,%ymm3
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm2,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vmovdqu %ymm11,448-256-128(%rbx)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm12,%ymm9
+ vpand %ymm0,%ymm5,%ymm5
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpaddd %ymm5,%ymm3,%ymm3
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 64-128(%rax),%ymm10
+
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpslld $5,%ymm3,%ymm7
+ vpand %ymm0,%ymm1,%ymm6
+ vpxor 256-256-128(%rbx),%ymm13,%ymm13
+
+ vpaddd %ymm6,%ymm2,%ymm2
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm1,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vmovdqu %ymm12,480-256-128(%rbx)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm13,%ymm9
+ vpand %ymm4,%ymm5,%ymm5
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpaddd %ymm5,%ymm2,%ymm2
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 96-128(%rax),%ymm11
+
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpslld $5,%ymm2,%ymm7
+ vpand %ymm4,%ymm0,%ymm6
+ vpxor 288-256-128(%rbx),%ymm14,%ymm14
+
+ vpaddd %ymm6,%ymm1,%ymm1
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm0,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vmovdqu %ymm13,0-128(%rax)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm14,%ymm9
+ vpand %ymm3,%ymm5,%ymm5
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpaddd %ymm5,%ymm1,%ymm1
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 128-128(%rax),%ymm12
+
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpslld $5,%ymm1,%ymm7
+ vpand %ymm3,%ymm4,%ymm6
+ vpxor 320-256-128(%rbx),%ymm10,%ymm10
+
+ vpaddd %ymm6,%ymm0,%ymm0
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm4,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vmovdqu %ymm14,32-128(%rax)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm10,%ymm9
+ vpand %ymm2,%ymm5,%ymm5
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpaddd %ymm5,%ymm0,%ymm0
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 160-128(%rax),%ymm13
+
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpslld $5,%ymm0,%ymm7
+ vpand %ymm2,%ymm3,%ymm6
+ vpxor 352-256-128(%rbx),%ymm11,%ymm11
+
+ vpaddd %ymm6,%ymm4,%ymm4
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm3,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vmovdqu %ymm10,64-128(%rax)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm11,%ymm9
+ vpand %ymm1,%ymm5,%ymm5
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpaddd %ymm5,%ymm4,%ymm4
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 192-128(%rax),%ymm14
+
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpslld $5,%ymm4,%ymm7
+ vpand %ymm1,%ymm2,%ymm6
+ vpxor 384-256-128(%rbx),%ymm12,%ymm12
+
+ vpaddd %ymm6,%ymm3,%ymm3
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm2,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vmovdqu %ymm11,96-128(%rax)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm12,%ymm9
+ vpand %ymm0,%ymm5,%ymm5
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpaddd %ymm5,%ymm3,%ymm3
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 224-128(%rax),%ymm10
+
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpslld $5,%ymm3,%ymm7
+ vpand %ymm0,%ymm1,%ymm6
+ vpxor 416-256-128(%rbx),%ymm13,%ymm13
+
+ vpaddd %ymm6,%ymm2,%ymm2
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm1,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vmovdqu %ymm12,128-128(%rax)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm13,%ymm9
+ vpand %ymm4,%ymm5,%ymm5
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpaddd %ymm5,%ymm2,%ymm2
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 256-256-128(%rbx),%ymm11
+
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpslld $5,%ymm2,%ymm7
+ vpand %ymm4,%ymm0,%ymm6
+ vpxor 448-256-128(%rbx),%ymm14,%ymm14
+
+ vpaddd %ymm6,%ymm1,%ymm1
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm0,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vmovdqu %ymm13,160-128(%rax)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm14,%ymm9
+ vpand %ymm3,%ymm5,%ymm5
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpaddd %ymm5,%ymm1,%ymm1
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 288-256-128(%rbx),%ymm12
+
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpslld $5,%ymm1,%ymm7
+ vpand %ymm3,%ymm4,%ymm6
+ vpxor 480-256-128(%rbx),%ymm10,%ymm10
+
+ vpaddd %ymm6,%ymm0,%ymm0
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm4,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vmovdqu %ymm14,192-128(%rax)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm10,%ymm9
+ vpand %ymm2,%ymm5,%ymm5
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpaddd %ymm5,%ymm0,%ymm0
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 320-256-128(%rbx),%ymm13
+
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpslld $5,%ymm0,%ymm7
+ vpand %ymm2,%ymm3,%ymm6
+ vpxor 0-128(%rax),%ymm11,%ymm11
+
+ vpaddd %ymm6,%ymm4,%ymm4
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm3,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vmovdqu %ymm10,224-128(%rax)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm11,%ymm9
+ vpand %ymm1,%ymm5,%ymm5
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpaddd %ymm5,%ymm4,%ymm4
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 352-256-128(%rbx),%ymm14
+
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpslld $5,%ymm4,%ymm7
+ vpand %ymm1,%ymm2,%ymm6
+ vpxor 32-128(%rax),%ymm12,%ymm12
+
+ vpaddd %ymm6,%ymm3,%ymm3
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm2,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vmovdqu %ymm11,256-256-128(%rbx)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm12,%ymm9
+ vpand %ymm0,%ymm5,%ymm5
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpaddd %ymm5,%ymm3,%ymm3
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 384-256-128(%rbx),%ymm10
+
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpslld $5,%ymm3,%ymm7
+ vpand %ymm0,%ymm1,%ymm6
+ vpxor 64-128(%rax),%ymm13,%ymm13
+
+ vpaddd %ymm6,%ymm2,%ymm2
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm1,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vmovdqu %ymm12,288-256-128(%rbx)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm13,%ymm9
+ vpand %ymm4,%ymm5,%ymm5
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpaddd %ymm5,%ymm2,%ymm2
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 416-256-128(%rbx),%ymm11
+
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpslld $5,%ymm2,%ymm7
+ vpand %ymm4,%ymm0,%ymm6
+ vpxor 96-128(%rax),%ymm14,%ymm14
+
+ vpaddd %ymm6,%ymm1,%ymm1
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm0,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vmovdqu %ymm13,320-256-128(%rbx)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm14,%ymm9
+ vpand %ymm3,%ymm5,%ymm5
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpaddd %ymm5,%ymm1,%ymm1
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 448-256-128(%rbx),%ymm12
+
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpslld $5,%ymm1,%ymm7
+ vpand %ymm3,%ymm4,%ymm6
+ vpxor 128-128(%rax),%ymm10,%ymm10
+
+ vpaddd %ymm6,%ymm0,%ymm0
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm4,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vmovdqu %ymm14,352-256-128(%rbx)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpor %ymm8,%ymm7,%ymm7
+ vpsrld $31,%ymm10,%ymm9
+ vpand %ymm2,%ymm5,%ymm5
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpaddd %ymm5,%ymm0,%ymm0
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vmovdqa 64(%rbp),%ymm15
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 480-256-128(%rbx),%ymm13
+
+ vpslld $5,%ymm0,%ymm7
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpxor %ymm1,%ymm3,%ymm5
+ vmovdqa %ymm10,384-256-128(%rbx)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpxor 160-128(%rax),%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm5,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpsrld $31,%ymm11,%ymm9
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 0-128(%rax),%ymm14
+
+ vpslld $5,%ymm4,%ymm7
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpxor %ymm0,%ymm2,%ymm5
+ vmovdqa %ymm11,416-256-128(%rbx)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpxor 192-128(%rax),%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm5,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm3,%ymm3
+ vpsrld $31,%ymm12,%ymm9
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 32-128(%rax),%ymm10
+
+ vpslld $5,%ymm3,%ymm7
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpxor %ymm4,%ymm1,%ymm5
+ vmovdqa %ymm12,448-256-128(%rbx)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpxor 224-128(%rax),%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm5,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm2,%ymm2
+ vpsrld $31,%ymm13,%ymm9
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 64-128(%rax),%ymm11
+
+ vpslld $5,%ymm2,%ymm7
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpxor %ymm3,%ymm0,%ymm5
+ vmovdqa %ymm13,480-256-128(%rbx)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpxor 256-256-128(%rbx),%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm5,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm1,%ymm1
+ vpsrld $31,%ymm14,%ymm9
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 96-128(%rax),%ymm12
+
+ vpslld $5,%ymm1,%ymm7
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpxor %ymm2,%ymm4,%ymm5
+ vmovdqa %ymm14,0-128(%rax)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpxor 288-256-128(%rbx),%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm5,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm0,%ymm0
+ vpsrld $31,%ymm10,%ymm9
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 128-128(%rax),%ymm13
+
+ vpslld $5,%ymm0,%ymm7
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpxor %ymm1,%ymm3,%ymm5
+ vmovdqa %ymm10,32-128(%rax)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpxor 320-256-128(%rbx),%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm5,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpsrld $31,%ymm11,%ymm9
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 160-128(%rax),%ymm14
+
+ vpslld $5,%ymm4,%ymm7
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpxor %ymm0,%ymm2,%ymm5
+ vmovdqa %ymm11,64-128(%rax)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpxor 352-256-128(%rbx),%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm5,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm3,%ymm3
+ vpsrld $31,%ymm12,%ymm9
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 192-128(%rax),%ymm10
+
+ vpslld $5,%ymm3,%ymm7
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpxor %ymm4,%ymm1,%ymm5
+ vmovdqa %ymm12,96-128(%rax)
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpxor 384-256-128(%rbx),%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm5,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm2,%ymm2
+ vpsrld $31,%ymm13,%ymm9
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 224-128(%rax),%ymm11
+
+ vpslld $5,%ymm2,%ymm7
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpxor %ymm3,%ymm0,%ymm5
+ vmovdqa %ymm13,128-128(%rax)
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpxor 416-256-128(%rbx),%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm5,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm1,%ymm1
+ vpsrld $31,%ymm14,%ymm9
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 256-256-128(%rbx),%ymm12
+
+ vpslld $5,%ymm1,%ymm7
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpxor %ymm2,%ymm4,%ymm5
+ vmovdqa %ymm14,160-128(%rax)
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpxor 448-256-128(%rbx),%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm5,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm0,%ymm0
+ vpsrld $31,%ymm10,%ymm9
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 288-256-128(%rbx),%ymm13
+
+ vpslld $5,%ymm0,%ymm7
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpxor %ymm1,%ymm3,%ymm5
+ vmovdqa %ymm10,192-128(%rax)
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpxor 480-256-128(%rbx),%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm5,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpsrld $31,%ymm11,%ymm9
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 320-256-128(%rbx),%ymm14
+
+ vpslld $5,%ymm4,%ymm7
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpxor %ymm0,%ymm2,%ymm5
+ vmovdqa %ymm11,224-128(%rax)
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpxor 0-128(%rax),%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm5,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm3,%ymm3
+ vpsrld $31,%ymm12,%ymm9
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 352-256-128(%rbx),%ymm10
+
+ vpslld $5,%ymm3,%ymm7
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpxor %ymm4,%ymm1,%ymm5
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpxor 32-128(%rax),%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm5,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm2,%ymm2
+ vpsrld $31,%ymm13,%ymm9
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 384-256-128(%rbx),%ymm11
+
+ vpslld $5,%ymm2,%ymm7
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpxor %ymm3,%ymm0,%ymm5
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpxor 64-128(%rax),%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm5,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm1,%ymm1
+ vpsrld $31,%ymm14,%ymm9
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpxor %ymm12,%ymm10,%ymm10
+ vmovdqa 416-256-128(%rbx),%ymm12
+
+ vpslld $5,%ymm1,%ymm7
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpxor %ymm2,%ymm4,%ymm5
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpxor 96-128(%rax),%ymm10,%ymm10
+ vpsrld $27,%ymm1,%ymm8
+ vpxor %ymm3,%ymm5,%ymm5
+ vpxor %ymm12,%ymm10,%ymm10
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm0,%ymm0
+ vpsrld $31,%ymm10,%ymm9
+ vpaddd %ymm10,%ymm10,%ymm10
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm9,%ymm10,%ymm10
+ vpor %ymm6,%ymm2,%ymm2
+ vpxor %ymm13,%ymm11,%ymm11
+ vmovdqa 448-256-128(%rbx),%ymm13
+
+ vpslld $5,%ymm0,%ymm7
+ vpaddd %ymm15,%ymm4,%ymm4
+ vpxor %ymm1,%ymm3,%ymm5
+ vpaddd %ymm10,%ymm4,%ymm4
+ vpxor 128-128(%rax),%ymm11,%ymm11
+ vpsrld $27,%ymm0,%ymm8
+ vpxor %ymm2,%ymm5,%ymm5
+ vpxor %ymm13,%ymm11,%ymm11
+
+ vpslld $30,%ymm1,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpsrld $31,%ymm11,%ymm9
+ vpaddd %ymm11,%ymm11,%ymm11
+
+ vpsrld $2,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm4,%ymm4
+ vpor %ymm9,%ymm11,%ymm11
+ vpor %ymm6,%ymm1,%ymm1
+ vpxor %ymm14,%ymm12,%ymm12
+ vmovdqa 480-256-128(%rbx),%ymm14
+
+ vpslld $5,%ymm4,%ymm7
+ vpaddd %ymm15,%ymm3,%ymm3
+ vpxor %ymm0,%ymm2,%ymm5
+ vpaddd %ymm11,%ymm3,%ymm3
+ vpxor 160-128(%rax),%ymm12,%ymm12
+ vpsrld $27,%ymm4,%ymm8
+ vpxor %ymm1,%ymm5,%ymm5
+ vpxor %ymm14,%ymm12,%ymm12
+
+ vpslld $30,%ymm0,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm3,%ymm3
+ vpsrld $31,%ymm12,%ymm9
+ vpaddd %ymm12,%ymm12,%ymm12
+
+ vpsrld $2,%ymm0,%ymm0
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpor %ymm9,%ymm12,%ymm12
+ vpor %ymm6,%ymm0,%ymm0
+ vpxor %ymm10,%ymm13,%ymm13
+ vmovdqa 0-128(%rax),%ymm10
+
+ vpslld $5,%ymm3,%ymm7
+ vpaddd %ymm15,%ymm2,%ymm2
+ vpxor %ymm4,%ymm1,%ymm5
+ vpaddd %ymm12,%ymm2,%ymm2
+ vpxor 192-128(%rax),%ymm13,%ymm13
+ vpsrld $27,%ymm3,%ymm8
+ vpxor %ymm0,%ymm5,%ymm5
+ vpxor %ymm10,%ymm13,%ymm13
+
+ vpslld $30,%ymm4,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm2,%ymm2
+ vpsrld $31,%ymm13,%ymm9
+ vpaddd %ymm13,%ymm13,%ymm13
+
+ vpsrld $2,%ymm4,%ymm4
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpor %ymm9,%ymm13,%ymm13
+ vpor %ymm6,%ymm4,%ymm4
+ vpxor %ymm11,%ymm14,%ymm14
+ vmovdqa 32-128(%rax),%ymm11
+
+ vpslld $5,%ymm2,%ymm7
+ vpaddd %ymm15,%ymm1,%ymm1
+ vpxor %ymm3,%ymm0,%ymm5
+ vpaddd %ymm13,%ymm1,%ymm1
+ vpxor 224-128(%rax),%ymm14,%ymm14
+ vpsrld $27,%ymm2,%ymm8
+ vpxor %ymm4,%ymm5,%ymm5
+ vpxor %ymm11,%ymm14,%ymm14
+
+ vpslld $30,%ymm3,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm1,%ymm1
+ vpsrld $31,%ymm14,%ymm9
+ vpaddd %ymm14,%ymm14,%ymm14
+
+ vpsrld $2,%ymm3,%ymm3
+ vpaddd %ymm7,%ymm1,%ymm1
+ vpor %ymm9,%ymm14,%ymm14
+ vpor %ymm6,%ymm3,%ymm3
+ vpslld $5,%ymm1,%ymm7
+ vpaddd %ymm15,%ymm0,%ymm0
+ vpxor %ymm2,%ymm4,%ymm5
+
+ vpsrld $27,%ymm1,%ymm8
+ vpaddd %ymm14,%ymm0,%ymm0
+ vpxor %ymm3,%ymm5,%ymm5
+
+ vpslld $30,%ymm2,%ymm6
+ vpor %ymm8,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm0,%ymm0
+
+ vpsrld $2,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm0,%ymm0
+ vpor %ymm6,%ymm2,%ymm2
+ movl $1,%ecx
+ leaq 512(%rsp),%rbx
+ cmpl 0(%rbx),%ecx
+ cmovgeq %rbp,%r12
+ cmpl 4(%rbx),%ecx
+ cmovgeq %rbp,%r13
+ cmpl 8(%rbx),%ecx
+ cmovgeq %rbp,%r14
+ cmpl 12(%rbx),%ecx
+ cmovgeq %rbp,%r15
+ cmpl 16(%rbx),%ecx
+ cmovgeq %rbp,%r8
+ cmpl 20(%rbx),%ecx
+ cmovgeq %rbp,%r9
+ cmpl 24(%rbx),%ecx
+ cmovgeq %rbp,%r10
+ cmpl 28(%rbx),%ecx
+ cmovgeq %rbp,%r11
+ vmovdqu (%rbx),%ymm5
+ vpxor %ymm7,%ymm7,%ymm7
+ vmovdqa %ymm5,%ymm6
+ vpcmpgtd %ymm7,%ymm6,%ymm6
+ vpaddd %ymm6,%ymm5,%ymm5
+
+ vpand %ymm6,%ymm0,%ymm0
+ vpand %ymm6,%ymm1,%ymm1
+ vpaddd 0(%rdi),%ymm0,%ymm0
+ vpand %ymm6,%ymm2,%ymm2
+ vpaddd 32(%rdi),%ymm1,%ymm1
+ vpand %ymm6,%ymm3,%ymm3
+ vpaddd 64(%rdi),%ymm2,%ymm2
+ vpand %ymm6,%ymm4,%ymm4
+ vpaddd 96(%rdi),%ymm3,%ymm3
+ vpaddd 128(%rdi),%ymm4,%ymm4
+ vmovdqu %ymm0,0(%rdi)
+ vmovdqu %ymm1,32(%rdi)
+ vmovdqu %ymm2,64(%rdi)
+ vmovdqu %ymm3,96(%rdi)
+ vmovdqu %ymm4,128(%rdi)
+
+ vmovdqu %ymm5,(%rbx)
+ leaq 256+128(%rsp),%rbx
+ vmovdqu 96(%rbp),%ymm9
+ decl %edx
+ jnz .Loop_avx2
+
+
+
+
+
+
+
+.Ldone_avx2:
+ movq 544(%rsp),%rax
+ vzeroupper
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lepilogue_avx2:
+ .byte 0xf3,0xc3
+.size sha1_multi_block_avx2,.-sha1_multi_block_avx2
+
+.align 256
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+K_XX_XX:
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
+.byte 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
Property changes on: trunk/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S
===================================================================
--- trunk/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S (rev 0)
+++ trunk/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,7905 @@
+/* $MidnightBSD$ */
+/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S 305153 2016-08-31 20:33:59Z jkim $ */
+/* Do not modify. This file is auto-generated from sha256-mb-x86_64.pl. */
+.text
+
+
+
+.globl sha256_multi_block
+.type sha256_multi_block, at function
+.align 32
+sha256_multi_block:
+ movq OPENSSL_ia32cap_P+4(%rip),%rcx
+ btq $61,%rcx
+ jc _shaext_shortcut
+ testl $268435456,%ecx
+ jnz _avx_shortcut
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ subq $288,%rsp
+ andq $-256,%rsp
+ movq %rax,272(%rsp)
+.Lbody:
+ leaq K256+128(%rip),%rbp
+ leaq 256(%rsp),%rbx
+ leaq 128(%rdi),%rdi
+
+.Loop_grande:
+ movl %edx,280(%rsp)
+ xorl %edx,%edx
+ movq 0(%rsi),%r8
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rbp,%r8
+ movq 16(%rsi),%r9
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rbp,%r9
+ movq 32(%rsi),%r10
+ movl 40(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,8(%rbx)
+ cmovleq %rbp,%r10
+ movq 48(%rsi),%r11
+ movl 56(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,12(%rbx)
+ cmovleq %rbp,%r11
+ testl %edx,%edx
+ jz .Ldone
+
+ movdqu 0-128(%rdi),%xmm8
+ leaq 128(%rsp),%rax
+ movdqu 32-128(%rdi),%xmm9
+ movdqu 64-128(%rdi),%xmm10
+ movdqu 96-128(%rdi),%xmm11
+ movdqu 128-128(%rdi),%xmm12
+ movdqu 160-128(%rdi),%xmm13
+ movdqu 192-128(%rdi),%xmm14
+ movdqu 224-128(%rdi),%xmm15
+ movdqu .Lpbswap(%rip),%xmm6
+ jmp .Loop
+
+.align 32
+.Loop:
+ movdqa %xmm10,%xmm4
+ pxor %xmm9,%xmm4
+ movd 0(%r8),%xmm5
+ movd 0(%r9),%xmm0
+ movd 0(%r10),%xmm1
+ movd 0(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm12,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm12,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,0-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movd 4(%r8),%xmm5
+ movd 4(%r9),%xmm0
+ movd 4(%r10),%xmm1
+ movd 4(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm11,%xmm7
+
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,238
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,16-128(%rax)
+ paddd %xmm14,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm5,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm14
+ paddd %xmm7,%xmm14
+ movd 8(%r8),%xmm5
+ movd 8(%r9),%xmm0
+ movd 8(%r10),%xmm1
+ movd 8(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm10,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm10,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,32-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movd 12(%r8),%xmm5
+ movd 12(%r9),%xmm0
+ movd 12(%r10),%xmm1
+ movd 12(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm9,%xmm7
+
+ movdqa %xmm9,%xmm2
+.byte 102,15,56,0,238
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,48-128(%rax)
+ paddd %xmm12,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm5,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm12
+ paddd %xmm7,%xmm12
+ movd 16(%r8),%xmm5
+ movd 16(%r9),%xmm0
+ movd 16(%r10),%xmm1
+ movd 16(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm8,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm8,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,64-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movd 20(%r8),%xmm5
+ movd 20(%r9),%xmm0
+ movd 20(%r10),%xmm1
+ movd 20(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm15,%xmm7
+
+ movdqa %xmm15,%xmm2
+.byte 102,15,56,0,238
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,80-128(%rax)
+ paddd %xmm10,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm5,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm10
+ paddd %xmm7,%xmm10
+ movd 24(%r8),%xmm5
+ movd 24(%r9),%xmm0
+ movd 24(%r10),%xmm1
+ movd 24(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm14,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm14,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,96-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movd 28(%r8),%xmm5
+ movd 28(%r9),%xmm0
+ movd 28(%r10),%xmm1
+ movd 28(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm13,%xmm7
+
+ movdqa %xmm13,%xmm2
+.byte 102,15,56,0,238
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,112-128(%rax)
+ paddd %xmm8,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm5,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movd 32(%r8),%xmm5
+ movd 32(%r9),%xmm0
+ movd 32(%r10),%xmm1
+ movd 32(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm12,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm12,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,128-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movd 36(%r8),%xmm5
+ movd 36(%r9),%xmm0
+ movd 36(%r10),%xmm1
+ movd 36(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm11,%xmm7
+
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,238
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,144-128(%rax)
+ paddd %xmm14,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm5,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm14
+ paddd %xmm7,%xmm14
+ movd 40(%r8),%xmm5
+ movd 40(%r9),%xmm0
+ movd 40(%r10),%xmm1
+ movd 40(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm10,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm10,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,160-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movd 44(%r8),%xmm5
+ movd 44(%r9),%xmm0
+ movd 44(%r10),%xmm1
+ movd 44(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm9,%xmm7
+
+ movdqa %xmm9,%xmm2
+.byte 102,15,56,0,238
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,176-128(%rax)
+ paddd %xmm12,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm5,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm12
+ paddd %xmm7,%xmm12
+ movd 48(%r8),%xmm5
+ movd 48(%r9),%xmm0
+ movd 48(%r10),%xmm1
+ movd 48(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm8,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm8,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,192-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movd 52(%r8),%xmm5
+ movd 52(%r9),%xmm0
+ movd 52(%r10),%xmm1
+ movd 52(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm15,%xmm7
+
+ movdqa %xmm15,%xmm2
+.byte 102,15,56,0,238
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,208-128(%rax)
+ paddd %xmm10,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm5,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm10
+ paddd %xmm7,%xmm10
+ movd 56(%r8),%xmm5
+ movd 56(%r9),%xmm0
+ movd 56(%r10),%xmm1
+ movd 56(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm14,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm14,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,224-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movd 60(%r8),%xmm5
+ leaq 64(%r8),%r8
+ movd 60(%r9),%xmm0
+ leaq 64(%r9),%r9
+ movd 60(%r10),%xmm1
+ leaq 64(%r10),%r10
+ movd 60(%r11),%xmm2
+ leaq 64(%r11),%r11
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+ movdqa %xmm13,%xmm7
+
+ movdqa %xmm13,%xmm2
+.byte 102,15,56,0,238
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,240-128(%rax)
+ paddd %xmm8,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ prefetcht0 63(%r8)
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ prefetcht0 63(%r9)
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ prefetcht0 63(%r10)
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ prefetcht0 63(%r11)
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm5,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movdqu 0-128(%rax),%xmm5
+ movl $3,%ecx
+ jmp .Loop_16_xx
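+/*
+ * The first 16 message words of every lane now sit in the schedule area
+ * at (%rax).  .Loop_16_xx runs three more passes of 16 rounds each
+ * (rounds 16..63), extending the schedule in place: sigma0 of W[t-15]
+ * (rotr 7 ^ rotr 18 ^ shr 3) and sigma1 of W[t-2] (rotr 17 ^ rotr 19 ^
+ * shr 10) are added to W[t-16] and W[t-7] before the same round
+ * arithmetic as above.
+ */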
+.align 32
+.Loop_16_xx:
+ movdqa 16-128(%rax),%xmm6
+ paddd 144-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 224-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm12,%xmm7
+
+ movdqa %xmm12,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,0-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movdqa 32-128(%rax),%xmm5
+ paddd 160-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 240-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm11,%xmm7
+
+ movdqa %xmm11,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,16-128(%rax)
+ paddd %xmm14,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm6,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm14
+ paddd %xmm7,%xmm14
+ movdqa 48-128(%rax),%xmm6
+ paddd 176-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 0-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm10,%xmm7
+
+ movdqa %xmm10,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,32-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movdqa 64-128(%rax),%xmm5
+ paddd 192-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 16-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm9,%xmm7
+
+ movdqa %xmm9,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,48-128(%rax)
+ paddd %xmm12,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm6,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm12
+ paddd %xmm7,%xmm12
+ movdqa 80-128(%rax),%xmm6
+ paddd 208-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 32-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm8,%xmm7
+
+ movdqa %xmm8,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,64-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movdqa 96-128(%rax),%xmm5
+ paddd 224-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 48-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm15,%xmm7
+
+ movdqa %xmm15,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,80-128(%rax)
+ paddd %xmm10,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm6,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm10
+ paddd %xmm7,%xmm10
+ movdqa 112-128(%rax),%xmm6
+ paddd 240-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 64-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm14,%xmm7
+
+ movdqa %xmm14,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,96-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movdqa 128-128(%rax),%xmm5
+ paddd 0-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 80-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm13,%xmm7
+
+ movdqa %xmm13,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,112-128(%rax)
+ paddd %xmm8,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm6,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movdqa 144-128(%rax),%xmm6
+ paddd 16-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 96-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm12,%xmm7
+
+ movdqa %xmm12,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,128-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movdqa 160-128(%rax),%xmm5
+ paddd 32-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 112-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm11,%xmm7
+
+ movdqa %xmm11,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,144-128(%rax)
+ paddd %xmm14,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm6,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm14
+ paddd %xmm7,%xmm14
+ movdqa 176-128(%rax),%xmm6
+ paddd 48-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 128-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm10,%xmm7
+
+ movdqa %xmm10,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,160-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movdqa 192-128(%rax),%xmm5
+ paddd 64-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 144-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm9,%xmm7
+
+ movdqa %xmm9,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,176-128(%rax)
+ paddd %xmm12,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm6,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm12
+ paddd %xmm7,%xmm12
+ movdqa 208-128(%rax),%xmm6
+ paddd 80-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 160-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm8,%xmm7
+
+ movdqa %xmm8,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,192-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movdqa 224-128(%rax),%xmm5
+ paddd 96-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 176-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm15,%xmm7
+
+ movdqa %xmm15,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,208-128(%rax)
+ paddd %xmm10,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm6,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm10
+ paddd %xmm7,%xmm10
+ movdqa 240-128(%rax),%xmm6
+ paddd 112-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 192-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm14,%xmm7
+
+ movdqa %xmm14,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,224-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movdqa 0-128(%rax),%xmm5
+ paddd 128-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 208-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm13,%xmm7
+
+ movdqa %xmm13,%xmm2
+
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,240-128(%rax)
+ paddd %xmm8,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm6,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ decl %ecx
+ jnz .Loop_16_xx
+
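+/*
+ * All 64 rounds are finished.  Lanes whose block counter in (%rbx) is
+ * down to one or less have their input pointer parked on the K256 table
+ * (presumably so the next pass's loads stay harmless); the pcmpgtd mask
+ * in %xmm6 (counter > 0) zeroes the working variables of exhausted lanes
+ * before they are added back into the hash state at (%rdi), and
+ * paddd %xmm6,%xmm7 decrements the counters of the lanes still running.
+ */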
+ movl $1,%ecx
+ leaq K256+128(%rip),%rbp
+
+ movdqa (%rbx),%xmm7
+ cmpl 0(%rbx),%ecx
+ pxor %xmm0,%xmm0
+ cmovgeq %rbp,%r8
+ cmpl 4(%rbx),%ecx
+ movdqa %xmm7,%xmm6
+ cmovgeq %rbp,%r9
+ cmpl 8(%rbx),%ecx
+ pcmpgtd %xmm0,%xmm6
+ cmovgeq %rbp,%r10
+ cmpl 12(%rbx),%ecx
+ paddd %xmm6,%xmm7
+ cmovgeq %rbp,%r11
+
+ movdqu 0-128(%rdi),%xmm0
+ pand %xmm6,%xmm8
+ movdqu 32-128(%rdi),%xmm1
+ pand %xmm6,%xmm9
+ movdqu 64-128(%rdi),%xmm2
+ pand %xmm6,%xmm10
+ movdqu 96-128(%rdi),%xmm5
+ pand %xmm6,%xmm11
+ paddd %xmm0,%xmm8
+ movdqu 128-128(%rdi),%xmm0
+ pand %xmm6,%xmm12
+ paddd %xmm1,%xmm9
+ movdqu 160-128(%rdi),%xmm1
+ pand %xmm6,%xmm13
+ paddd %xmm2,%xmm10
+ movdqu 192-128(%rdi),%xmm2
+ pand %xmm6,%xmm14
+ paddd %xmm5,%xmm11
+ movdqu 224-128(%rdi),%xmm5
+ pand %xmm6,%xmm15
+ paddd %xmm0,%xmm12
+ paddd %xmm1,%xmm13
+ movdqu %xmm8,0-128(%rdi)
+ paddd %xmm2,%xmm14
+ movdqu %xmm9,32-128(%rdi)
+ paddd %xmm5,%xmm15
+ movdqu %xmm10,64-128(%rdi)
+ movdqu %xmm11,96-128(%rdi)
+ movdqu %xmm12,128-128(%rdi)
+ movdqu %xmm13,160-128(%rdi)
+ movdqu %xmm14,192-128(%rdi)
+ movdqu %xmm15,224-128(%rdi)
+
+ movdqa %xmm7,(%rbx)
+ movdqa .Lpbswap(%rip),%xmm6
+ decl %edx
+ jnz .Loop
+
+ movl 280(%rsp),%edx
+ leaq 16(%rdi),%rdi
+ leaq 64(%rsi),%rsi
+ decl %edx
+ jnz .Loop_grande
+
+.Ldone:
+ movq 272(%rsp),%rax
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size sha256_multi_block,.-sha256_multi_block
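+/*
+ * sha256_multi_block_shaext: a two-lane variant built on the SHA
+ * extensions.  Only two input descriptors (%r8/%r9) are consumed per
+ * pass; the ".byte 15,56,203/204/205" sequences below appear to be
+ * sha256rnds2, sha256msg1 and sha256msg2 emitted as raw opcodes, and the
+ * state is repacked into ABEF/CDGH register pairs (%xmm12/%xmm13 for
+ * lane 0, %xmm14/%xmm15 for lane 1) as those instructions expect.
+ */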
+.type sha256_multi_block_shaext, at function
+.align 32
+sha256_multi_block_shaext:
+_shaext_shortcut:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ subq $288,%rsp
+ shll $1,%edx
+ andq $-256,%rsp
+ leaq 128(%rdi),%rdi
+ movq %rax,272(%rsp)
+.Lbody_shaext:
+ leaq 256(%rsp),%rbx
+ leaq K256_shaext+128(%rip),%rbp
+
+.Loop_grande_shaext:
+ movl %edx,280(%rsp)
+ xorl %edx,%edx
+ movq 0(%rsi),%r8
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rsp,%r8
+ movq 16(%rsi),%r9
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rsp,%r9
+ testl %edx,%edx
+ jz .Ldone_shaext
+
+ movq 0-128(%rdi),%xmm12
+ movq 32-128(%rdi),%xmm4
+ movq 64-128(%rdi),%xmm13
+ movq 96-128(%rdi),%xmm5
+ movq 128-128(%rdi),%xmm8
+ movq 160-128(%rdi),%xmm9
+ movq 192-128(%rdi),%xmm10
+ movq 224-128(%rdi),%xmm11
+
+ punpckldq %xmm4,%xmm12
+ punpckldq %xmm5,%xmm13
+ punpckldq %xmm9,%xmm8
+ punpckldq %xmm11,%xmm10
+ movdqa K256_shaext-16(%rip),%xmm3
+
+ movdqa %xmm12,%xmm14
+ movdqa %xmm13,%xmm15
+ punpcklqdq %xmm8,%xmm12
+ punpcklqdq %xmm10,%xmm13
+ punpckhqdq %xmm8,%xmm14
+ punpckhqdq %xmm10,%xmm15
+
+ pshufd $27,%xmm12,%xmm12
+ pshufd $27,%xmm13,%xmm13
+ pshufd $27,%xmm14,%xmm14
+ pshufd $27,%xmm15,%xmm15
+ jmp .Loop_shaext
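+/*
+ * Each pass of .Loop_shaext consumes one 64-byte block per lane.
+ * sha256rnds2 performs two rounds using the W+K words implicitly taken
+ * from %xmm0, so every group of four rounds issues it twice per lane
+ * with a pshufd $0x0e in between to move the upper half of the W+K
+ * block into place; sha256msg1/sha256msg2 together with the palignr
+ * sequences (".byte 102,15,58,15,...") extend the message schedule, and
+ * the two lanes' instruction streams are interleaved to hide latency.
+ */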
+
+.align 32
+.Loop_shaext:
+ movdqu 0(%r8),%xmm4
+ movdqu 0(%r9),%xmm8
+ movdqu 16(%r8),%xmm5
+ movdqu 16(%r9),%xmm9
+ movdqu 32(%r8),%xmm6
+.byte 102,15,56,0,227
+ movdqu 32(%r9),%xmm10
+.byte 102,68,15,56,0,195
+ movdqu 48(%r8),%xmm7
+ leaq 64(%r8),%r8
+ movdqu 48(%r9),%xmm11
+ leaq 64(%r9),%r9
+
+ movdqa 0-128(%rbp),%xmm0
+.byte 102,15,56,0,235
+ paddd %xmm4,%xmm0
+ pxor %xmm12,%xmm4
+ movdqa %xmm0,%xmm1
+ movdqa 0-128(%rbp),%xmm2
+.byte 102,68,15,56,0,203
+ paddd %xmm8,%xmm2
+ movdqa %xmm13,80(%rsp)
+.byte 69,15,56,203,236
+ pxor %xmm14,%xmm8
+ movdqa %xmm2,%xmm0
+ movdqa %xmm15,112(%rsp)
+.byte 69,15,56,203,254
+ pshufd $0x0e,%xmm1,%xmm0
+ pxor %xmm12,%xmm4
+ movdqa %xmm12,64(%rsp)
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ pxor %xmm14,%xmm8
+ movdqa %xmm14,96(%rsp)
+ movdqa 16-128(%rbp),%xmm1
+ paddd %xmm5,%xmm1
+.byte 102,15,56,0,243
+.byte 69,15,56,203,247
+
+ movdqa %xmm1,%xmm0
+ movdqa 16-128(%rbp),%xmm2
+ paddd %xmm9,%xmm2
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ prefetcht0 127(%r8)
+.byte 102,15,56,0,251
+.byte 102,68,15,56,0,211
+ prefetcht0 127(%r9)
+.byte 69,15,56,203,254
+ pshufd $0x0e,%xmm1,%xmm0
+.byte 102,68,15,56,0,219
+.byte 15,56,204,229
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 32-128(%rbp),%xmm1
+ paddd %xmm6,%xmm1
+.byte 69,15,56,203,247
+
+ movdqa %xmm1,%xmm0
+ movdqa 32-128(%rbp),%xmm2
+ paddd %xmm10,%xmm2
+.byte 69,15,56,203,236
+.byte 69,15,56,204,193
+ movdqa %xmm2,%xmm0
+ movdqa %xmm7,%xmm3
+.byte 69,15,56,203,254
+ pshufd $0x0e,%xmm1,%xmm0
+.byte 102,15,58,15,222,4
+ paddd %xmm3,%xmm4
+ movdqa %xmm11,%xmm3
+.byte 102,65,15,58,15,218,4
+.byte 15,56,204,238
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 48-128(%rbp),%xmm1
+ paddd %xmm7,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,202
+
+ movdqa %xmm1,%xmm0
+ movdqa 48-128(%rbp),%xmm2
+ paddd %xmm3,%xmm8
+ paddd %xmm11,%xmm2
+.byte 15,56,205,231
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm4,%xmm3
+.byte 102,15,58,15,223,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,195
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm5
+ movdqa %xmm8,%xmm3
+.byte 102,65,15,58,15,219,4
+.byte 15,56,204,247
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 64-128(%rbp),%xmm1
+ paddd %xmm4,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,211
+ movdqa %xmm1,%xmm0
+ movdqa 64-128(%rbp),%xmm2
+ paddd %xmm3,%xmm9
+ paddd %xmm8,%xmm2
+.byte 15,56,205,236
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm5,%xmm3
+.byte 102,15,58,15,220,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,200
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm6
+ movdqa %xmm9,%xmm3
+.byte 102,65,15,58,15,216,4
+.byte 15,56,204,252
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 80-128(%rbp),%xmm1
+ paddd %xmm5,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,216
+ movdqa %xmm1,%xmm0
+ movdqa 80-128(%rbp),%xmm2
+ paddd %xmm3,%xmm10
+ paddd %xmm9,%xmm2
+.byte 15,56,205,245
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+.byte 102,15,58,15,221,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,209
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm7
+ movdqa %xmm10,%xmm3
+.byte 102,65,15,58,15,217,4
+.byte 15,56,204,229
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 96-128(%rbp),%xmm1
+ paddd %xmm6,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,193
+ movdqa %xmm1,%xmm0
+ movdqa 96-128(%rbp),%xmm2
+ paddd %xmm3,%xmm11
+ paddd %xmm10,%xmm2
+.byte 15,56,205,254
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm7,%xmm3
+.byte 102,15,58,15,222,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,218
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm4
+ movdqa %xmm11,%xmm3
+.byte 102,65,15,58,15,218,4
+.byte 15,56,204,238
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 112-128(%rbp),%xmm1
+ paddd %xmm7,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,202
+ movdqa %xmm1,%xmm0
+ movdqa 112-128(%rbp),%xmm2
+ paddd %xmm3,%xmm8
+ paddd %xmm11,%xmm2
+.byte 15,56,205,231
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm4,%xmm3
+.byte 102,15,58,15,223,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,195
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm5
+ movdqa %xmm8,%xmm3
+.byte 102,65,15,58,15,219,4
+.byte 15,56,204,247
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 128-128(%rbp),%xmm1
+ paddd %xmm4,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,211
+ movdqa %xmm1,%xmm0
+ movdqa 128-128(%rbp),%xmm2
+ paddd %xmm3,%xmm9
+ paddd %xmm8,%xmm2
+.byte 15,56,205,236
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm5,%xmm3
+.byte 102,15,58,15,220,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,200
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm6
+ movdqa %xmm9,%xmm3
+.byte 102,65,15,58,15,216,4
+.byte 15,56,204,252
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 144-128(%rbp),%xmm1
+ paddd %xmm5,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,216
+ movdqa %xmm1,%xmm0
+ movdqa 144-128(%rbp),%xmm2
+ paddd %xmm3,%xmm10
+ paddd %xmm9,%xmm2
+.byte 15,56,205,245
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+.byte 102,15,58,15,221,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,209
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm7
+ movdqa %xmm10,%xmm3
+.byte 102,65,15,58,15,217,4
+.byte 15,56,204,229
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 160-128(%rbp),%xmm1
+ paddd %xmm6,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,193
+ movdqa %xmm1,%xmm0
+ movdqa 160-128(%rbp),%xmm2
+ paddd %xmm3,%xmm11
+ paddd %xmm10,%xmm2
+.byte 15,56,205,254
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm7,%xmm3
+.byte 102,15,58,15,222,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,218
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm4
+ movdqa %xmm11,%xmm3
+.byte 102,65,15,58,15,218,4
+.byte 15,56,204,238
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 176-128(%rbp),%xmm1
+ paddd %xmm7,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,202
+ movdqa %xmm1,%xmm0
+ movdqa 176-128(%rbp),%xmm2
+ paddd %xmm3,%xmm8
+ paddd %xmm11,%xmm2
+.byte 15,56,205,231
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm4,%xmm3
+.byte 102,15,58,15,223,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,195
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm5
+ movdqa %xmm8,%xmm3
+.byte 102,65,15,58,15,219,4
+.byte 15,56,204,247
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 192-128(%rbp),%xmm1
+ paddd %xmm4,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,211
+ movdqa %xmm1,%xmm0
+ movdqa 192-128(%rbp),%xmm2
+ paddd %xmm3,%xmm9
+ paddd %xmm8,%xmm2
+.byte 15,56,205,236
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm5,%xmm3
+.byte 102,15,58,15,220,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,200
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm6
+ movdqa %xmm9,%xmm3
+.byte 102,65,15,58,15,216,4
+.byte 15,56,204,252
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 208-128(%rbp),%xmm1
+ paddd %xmm5,%xmm1
+.byte 69,15,56,203,247
+.byte 69,15,56,204,216
+ movdqa %xmm1,%xmm0
+ movdqa 208-128(%rbp),%xmm2
+ paddd %xmm3,%xmm10
+ paddd %xmm9,%xmm2
+.byte 15,56,205,245
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movdqa %xmm6,%xmm3
+.byte 102,15,58,15,221,4
+.byte 69,15,56,203,254
+.byte 69,15,56,205,209
+ pshufd $0x0e,%xmm1,%xmm0
+ paddd %xmm3,%xmm7
+ movdqa %xmm10,%xmm3
+.byte 102,65,15,58,15,217,4
+ nop
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 224-128(%rbp),%xmm1
+ paddd %xmm6,%xmm1
+.byte 69,15,56,203,247
+
+ movdqa %xmm1,%xmm0
+ movdqa 224-128(%rbp),%xmm2
+ paddd %xmm3,%xmm11
+ paddd %xmm10,%xmm2
+.byte 15,56,205,254
+ nop
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ movl $1,%ecx
+ pxor %xmm6,%xmm6
+.byte 69,15,56,203,254
+.byte 69,15,56,205,218
+ pshufd $0x0e,%xmm1,%xmm0
+ movdqa 240-128(%rbp),%xmm1
+ paddd %xmm7,%xmm1
+ movq (%rbx),%xmm7
+ nop
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ movdqa 240-128(%rbp),%xmm2
+ paddd %xmm11,%xmm2
+.byte 69,15,56,203,247
+
+ movdqa %xmm1,%xmm0
+ cmpl 0(%rbx),%ecx
+ cmovgeq %rsp,%r8
+ cmpl 4(%rbx),%ecx
+ cmovgeq %rsp,%r9
+ pshufd $0x00,%xmm7,%xmm9
+.byte 69,15,56,203,236
+ movdqa %xmm2,%xmm0
+ pshufd $0x55,%xmm7,%xmm10
+ movdqa %xmm7,%xmm11
+.byte 69,15,56,203,254
+ pshufd $0x0e,%xmm1,%xmm0
+ pcmpgtd %xmm6,%xmm9
+ pcmpgtd %xmm6,%xmm10
+.byte 69,15,56,203,229
+ pshufd $0x0e,%xmm2,%xmm0
+ pcmpgtd %xmm6,%xmm11
+ movdqa K256_shaext-16(%rip),%xmm3
+.byte 69,15,56,203,247
+
+ pand %xmm9,%xmm13
+ pand %xmm10,%xmm15
+ pand %xmm9,%xmm12
+ pand %xmm10,%xmm14
+ paddd %xmm7,%xmm11
+
+ paddd 80(%rsp),%xmm13
+ paddd 112(%rsp),%xmm15
+ paddd 64(%rsp),%xmm12
+ paddd 96(%rsp),%xmm14
+
+ movq %xmm11,(%rbx)
+ decl %edx
+ jnz .Loop_shaext
+
+ movl 280(%rsp),%edx
+
+ pshufd $27,%xmm12,%xmm12
+ pshufd $27,%xmm13,%xmm13
+ pshufd $27,%xmm14,%xmm14
+ pshufd $27,%xmm15,%xmm15
+
+ movdqa %xmm12,%xmm5
+ movdqa %xmm13,%xmm6
+ punpckldq %xmm14,%xmm12
+ punpckhdq %xmm14,%xmm5
+ punpckldq %xmm15,%xmm13
+ punpckhdq %xmm15,%xmm6
+
+ movq %xmm12,0-128(%rdi)
+ psrldq $8,%xmm12
+ movq %xmm5,128-128(%rdi)
+ psrldq $8,%xmm5
+ movq %xmm12,32-128(%rdi)
+ movq %xmm5,160-128(%rdi)
+
+ movq %xmm13,64-128(%rdi)
+ psrldq $8,%xmm13
+ movq %xmm6,192-128(%rdi)
+ psrldq $8,%xmm6
+ movq %xmm13,96-128(%rdi)
+ movq %xmm6,224-128(%rdi)
+
+ leaq 8(%rdi),%rdi
+ leaq 32(%rsi),%rsi
+ decl %edx
+ jnz .Loop_grande_shaext
+
+.Ldone_shaext:
+
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lepilogue_shaext:
+ .byte 0xf3,0xc3
+.size sha256_multi_block_shaext,.-sha256_multi_block_shaext
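+/*
+ * sha256_multi_block_avx: the same four-lane algorithm as the SSE code
+ * above, rewritten with three-operand AVX instructions.  The entry stub
+ * appears to test a CPU capability bit (testl $32,%ecx after shrq
+ * $32,%rcx) and, when the count in %edx is at least two, hands off to
+ * _avx2_shortcut, presumably an eight-lane AVX2 variant of this routine.
+ */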
+.type sha256_multi_block_avx, at function
+.align 32
+sha256_multi_block_avx:
+_avx_shortcut:
+ shrq $32,%rcx
+ cmpl $2,%edx
+ jb .Lavx
+ testl $32,%ecx
+ jnz _avx2_shortcut
+ jmp .Lavx
+.align 32
+.Lavx:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ subq $288,%rsp
+ andq $-256,%rsp
+ movq %rax,272(%rsp)
+.Lbody_avx:
+ leaq K256+128(%rip),%rbp
+ leaq 256(%rsp),%rbx
+ leaq 128(%rdi),%rdi
+
+.Loop_grande_avx:
+ movl %edx,280(%rsp)
+ xorl %edx,%edx
+ movq 0(%rsi),%r8
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rbp,%r8
+ movq 16(%rsi),%r9
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rbp,%r9
+ movq 32(%rsi),%r10
+ movl 40(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,8(%rbx)
+ cmovleq %rbp,%r10
+ movq 48(%rsi),%r11
+ movl 56(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,12(%rbx)
+ cmovleq %rbp,%r11
+ testl %edx,%edx
+ jz .Ldone_avx
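+/*
+ * .Loop_grande_avx has just read up to four (pointer, block count) pairs
+ * from (%rsi); %edx now holds the largest count and idle lanes had their
+ * pointer replaced with %rbp, which points into the K256 table
+ * (presumably so stray loads from them stay harmless).  Load the eight
+ * interleaved state vectors from (%rdi) plus the byte-swap mask before
+ * entering the round loop.
+ */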
+
+ vmovdqu 0-128(%rdi),%xmm8
+ leaq 128(%rsp),%rax
+ vmovdqu 32-128(%rdi),%xmm9
+ vmovdqu 64-128(%rdi),%xmm10
+ vmovdqu 96-128(%rdi),%xmm11
+ vmovdqu 128-128(%rdi),%xmm12
+ vmovdqu 160-128(%rdi),%xmm13
+ vmovdqu 192-128(%rdi),%xmm14
+ vmovdqu 224-128(%rdi),%xmm15
+ vmovdqu .Lpbswap(%rip),%xmm6
+ jmp .Loop_avx
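+/*
+ * .Loop_avx mirrors the SSE round pattern: vmovd/vpinsrd gather one
+ * message word from each of the four streams, vpshufb against %xmm6
+ * byte-swaps it, and the Sigma1/Ch/Sigma0/Maj arithmetic uses the same
+ * rotation amounts (6/11/25 and 2/13/22), now expressed with
+ * non-destructive vpsrld/vpslld/vpxor forms.
+ */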
+
+.align 32
+.Loop_avx:
+ vpxor %xmm9,%xmm10,%xmm4
+ vmovd 0(%r8),%xmm5
+ vmovd 0(%r9),%xmm0
+ vpinsrd $1,0(%r10),%xmm5,%xmm5
+ vpinsrd $1,0(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm12,%xmm7
+ vpslld $26,%xmm12,%xmm2
+ vmovdqu %xmm5,0-128(%rax)
+ vpaddd %xmm15,%xmm5,%xmm5
+
+ vpsrld $11,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm12,%xmm2
+ vpaddd -128(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm12,%xmm2
+ vpandn %xmm14,%xmm12,%xmm0
+ vpand %xmm13,%xmm12,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm8,%xmm15
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm8,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm8,%xmm9,%xmm3
+
+ vpxor %xmm1,%xmm15,%xmm15
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm8,%xmm1
+
+ vpslld $19,%xmm8,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm15,%xmm7
+
+ vpsrld $22,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm8,%xmm2
+ vpxor %xmm4,%xmm9,%xmm15
+ vpaddd %xmm5,%xmm11,%xmm11
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm15,%xmm15
+ vpaddd %xmm7,%xmm15,%xmm15
+ vmovd 4(%r8),%xmm5
+ vmovd 4(%r9),%xmm0
+ vpinsrd $1,4(%r10),%xmm5,%xmm5
+ vpinsrd $1,4(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm11,%xmm7
+ vpslld $26,%xmm11,%xmm2
+ vmovdqu %xmm5,16-128(%rax)
+ vpaddd %xmm14,%xmm5,%xmm5
+
+ vpsrld $11,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm11,%xmm2
+ vpaddd -96(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm11,%xmm2
+ vpandn %xmm13,%xmm11,%xmm0
+ vpand %xmm12,%xmm11,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm15,%xmm14
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm15,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm15,%xmm8,%xmm4
+
+ vpxor %xmm1,%xmm14,%xmm14
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm15,%xmm1
+
+ vpslld $19,%xmm15,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm14,%xmm7
+
+ vpsrld $22,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm15,%xmm2
+ vpxor %xmm3,%xmm8,%xmm14
+ vpaddd %xmm5,%xmm10,%xmm10
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm14,%xmm14
+ vpaddd %xmm7,%xmm14,%xmm14
+ vmovd 8(%r8),%xmm5
+ vmovd 8(%r9),%xmm0
+ vpinsrd $1,8(%r10),%xmm5,%xmm5
+ vpinsrd $1,8(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm10,%xmm7
+ vpslld $26,%xmm10,%xmm2
+ vmovdqu %xmm5,32-128(%rax)
+ vpaddd %xmm13,%xmm5,%xmm5
+
+ vpsrld $11,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm10,%xmm2
+ vpaddd -64(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm10,%xmm2
+ vpandn %xmm12,%xmm10,%xmm0
+ vpand %xmm11,%xmm10,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm14,%xmm13
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm14,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm14,%xmm15,%xmm3
+
+ vpxor %xmm1,%xmm13,%xmm13
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm14,%xmm1
+
+ vpslld $19,%xmm14,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm13,%xmm7
+
+ vpsrld $22,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm14,%xmm2
+ vpxor %xmm4,%xmm15,%xmm13
+ vpaddd %xmm5,%xmm9,%xmm9
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm13,%xmm13
+ vpaddd %xmm7,%xmm13,%xmm13
+ vmovd 12(%r8),%xmm5
+ vmovd 12(%r9),%xmm0
+ vpinsrd $1,12(%r10),%xmm5,%xmm5
+ vpinsrd $1,12(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm9,%xmm7
+ vpslld $26,%xmm9,%xmm2
+ vmovdqu %xmm5,48-128(%rax)
+ vpaddd %xmm12,%xmm5,%xmm5
+
+ vpsrld $11,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm9,%xmm2
+ vpaddd -32(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm9,%xmm2
+ vpandn %xmm11,%xmm9,%xmm0
+ vpand %xmm10,%xmm9,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm13,%xmm12
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm13,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm13,%xmm14,%xmm4
+
+ vpxor %xmm1,%xmm12,%xmm12
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm13,%xmm1
+
+ vpslld $19,%xmm13,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm12,%xmm7
+
+ vpsrld $22,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm13,%xmm2
+ vpxor %xmm3,%xmm14,%xmm12
+ vpaddd %xmm5,%xmm8,%xmm8
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm12,%xmm12
+ vpaddd %xmm7,%xmm12,%xmm12
+ vmovd 16(%r8),%xmm5
+ vmovd 16(%r9),%xmm0
+ vpinsrd $1,16(%r10),%xmm5,%xmm5
+ vpinsrd $1,16(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm8,%xmm7
+ vpslld $26,%xmm8,%xmm2
+ vmovdqu %xmm5,64-128(%rax)
+ vpaddd %xmm11,%xmm5,%xmm5
+
+ vpsrld $11,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm8,%xmm2
+ vpaddd 0(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm8,%xmm2
+ vpandn %xmm10,%xmm8,%xmm0
+ vpand %xmm9,%xmm8,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm12,%xmm11
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm12,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm12,%xmm13,%xmm3
+
+ vpxor %xmm1,%xmm11,%xmm11
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm12,%xmm1
+
+ vpslld $19,%xmm12,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm11,%xmm7
+
+ vpsrld $22,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm12,%xmm2
+ vpxor %xmm4,%xmm13,%xmm11
+ vpaddd %xmm5,%xmm15,%xmm15
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm11,%xmm11
+ vpaddd %xmm7,%xmm11,%xmm11
+ vmovd 20(%r8),%xmm5
+ vmovd 20(%r9),%xmm0
+ vpinsrd $1,20(%r10),%xmm5,%xmm5
+ vpinsrd $1,20(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm15,%xmm7
+ vpslld $26,%xmm15,%xmm2
+ vmovdqu %xmm5,80-128(%rax)
+ vpaddd %xmm10,%xmm5,%xmm5
+
+ vpsrld $11,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm15,%xmm2
+ vpaddd 32(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm15,%xmm2
+ vpandn %xmm9,%xmm15,%xmm0
+ vpand %xmm8,%xmm15,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm11,%xmm10
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm11,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm11,%xmm12,%xmm4
+
+ vpxor %xmm1,%xmm10,%xmm10
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm11,%xmm1
+
+ vpslld $19,%xmm11,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm10,%xmm7
+
+ vpsrld $22,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm11,%xmm2
+ vpxor %xmm3,%xmm12,%xmm10
+ vpaddd %xmm5,%xmm14,%xmm14
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm10,%xmm10
+ vpaddd %xmm7,%xmm10,%xmm10
+ vmovd 24(%r8),%xmm5
+ vmovd 24(%r9),%xmm0
+ vpinsrd $1,24(%r10),%xmm5,%xmm5
+ vpinsrd $1,24(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm14,%xmm7
+ vpslld $26,%xmm14,%xmm2
+ vmovdqu %xmm5,96-128(%rax)
+ vpaddd %xmm9,%xmm5,%xmm5
+
+ vpsrld $11,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm14,%xmm2
+ vpaddd 64(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm14,%xmm2
+ vpandn %xmm8,%xmm14,%xmm0
+ vpand %xmm15,%xmm14,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm10,%xmm9
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm10,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm10,%xmm11,%xmm3
+
+ vpxor %xmm1,%xmm9,%xmm9
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm10,%xmm1
+
+ vpslld $19,%xmm10,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm9,%xmm7
+
+ vpsrld $22,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm10,%xmm2
+ vpxor %xmm4,%xmm11,%xmm9
+ vpaddd %xmm5,%xmm13,%xmm13
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm9,%xmm9
+ vpaddd %xmm7,%xmm9,%xmm9
+ vmovd 28(%r8),%xmm5
+ vmovd 28(%r9),%xmm0
+ vpinsrd $1,28(%r10),%xmm5,%xmm5
+ vpinsrd $1,28(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm13,%xmm7
+ vpslld $26,%xmm13,%xmm2
+ vmovdqu %xmm5,112-128(%rax)
+ vpaddd %xmm8,%xmm5,%xmm5
+
+ vpsrld $11,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm13,%xmm2
+ vpaddd 96(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm13,%xmm2
+ vpandn %xmm15,%xmm13,%xmm0
+ vpand %xmm14,%xmm13,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm9,%xmm8
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm9,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm9,%xmm10,%xmm4
+
+ vpxor %xmm1,%xmm8,%xmm8
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm9,%xmm1
+
+ vpslld $19,%xmm9,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm8,%xmm7
+
+ vpsrld $22,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm9,%xmm2
+ vpxor %xmm3,%xmm10,%xmm8
+ vpaddd %xmm5,%xmm12,%xmm12
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm8,%xmm8
+ vpaddd %xmm7,%xmm8,%xmm8
+ addq $256,%rbp
+ vmovd 32(%r8),%xmm5
+ vmovd 32(%r9),%xmm0
+ vpinsrd $1,32(%r10),%xmm5,%xmm5
+ vpinsrd $1,32(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm12,%xmm7
+ vpslld $26,%xmm12,%xmm2
+ vmovdqu %xmm5,128-128(%rax)
+ vpaddd %xmm15,%xmm5,%xmm5
+
+ vpsrld $11,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm12,%xmm2
+ vpaddd -128(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm12,%xmm2
+ vpandn %xmm14,%xmm12,%xmm0
+ vpand %xmm13,%xmm12,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm8,%xmm15
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm8,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm8,%xmm9,%xmm3
+
+ vpxor %xmm1,%xmm15,%xmm15
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm8,%xmm1
+
+ vpslld $19,%xmm8,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm15,%xmm7
+
+ vpsrld $22,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm8,%xmm2
+ vpxor %xmm4,%xmm9,%xmm15
+ vpaddd %xmm5,%xmm11,%xmm11
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm15,%xmm15
+ vpaddd %xmm7,%xmm15,%xmm15
+ vmovd 36(%r8),%xmm5
+ vmovd 36(%r9),%xmm0
+ vpinsrd $1,36(%r10),%xmm5,%xmm5
+ vpinsrd $1,36(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm11,%xmm7
+ vpslld $26,%xmm11,%xmm2
+ vmovdqu %xmm5,144-128(%rax)
+ vpaddd %xmm14,%xmm5,%xmm5
+
+ vpsrld $11,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm11,%xmm2
+ vpaddd -96(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm11,%xmm2
+ vpandn %xmm13,%xmm11,%xmm0
+ vpand %xmm12,%xmm11,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm15,%xmm14
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm15,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm15,%xmm8,%xmm4
+
+ vpxor %xmm1,%xmm14,%xmm14
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm15,%xmm1
+
+ vpslld $19,%xmm15,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm14,%xmm7
+
+ vpsrld $22,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm15,%xmm2
+ vpxor %xmm3,%xmm8,%xmm14
+ vpaddd %xmm5,%xmm10,%xmm10
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm14,%xmm14
+ vpaddd %xmm7,%xmm14,%xmm14
+ vmovd 40(%r8),%xmm5
+ vmovd 40(%r9),%xmm0
+ vpinsrd $1,40(%r10),%xmm5,%xmm5
+ vpinsrd $1,40(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm10,%xmm7
+ vpslld $26,%xmm10,%xmm2
+ vmovdqu %xmm5,160-128(%rax)
+ vpaddd %xmm13,%xmm5,%xmm5
+
+ vpsrld $11,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm10,%xmm2
+ vpaddd -64(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm10,%xmm2
+ vpandn %xmm12,%xmm10,%xmm0
+ vpand %xmm11,%xmm10,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm14,%xmm13
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm14,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm14,%xmm15,%xmm3
+
+ vpxor %xmm1,%xmm13,%xmm13
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm14,%xmm1
+
+ vpslld $19,%xmm14,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm13,%xmm7
+
+ vpsrld $22,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm14,%xmm2
+ vpxor %xmm4,%xmm15,%xmm13
+ vpaddd %xmm5,%xmm9,%xmm9
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm13,%xmm13
+ vpaddd %xmm7,%xmm13,%xmm13
+ vmovd 44(%r8),%xmm5
+ vmovd 44(%r9),%xmm0
+ vpinsrd $1,44(%r10),%xmm5,%xmm5
+ vpinsrd $1,44(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm9,%xmm7
+ vpslld $26,%xmm9,%xmm2
+ vmovdqu %xmm5,176-128(%rax)
+ vpaddd %xmm12,%xmm5,%xmm5
+
+ vpsrld $11,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm9,%xmm2
+ vpaddd -32(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm9,%xmm2
+ vpandn %xmm11,%xmm9,%xmm0
+ vpand %xmm10,%xmm9,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm13,%xmm12
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm13,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm13,%xmm14,%xmm4
+
+ vpxor %xmm1,%xmm12,%xmm12
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm13,%xmm1
+
+ vpslld $19,%xmm13,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm12,%xmm7
+
+ vpsrld $22,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm13,%xmm2
+ vpxor %xmm3,%xmm14,%xmm12
+ vpaddd %xmm5,%xmm8,%xmm8
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm12,%xmm12
+ vpaddd %xmm7,%xmm12,%xmm12
+ vmovd 48(%r8),%xmm5
+ vmovd 48(%r9),%xmm0
+ vpinsrd $1,48(%r10),%xmm5,%xmm5
+ vpinsrd $1,48(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm8,%xmm7
+ vpslld $26,%xmm8,%xmm2
+ vmovdqu %xmm5,192-128(%rax)
+ vpaddd %xmm11,%xmm5,%xmm5
+
+ vpsrld $11,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm8,%xmm2
+ vpaddd 0(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm8,%xmm2
+ vpandn %xmm10,%xmm8,%xmm0
+ vpand %xmm9,%xmm8,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm12,%xmm11
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm12,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm12,%xmm13,%xmm3
+
+ vpxor %xmm1,%xmm11,%xmm11
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm12,%xmm1
+
+ vpslld $19,%xmm12,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm11,%xmm7
+
+ vpsrld $22,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm12,%xmm2
+ vpxor %xmm4,%xmm13,%xmm11
+ vpaddd %xmm5,%xmm15,%xmm15
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm11,%xmm11
+ vpaddd %xmm7,%xmm11,%xmm11
+ vmovd 52(%r8),%xmm5
+ vmovd 52(%r9),%xmm0
+ vpinsrd $1,52(%r10),%xmm5,%xmm5
+ vpinsrd $1,52(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm15,%xmm7
+ vpslld $26,%xmm15,%xmm2
+ vmovdqu %xmm5,208-128(%rax)
+ vpaddd %xmm10,%xmm5,%xmm5
+
+ vpsrld $11,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm15,%xmm2
+ vpaddd 32(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm15,%xmm2
+ vpandn %xmm9,%xmm15,%xmm0
+ vpand %xmm8,%xmm15,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm11,%xmm10
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm11,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm11,%xmm12,%xmm4
+
+ vpxor %xmm1,%xmm10,%xmm10
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm11,%xmm1
+
+ vpslld $19,%xmm11,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm10,%xmm7
+
+ vpsrld $22,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm11,%xmm2
+ vpxor %xmm3,%xmm12,%xmm10
+ vpaddd %xmm5,%xmm14,%xmm14
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm10,%xmm10
+ vpaddd %xmm7,%xmm10,%xmm10
+ vmovd 56(%r8),%xmm5
+ vmovd 56(%r9),%xmm0
+ vpinsrd $1,56(%r10),%xmm5,%xmm5
+ vpinsrd $1,56(%r11),%xmm0,%xmm0
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm14,%xmm7
+ vpslld $26,%xmm14,%xmm2
+ vmovdqu %xmm5,224-128(%rax)
+ vpaddd %xmm9,%xmm5,%xmm5
+
+ vpsrld $11,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm14,%xmm2
+ vpaddd 64(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm14,%xmm2
+ vpandn %xmm8,%xmm14,%xmm0
+ vpand %xmm15,%xmm14,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm10,%xmm9
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm10,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm10,%xmm11,%xmm3
+
+ vpxor %xmm1,%xmm9,%xmm9
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm10,%xmm1
+
+ vpslld $19,%xmm10,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm9,%xmm7
+
+ vpsrld $22,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm10,%xmm2
+ vpxor %xmm4,%xmm11,%xmm9
+ vpaddd %xmm5,%xmm13,%xmm13
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm9,%xmm9
+ vpaddd %xmm7,%xmm9,%xmm9
+ vmovd 60(%r8),%xmm5
+ leaq 64(%r8),%r8
+ vmovd 60(%r9),%xmm0
+ leaq 64(%r9),%r9
+ vpinsrd $1,60(%r10),%xmm5,%xmm5
+ leaq 64(%r10),%r10
+ vpinsrd $1,60(%r11),%xmm0,%xmm0
+ leaq 64(%r11),%r11
+ vpunpckldq %xmm0,%xmm5,%xmm5
+ vpshufb %xmm6,%xmm5,%xmm5
+ vpsrld $6,%xmm13,%xmm7
+ vpslld $26,%xmm13,%xmm2
+ vmovdqu %xmm5,240-128(%rax)
+ vpaddd %xmm8,%xmm5,%xmm5
+
+ vpsrld $11,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm13,%xmm2
+ vpaddd 96(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ prefetcht0 63(%r8)
+ vpslld $7,%xmm13,%xmm2
+ vpandn %xmm15,%xmm13,%xmm0
+ vpand %xmm14,%xmm13,%xmm4
+ prefetcht0 63(%r9)
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm9,%xmm8
+ vpxor %xmm2,%xmm7,%xmm7
+ prefetcht0 63(%r10)
+ vpslld $30,%xmm9,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm9,%xmm10,%xmm4
+ prefetcht0 63(%r11)
+ vpxor %xmm1,%xmm8,%xmm8
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm9,%xmm1
+
+ vpslld $19,%xmm9,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm8,%xmm7
+
+ vpsrld $22,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm9,%xmm2
+ vpxor %xmm3,%xmm10,%xmm8
+ vpaddd %xmm5,%xmm12,%xmm12
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm8,%xmm8
+ vpaddd %xmm7,%xmm8,%xmm8
+ addq $256,%rbp
+ vmovdqu 0-128(%rax),%xmm5
+ movl $3,%ecx
+ jmp .Loop_16_xx_avx
+.align 32
+.Loop_16_xx_avx:
+ vmovdqu 16-128(%rax),%xmm6
+ vpaddd 144-128(%rax),%xmm5,%xmm5
+
+ vpsrld $3,%xmm6,%xmm7
+ vpsrld $7,%xmm6,%xmm1
+ vpslld $25,%xmm6,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm6,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm6,%xmm2
+ vmovdqu 224-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpxor %xmm1,%xmm3,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpsrld $6,%xmm12,%xmm7
+ vpslld $26,%xmm12,%xmm2
+ vmovdqu %xmm5,0-128(%rax)
+ vpaddd %xmm15,%xmm5,%xmm5
+
+ vpsrld $11,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm12,%xmm2
+ vpaddd -128(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm12,%xmm2
+ vpandn %xmm14,%xmm12,%xmm0
+ vpand %xmm13,%xmm12,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm8,%xmm15
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm8,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm8,%xmm9,%xmm3
+
+ vpxor %xmm1,%xmm15,%xmm15
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm8,%xmm1
+
+ vpslld $19,%xmm8,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm15,%xmm7
+
+ vpsrld $22,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm8,%xmm2
+ vpxor %xmm4,%xmm9,%xmm15
+ vpaddd %xmm5,%xmm11,%xmm11
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm15,%xmm15
+ vpaddd %xmm7,%xmm15,%xmm15
+ vmovdqu 32-128(%rax),%xmm5
+ vpaddd 160-128(%rax),%xmm6,%xmm6
+
+ vpsrld $3,%xmm5,%xmm7
+ vpsrld $7,%xmm5,%xmm1
+ vpslld $25,%xmm5,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm5,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm5,%xmm2
+ vmovdqu 240-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpsrld $6,%xmm11,%xmm7
+ vpslld $26,%xmm11,%xmm2
+ vmovdqu %xmm6,16-128(%rax)
+ vpaddd %xmm14,%xmm6,%xmm6
+
+ vpsrld $11,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm11,%xmm2
+ vpaddd -96(%rbp),%xmm6,%xmm6
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm11,%xmm2
+ vpandn %xmm13,%xmm11,%xmm0
+ vpand %xmm12,%xmm11,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm15,%xmm14
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm15,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm15,%xmm8,%xmm4
+
+ vpxor %xmm1,%xmm14,%xmm14
+ vpaddd %xmm7,%xmm6,%xmm6
+
+ vpsrld $13,%xmm15,%xmm1
+
+ vpslld $19,%xmm15,%xmm2
+ vpaddd %xmm0,%xmm6,%xmm6
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm14,%xmm7
+
+ vpsrld $22,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm15,%xmm2
+ vpxor %xmm3,%xmm8,%xmm14
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm6,%xmm14,%xmm14
+ vpaddd %xmm7,%xmm14,%xmm14
+ vmovdqu 48-128(%rax),%xmm6
+ vpaddd 176-128(%rax),%xmm5,%xmm5
+
+ vpsrld $3,%xmm6,%xmm7
+ vpsrld $7,%xmm6,%xmm1
+ vpslld $25,%xmm6,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm6,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm6,%xmm2
+ vmovdqu 0-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpxor %xmm1,%xmm3,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpsrld $6,%xmm10,%xmm7
+ vpslld $26,%xmm10,%xmm2
+ vmovdqu %xmm5,32-128(%rax)
+ vpaddd %xmm13,%xmm5,%xmm5
+
+ vpsrld $11,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm10,%xmm2
+ vpaddd -64(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm10,%xmm2
+ vpandn %xmm12,%xmm10,%xmm0
+ vpand %xmm11,%xmm10,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm14,%xmm13
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm14,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm14,%xmm15,%xmm3
+
+ vpxor %xmm1,%xmm13,%xmm13
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm14,%xmm1
+
+ vpslld $19,%xmm14,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm13,%xmm7
+
+ vpsrld $22,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm14,%xmm2
+ vpxor %xmm4,%xmm15,%xmm13
+ vpaddd %xmm5,%xmm9,%xmm9
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm13,%xmm13
+ vpaddd %xmm7,%xmm13,%xmm13
+ vmovdqu 64-128(%rax),%xmm5
+ vpaddd 192-128(%rax),%xmm6,%xmm6
+
+ vpsrld $3,%xmm5,%xmm7
+ vpsrld $7,%xmm5,%xmm1
+ vpslld $25,%xmm5,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm5,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm5,%xmm2
+ vmovdqu 16-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpsrld $6,%xmm9,%xmm7
+ vpslld $26,%xmm9,%xmm2
+ vmovdqu %xmm6,48-128(%rax)
+ vpaddd %xmm12,%xmm6,%xmm6
+
+ vpsrld $11,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm9,%xmm2
+ vpaddd -32(%rbp),%xmm6,%xmm6
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm9,%xmm2
+ vpandn %xmm11,%xmm9,%xmm0
+ vpand %xmm10,%xmm9,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm13,%xmm12
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm13,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm13,%xmm14,%xmm4
+
+ vpxor %xmm1,%xmm12,%xmm12
+ vpaddd %xmm7,%xmm6,%xmm6
+
+ vpsrld $13,%xmm13,%xmm1
+
+ vpslld $19,%xmm13,%xmm2
+ vpaddd %xmm0,%xmm6,%xmm6
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm12,%xmm7
+
+ vpsrld $22,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm13,%xmm2
+ vpxor %xmm3,%xmm14,%xmm12
+ vpaddd %xmm6,%xmm8,%xmm8
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm6,%xmm12,%xmm12
+ vpaddd %xmm7,%xmm12,%xmm12
+ vmovdqu 80-128(%rax),%xmm6
+ vpaddd 208-128(%rax),%xmm5,%xmm5
+
+ vpsrld $3,%xmm6,%xmm7
+ vpsrld $7,%xmm6,%xmm1
+ vpslld $25,%xmm6,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm6,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm6,%xmm2
+ vmovdqu 32-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpxor %xmm1,%xmm3,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpsrld $6,%xmm8,%xmm7
+ vpslld $26,%xmm8,%xmm2
+ vmovdqu %xmm5,64-128(%rax)
+ vpaddd %xmm11,%xmm5,%xmm5
+
+ vpsrld $11,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm8,%xmm2
+ vpaddd 0(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm8,%xmm2
+ vpandn %xmm10,%xmm8,%xmm0
+ vpand %xmm9,%xmm8,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm12,%xmm11
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm12,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm12,%xmm13,%xmm3
+
+ vpxor %xmm1,%xmm11,%xmm11
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm12,%xmm1
+
+ vpslld $19,%xmm12,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm11,%xmm7
+
+ vpsrld $22,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm12,%xmm2
+ vpxor %xmm4,%xmm13,%xmm11
+ vpaddd %xmm5,%xmm15,%xmm15
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm11,%xmm11
+ vpaddd %xmm7,%xmm11,%xmm11
+ vmovdqu 96-128(%rax),%xmm5
+ vpaddd 224-128(%rax),%xmm6,%xmm6
+
+ vpsrld $3,%xmm5,%xmm7
+ vpsrld $7,%xmm5,%xmm1
+ vpslld $25,%xmm5,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm5,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm5,%xmm2
+ vmovdqu 48-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpsrld $6,%xmm15,%xmm7
+ vpslld $26,%xmm15,%xmm2
+ vmovdqu %xmm6,80-128(%rax)
+ vpaddd %xmm10,%xmm6,%xmm6
+
+ vpsrld $11,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm15,%xmm2
+ vpaddd 32(%rbp),%xmm6,%xmm6
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm15,%xmm2
+ vpandn %xmm9,%xmm15,%xmm0
+ vpand %xmm8,%xmm15,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm11,%xmm10
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm11,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm11,%xmm12,%xmm4
+
+ vpxor %xmm1,%xmm10,%xmm10
+ vpaddd %xmm7,%xmm6,%xmm6
+
+ vpsrld $13,%xmm11,%xmm1
+
+ vpslld $19,%xmm11,%xmm2
+ vpaddd %xmm0,%xmm6,%xmm6
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm10,%xmm7
+
+ vpsrld $22,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm11,%xmm2
+ vpxor %xmm3,%xmm12,%xmm10
+ vpaddd %xmm6,%xmm14,%xmm14
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm6,%xmm10,%xmm10
+ vpaddd %xmm7,%xmm10,%xmm10
+ vmovdqu 112-128(%rax),%xmm6
+ vpaddd 240-128(%rax),%xmm5,%xmm5
+
+ vpsrld $3,%xmm6,%xmm7
+ vpsrld $7,%xmm6,%xmm1
+ vpslld $25,%xmm6,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm6,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm6,%xmm2
+ vmovdqu 64-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpxor %xmm1,%xmm3,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpsrld $6,%xmm14,%xmm7
+ vpslld $26,%xmm14,%xmm2
+ vmovdqu %xmm5,96-128(%rax)
+ vpaddd %xmm9,%xmm5,%xmm5
+
+ vpsrld $11,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm14,%xmm2
+ vpaddd 64(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm14,%xmm2
+ vpandn %xmm8,%xmm14,%xmm0
+ vpand %xmm15,%xmm14,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm10,%xmm9
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm10,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm10,%xmm11,%xmm3
+
+ vpxor %xmm1,%xmm9,%xmm9
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm10,%xmm1
+
+ vpslld $19,%xmm10,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm9,%xmm7
+
+ vpsrld $22,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm10,%xmm2
+ vpxor %xmm4,%xmm11,%xmm9
+ vpaddd %xmm5,%xmm13,%xmm13
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm9,%xmm9
+ vpaddd %xmm7,%xmm9,%xmm9
+ vmovdqu 128-128(%rax),%xmm5
+ vpaddd 0-128(%rax),%xmm6,%xmm6
+
+ vpsrld $3,%xmm5,%xmm7
+ vpsrld $7,%xmm5,%xmm1
+ vpslld $25,%xmm5,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm5,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm5,%xmm2
+ vmovdqu 80-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpsrld $6,%xmm13,%xmm7
+ vpslld $26,%xmm13,%xmm2
+ vmovdqu %xmm6,112-128(%rax)
+ vpaddd %xmm8,%xmm6,%xmm6
+
+ vpsrld $11,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm13,%xmm2
+ vpaddd 96(%rbp),%xmm6,%xmm6
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm13,%xmm2
+ vpandn %xmm15,%xmm13,%xmm0
+ vpand %xmm14,%xmm13,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm9,%xmm8
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm9,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm9,%xmm10,%xmm4
+
+ vpxor %xmm1,%xmm8,%xmm8
+ vpaddd %xmm7,%xmm6,%xmm6
+
+ vpsrld $13,%xmm9,%xmm1
+
+ vpslld $19,%xmm9,%xmm2
+ vpaddd %xmm0,%xmm6,%xmm6
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm8,%xmm7
+
+ vpsrld $22,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm9,%xmm2
+ vpxor %xmm3,%xmm10,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm6,%xmm8,%xmm8
+ vpaddd %xmm7,%xmm8,%xmm8
+ addq $256,%rbp
+ vmovdqu 144-128(%rax),%xmm6
+ vpaddd 16-128(%rax),%xmm5,%xmm5
+
+ vpsrld $3,%xmm6,%xmm7
+ vpsrld $7,%xmm6,%xmm1
+ vpslld $25,%xmm6,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm6,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm6,%xmm2
+ vmovdqu 96-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpxor %xmm1,%xmm3,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpsrld $6,%xmm12,%xmm7
+ vpslld $26,%xmm12,%xmm2
+ vmovdqu %xmm5,128-128(%rax)
+ vpaddd %xmm15,%xmm5,%xmm5
+
+ vpsrld $11,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm12,%xmm2
+ vpaddd -128(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm12,%xmm2
+ vpandn %xmm14,%xmm12,%xmm0
+ vpand %xmm13,%xmm12,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm8,%xmm15
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm8,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm8,%xmm9,%xmm3
+
+ vpxor %xmm1,%xmm15,%xmm15
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm8,%xmm1
+
+ vpslld $19,%xmm8,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm15,%xmm7
+
+ vpsrld $22,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm8,%xmm2
+ vpxor %xmm4,%xmm9,%xmm15
+ vpaddd %xmm5,%xmm11,%xmm11
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm15,%xmm15
+ vpaddd %xmm7,%xmm15,%xmm15
+ vmovdqu 160-128(%rax),%xmm5
+ vpaddd 32-128(%rax),%xmm6,%xmm6
+
+ vpsrld $3,%xmm5,%xmm7
+ vpsrld $7,%xmm5,%xmm1
+ vpslld $25,%xmm5,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm5,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm5,%xmm2
+ vmovdqu 112-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpsrld $6,%xmm11,%xmm7
+ vpslld $26,%xmm11,%xmm2
+ vmovdqu %xmm6,144-128(%rax)
+ vpaddd %xmm14,%xmm6,%xmm6
+
+ vpsrld $11,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm11,%xmm2
+ vpaddd -96(%rbp),%xmm6,%xmm6
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm11,%xmm2
+ vpandn %xmm13,%xmm11,%xmm0
+ vpand %xmm12,%xmm11,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm15,%xmm14
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm15,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm15,%xmm8,%xmm4
+
+ vpxor %xmm1,%xmm14,%xmm14
+ vpaddd %xmm7,%xmm6,%xmm6
+
+ vpsrld $13,%xmm15,%xmm1
+
+ vpslld $19,%xmm15,%xmm2
+ vpaddd %xmm0,%xmm6,%xmm6
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm14,%xmm7
+
+ vpsrld $22,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm15,%xmm2
+ vpxor %xmm3,%xmm8,%xmm14
+ vpaddd %xmm6,%xmm10,%xmm10
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm6,%xmm14,%xmm14
+ vpaddd %xmm7,%xmm14,%xmm14
+ vmovdqu 176-128(%rax),%xmm6
+ vpaddd 48-128(%rax),%xmm5,%xmm5
+
+ vpsrld $3,%xmm6,%xmm7
+ vpsrld $7,%xmm6,%xmm1
+ vpslld $25,%xmm6,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm6,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm6,%xmm2
+ vmovdqu 128-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpxor %xmm1,%xmm3,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpsrld $6,%xmm10,%xmm7
+ vpslld $26,%xmm10,%xmm2
+ vmovdqu %xmm5,160-128(%rax)
+ vpaddd %xmm13,%xmm5,%xmm5
+
+ vpsrld $11,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm10,%xmm2
+ vpaddd -64(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm10,%xmm2
+ vpandn %xmm12,%xmm10,%xmm0
+ vpand %xmm11,%xmm10,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm14,%xmm13
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm14,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm14,%xmm15,%xmm3
+
+ vpxor %xmm1,%xmm13,%xmm13
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm14,%xmm1
+
+ vpslld $19,%xmm14,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm13,%xmm7
+
+ vpsrld $22,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm14,%xmm2
+ vpxor %xmm4,%xmm15,%xmm13
+ vpaddd %xmm5,%xmm9,%xmm9
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm13,%xmm13
+ vpaddd %xmm7,%xmm13,%xmm13
+ vmovdqu 192-128(%rax),%xmm5
+ vpaddd 64-128(%rax),%xmm6,%xmm6
+
+ vpsrld $3,%xmm5,%xmm7
+ vpsrld $7,%xmm5,%xmm1
+ vpslld $25,%xmm5,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm5,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm5,%xmm2
+ vmovdqu 144-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpsrld $6,%xmm9,%xmm7
+ vpslld $26,%xmm9,%xmm2
+ vmovdqu %xmm6,176-128(%rax)
+ vpaddd %xmm12,%xmm6,%xmm6
+
+ vpsrld $11,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm9,%xmm2
+ vpaddd -32(%rbp),%xmm6,%xmm6
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm9,%xmm2
+ vpandn %xmm11,%xmm9,%xmm0
+ vpand %xmm10,%xmm9,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm13,%xmm12
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm13,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm13,%xmm14,%xmm4
+
+ vpxor %xmm1,%xmm12,%xmm12
+ vpaddd %xmm7,%xmm6,%xmm6
+
+ vpsrld $13,%xmm13,%xmm1
+
+ vpslld $19,%xmm13,%xmm2
+ vpaddd %xmm0,%xmm6,%xmm6
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm12,%xmm7
+
+ vpsrld $22,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm13,%xmm2
+ vpxor %xmm3,%xmm14,%xmm12
+ vpaddd %xmm6,%xmm8,%xmm8
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm6,%xmm12,%xmm12
+ vpaddd %xmm7,%xmm12,%xmm12
+ vmovdqu 208-128(%rax),%xmm6
+ vpaddd 80-128(%rax),%xmm5,%xmm5
+
+ vpsrld $3,%xmm6,%xmm7
+ vpsrld $7,%xmm6,%xmm1
+ vpslld $25,%xmm6,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm6,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm6,%xmm2
+ vmovdqu 160-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpxor %xmm1,%xmm3,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpsrld $6,%xmm8,%xmm7
+ vpslld $26,%xmm8,%xmm2
+ vmovdqu %xmm5,192-128(%rax)
+ vpaddd %xmm11,%xmm5,%xmm5
+
+ vpsrld $11,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm8,%xmm2
+ vpaddd 0(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm8,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm8,%xmm2
+ vpandn %xmm10,%xmm8,%xmm0
+ vpand %xmm9,%xmm8,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm12,%xmm11
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm12,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm12,%xmm13,%xmm3
+
+ vpxor %xmm1,%xmm11,%xmm11
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm12,%xmm1
+
+ vpslld $19,%xmm12,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm11,%xmm7
+
+ vpsrld $22,%xmm12,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm12,%xmm2
+ vpxor %xmm4,%xmm13,%xmm11
+ vpaddd %xmm5,%xmm15,%xmm15
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm11,%xmm11
+ vpaddd %xmm7,%xmm11,%xmm11
+ vmovdqu 224-128(%rax),%xmm5
+ vpaddd 96-128(%rax),%xmm6,%xmm6
+
+ vpsrld $3,%xmm5,%xmm7
+ vpsrld $7,%xmm5,%xmm1
+ vpslld $25,%xmm5,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm5,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm5,%xmm2
+ vmovdqu 176-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpsrld $6,%xmm15,%xmm7
+ vpslld $26,%xmm15,%xmm2
+ vmovdqu %xmm6,208-128(%rax)
+ vpaddd %xmm10,%xmm6,%xmm6
+
+ vpsrld $11,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm15,%xmm2
+ vpaddd 32(%rbp),%xmm6,%xmm6
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm15,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm15,%xmm2
+ vpandn %xmm9,%xmm15,%xmm0
+ vpand %xmm8,%xmm15,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm11,%xmm10
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm11,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm11,%xmm12,%xmm4
+
+ vpxor %xmm1,%xmm10,%xmm10
+ vpaddd %xmm7,%xmm6,%xmm6
+
+ vpsrld $13,%xmm11,%xmm1
+
+ vpslld $19,%xmm11,%xmm2
+ vpaddd %xmm0,%xmm6,%xmm6
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm10,%xmm7
+
+ vpsrld $22,%xmm11,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm11,%xmm2
+ vpxor %xmm3,%xmm12,%xmm10
+ vpaddd %xmm6,%xmm14,%xmm14
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm6,%xmm10,%xmm10
+ vpaddd %xmm7,%xmm10,%xmm10
+ vmovdqu 240-128(%rax),%xmm6
+ vpaddd 112-128(%rax),%xmm5,%xmm5
+
+ vpsrld $3,%xmm6,%xmm7
+ vpsrld $7,%xmm6,%xmm1
+ vpslld $25,%xmm6,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm6,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm6,%xmm2
+ vmovdqu 192-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpxor %xmm1,%xmm3,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm5,%xmm5
+ vpsrld $6,%xmm14,%xmm7
+ vpslld $26,%xmm14,%xmm2
+ vmovdqu %xmm5,224-128(%rax)
+ vpaddd %xmm9,%xmm5,%xmm5
+
+ vpsrld $11,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm14,%xmm2
+ vpaddd 64(%rbp),%xmm5,%xmm5
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm14,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm14,%xmm2
+ vpandn %xmm8,%xmm14,%xmm0
+ vpand %xmm15,%xmm14,%xmm3
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm10,%xmm9
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm10,%xmm1
+ vpxor %xmm3,%xmm0,%xmm0
+ vpxor %xmm10,%xmm11,%xmm3
+
+ vpxor %xmm1,%xmm9,%xmm9
+ vpaddd %xmm7,%xmm5,%xmm5
+
+ vpsrld $13,%xmm10,%xmm1
+
+ vpslld $19,%xmm10,%xmm2
+ vpaddd %xmm0,%xmm5,%xmm5
+ vpand %xmm3,%xmm4,%xmm4
+
+ vpxor %xmm1,%xmm9,%xmm7
+
+ vpsrld $22,%xmm10,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm10,%xmm2
+ vpxor %xmm4,%xmm11,%xmm9
+ vpaddd %xmm5,%xmm13,%xmm13
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm5,%xmm9,%xmm9
+ vpaddd %xmm7,%xmm9,%xmm9
+ vmovdqu 0-128(%rax),%xmm5
+ vpaddd 128-128(%rax),%xmm6,%xmm6
+
+ vpsrld $3,%xmm5,%xmm7
+ vpsrld $7,%xmm5,%xmm1
+ vpslld $25,%xmm5,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $18,%xmm5,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $14,%xmm5,%xmm2
+ vmovdqu 208-128(%rax),%xmm0
+ vpsrld $10,%xmm0,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpsrld $17,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $15,%xmm0,%xmm2
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpxor %xmm1,%xmm4,%xmm7
+ vpsrld $19,%xmm0,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $13,%xmm0,%xmm2
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+ vpaddd %xmm7,%xmm6,%xmm6
+ vpsrld $6,%xmm13,%xmm7
+ vpslld $26,%xmm13,%xmm2
+ vmovdqu %xmm6,240-128(%rax)
+ vpaddd %xmm8,%xmm6,%xmm6
+
+ vpsrld $11,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+ vpslld $21,%xmm13,%xmm2
+ vpaddd 96(%rbp),%xmm6,%xmm6
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $25,%xmm13,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $7,%xmm13,%xmm2
+ vpandn %xmm15,%xmm13,%xmm0
+ vpand %xmm14,%xmm13,%xmm4
+
+ vpxor %xmm1,%xmm7,%xmm7
+
+ vpsrld $2,%xmm9,%xmm8
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $30,%xmm9,%xmm1
+ vpxor %xmm4,%xmm0,%xmm0
+ vpxor %xmm9,%xmm10,%xmm4
+
+ vpxor %xmm1,%xmm8,%xmm8
+ vpaddd %xmm7,%xmm6,%xmm6
+
+ vpsrld $13,%xmm9,%xmm1
+
+ vpslld $19,%xmm9,%xmm2
+ vpaddd %xmm0,%xmm6,%xmm6
+ vpand %xmm4,%xmm3,%xmm3
+
+ vpxor %xmm1,%xmm8,%xmm7
+
+ vpsrld $22,%xmm9,%xmm1
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpslld $10,%xmm9,%xmm2
+ vpxor %xmm3,%xmm10,%xmm8
+ vpaddd %xmm6,%xmm12,%xmm12
+
+ vpxor %xmm1,%xmm7,%xmm7
+ vpxor %xmm2,%xmm7,%xmm7
+
+ vpaddd %xmm6,%xmm8,%xmm8
+ vpaddd %xmm7,%xmm8,%xmm8
+ addq $256,%rbp
+ decl %ecx
+ jnz .Loop_16_xx_avx
+
+ movl $1,%ecx
+ leaq K256+128(%rip),%rbp
+ cmpl 0(%rbx),%ecx
+ cmovgeq %rbp,%r8
+ cmpl 4(%rbx),%ecx
+ cmovgeq %rbp,%r9
+ cmpl 8(%rbx),%ecx
+ cmovgeq %rbp,%r10
+ cmpl 12(%rbx),%ecx
+ cmovgeq %rbp,%r11
+ vmovdqa (%rbx),%xmm7
+ vpxor %xmm0,%xmm0,%xmm0
+ vmovdqa %xmm7,%xmm6
+ vpcmpgtd %xmm0,%xmm6,%xmm6
+ vpaddd %xmm6,%xmm7,%xmm7
+
+ vmovdqu 0-128(%rdi),%xmm0
+ vpand %xmm6,%xmm8,%xmm8
+ vmovdqu 32-128(%rdi),%xmm1
+ vpand %xmm6,%xmm9,%xmm9
+ vmovdqu 64-128(%rdi),%xmm2
+ vpand %xmm6,%xmm10,%xmm10
+ vmovdqu 96-128(%rdi),%xmm5
+ vpand %xmm6,%xmm11,%xmm11
+ vpaddd %xmm0,%xmm8,%xmm8
+ vmovdqu 128-128(%rdi),%xmm0
+ vpand %xmm6,%xmm12,%xmm12
+ vpaddd %xmm1,%xmm9,%xmm9
+ vmovdqu 160-128(%rdi),%xmm1
+ vpand %xmm6,%xmm13,%xmm13
+ vpaddd %xmm2,%xmm10,%xmm10
+ vmovdqu 192-128(%rdi),%xmm2
+ vpand %xmm6,%xmm14,%xmm14
+ vpaddd %xmm5,%xmm11,%xmm11
+ vmovdqu 224-128(%rdi),%xmm5
+ vpand %xmm6,%xmm15,%xmm15
+ vpaddd %xmm0,%xmm12,%xmm12
+ vpaddd %xmm1,%xmm13,%xmm13
+ vmovdqu %xmm8,0-128(%rdi)
+ vpaddd %xmm2,%xmm14,%xmm14
+ vmovdqu %xmm9,32-128(%rdi)
+ vpaddd %xmm5,%xmm15,%xmm15
+ vmovdqu %xmm10,64-128(%rdi)
+ vmovdqu %xmm11,96-128(%rdi)
+ vmovdqu %xmm12,128-128(%rdi)
+ vmovdqu %xmm13,160-128(%rdi)
+ vmovdqu %xmm14,192-128(%rdi)
+ vmovdqu %xmm15,224-128(%rdi)
+
+ vmovdqu %xmm7,(%rbx)
+ vmovdqu .Lpbswap(%rip),%xmm6
+ decl %edx
+ jnz .Loop_avx
+
+ movl 280(%rsp),%edx
+ leaq 16(%rdi),%rdi
+ leaq 64(%rsi),%rsi
+ decl %edx
+ jnz .Loop_grande_avx
+
+.Ldone_avx:
+ movq 272(%rsp),%rax
+ vzeroupper
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lepilogue_avx:
+ .byte 0xf3,0xc3
+.size sha256_multi_block_avx,.-sha256_multi_block_avx
+.type sha256_multi_block_avx2, at function
+.align 32
+sha256_multi_block_avx2:
+_avx2_shortcut:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $576,%rsp
+ andq $-256,%rsp
+ movq %rax,544(%rsp)
+.Lbody_avx2:
+ leaq K256+128(%rip),%rbp
+ leaq 128(%rdi),%rdi
+
+.Loop_grande_avx2:
+ movl %edx,552(%rsp)
+ xorl %edx,%edx
+ leaq 512(%rsp),%rbx
+ movq 0(%rsi),%r12
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rbp,%r12
+ movq 16(%rsi),%r13
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rbp,%r13
+ movq 32(%rsi),%r14
+ movl 40(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,8(%rbx)
+ cmovleq %rbp,%r14
+ movq 48(%rsi),%r15
+ movl 56(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,12(%rbx)
+ cmovleq %rbp,%r15
+ movq 64(%rsi),%r8
+ movl 72(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,16(%rbx)
+ cmovleq %rbp,%r8
+ movq 80(%rsi),%r9
+ movl 88(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,20(%rbx)
+ cmovleq %rbp,%r9
+ movq 96(%rsi),%r10
+ movl 104(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,24(%rbx)
+ cmovleq %rbp,%r10
+ movq 112(%rsi),%r11
+ movl 120(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,28(%rbx)
+ cmovleq %rbp,%r11
+ vmovdqu 0-128(%rdi),%ymm8
+ leaq 128(%rsp),%rax
+ vmovdqu 32-128(%rdi),%ymm9
+ leaq 256+128(%rsp),%rbx
+ vmovdqu 64-128(%rdi),%ymm10
+ vmovdqu 96-128(%rdi),%ymm11
+ vmovdqu 128-128(%rdi),%ymm12
+ vmovdqu 160-128(%rdi),%ymm13
+ vmovdqu 192-128(%rdi),%ymm14
+ vmovdqu 224-128(%rdi),%ymm15
+ vmovdqu .Lpbswap(%rip),%ymm6
+ jmp .Loop_avx2
+
+.align 32
+.Loop_avx2:
+ vpxor %ymm9,%ymm10,%ymm4
+ vmovd 0(%r12),%xmm5
+ vmovd 0(%r8),%xmm0
+ vmovd 0(%r13),%xmm1
+ vmovd 0(%r9),%xmm2
+ vpinsrd $1,0(%r14),%xmm5,%xmm5
+ vpinsrd $1,0(%r10),%xmm0,%xmm0
+ vpinsrd $1,0(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,0(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm12,%ymm7
+ vpslld $26,%ymm12,%ymm2
+ vmovdqu %ymm5,0-128(%rax)
+ vpaddd %ymm15,%ymm5,%ymm5
+
+ vpsrld $11,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm12,%ymm2
+ vpaddd -128(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm12,%ymm2
+ vpandn %ymm14,%ymm12,%ymm0
+ vpand %ymm13,%ymm12,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm8,%ymm15
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm8,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm8,%ymm9,%ymm3
+
+ vpxor %ymm1,%ymm15,%ymm15
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm8,%ymm1
+
+ vpslld $19,%ymm8,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm15,%ymm7
+
+ vpsrld $22,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm8,%ymm2
+ vpxor %ymm4,%ymm9,%ymm15
+ vpaddd %ymm5,%ymm11,%ymm11
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm15,%ymm15
+ vpaddd %ymm7,%ymm15,%ymm15
+ vmovd 4(%r12),%xmm5
+ vmovd 4(%r8),%xmm0
+ vmovd 4(%r13),%xmm1
+ vmovd 4(%r9),%xmm2
+ vpinsrd $1,4(%r14),%xmm5,%xmm5
+ vpinsrd $1,4(%r10),%xmm0,%xmm0
+ vpinsrd $1,4(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,4(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm11,%ymm7
+ vpslld $26,%ymm11,%ymm2
+ vmovdqu %ymm5,32-128(%rax)
+ vpaddd %ymm14,%ymm5,%ymm5
+
+ vpsrld $11,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm11,%ymm2
+ vpaddd -96(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm11,%ymm2
+ vpandn %ymm13,%ymm11,%ymm0
+ vpand %ymm12,%ymm11,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm15,%ymm14
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm15,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm15,%ymm8,%ymm4
+
+ vpxor %ymm1,%ymm14,%ymm14
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm15,%ymm1
+
+ vpslld $19,%ymm15,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm14,%ymm7
+
+ vpsrld $22,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm15,%ymm2
+ vpxor %ymm3,%ymm8,%ymm14
+ vpaddd %ymm5,%ymm10,%ymm10
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm14,%ymm14
+ vpaddd %ymm7,%ymm14,%ymm14
+ vmovd 8(%r12),%xmm5
+ vmovd 8(%r8),%xmm0
+ vmovd 8(%r13),%xmm1
+ vmovd 8(%r9),%xmm2
+ vpinsrd $1,8(%r14),%xmm5,%xmm5
+ vpinsrd $1,8(%r10),%xmm0,%xmm0
+ vpinsrd $1,8(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,8(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm10,%ymm7
+ vpslld $26,%ymm10,%ymm2
+ vmovdqu %ymm5,64-128(%rax)
+ vpaddd %ymm13,%ymm5,%ymm5
+
+ vpsrld $11,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm10,%ymm2
+ vpaddd -64(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm10,%ymm2
+ vpandn %ymm12,%ymm10,%ymm0
+ vpand %ymm11,%ymm10,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm14,%ymm13
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm14,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm14,%ymm15,%ymm3
+
+ vpxor %ymm1,%ymm13,%ymm13
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm14,%ymm1
+
+ vpslld $19,%ymm14,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm13,%ymm7
+
+ vpsrld $22,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm14,%ymm2
+ vpxor %ymm4,%ymm15,%ymm13
+ vpaddd %ymm5,%ymm9,%ymm9
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm13,%ymm13
+ vpaddd %ymm7,%ymm13,%ymm13
+ vmovd 12(%r12),%xmm5
+ vmovd 12(%r8),%xmm0
+ vmovd 12(%r13),%xmm1
+ vmovd 12(%r9),%xmm2
+ vpinsrd $1,12(%r14),%xmm5,%xmm5
+ vpinsrd $1,12(%r10),%xmm0,%xmm0
+ vpinsrd $1,12(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,12(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm9,%ymm7
+ vpslld $26,%ymm9,%ymm2
+ vmovdqu %ymm5,96-128(%rax)
+ vpaddd %ymm12,%ymm5,%ymm5
+
+ vpsrld $11,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm9,%ymm2
+ vpaddd -32(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm9,%ymm2
+ vpandn %ymm11,%ymm9,%ymm0
+ vpand %ymm10,%ymm9,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm13,%ymm12
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm13,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm13,%ymm14,%ymm4
+
+ vpxor %ymm1,%ymm12,%ymm12
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm13,%ymm1
+
+ vpslld $19,%ymm13,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm12,%ymm7
+
+ vpsrld $22,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm13,%ymm2
+ vpxor %ymm3,%ymm14,%ymm12
+ vpaddd %ymm5,%ymm8,%ymm8
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm12,%ymm12
+ vpaddd %ymm7,%ymm12,%ymm12
+ vmovd 16(%r12),%xmm5
+ vmovd 16(%r8),%xmm0
+ vmovd 16(%r13),%xmm1
+ vmovd 16(%r9),%xmm2
+ vpinsrd $1,16(%r14),%xmm5,%xmm5
+ vpinsrd $1,16(%r10),%xmm0,%xmm0
+ vpinsrd $1,16(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,16(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm8,%ymm7
+ vpslld $26,%ymm8,%ymm2
+ vmovdqu %ymm5,128-128(%rax)
+ vpaddd %ymm11,%ymm5,%ymm5
+
+ vpsrld $11,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm8,%ymm2
+ vpaddd 0(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm8,%ymm2
+ vpandn %ymm10,%ymm8,%ymm0
+ vpand %ymm9,%ymm8,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm12,%ymm11
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm12,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm12,%ymm13,%ymm3
+
+ vpxor %ymm1,%ymm11,%ymm11
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm12,%ymm1
+
+ vpslld $19,%ymm12,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm11,%ymm7
+
+ vpsrld $22,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm12,%ymm2
+ vpxor %ymm4,%ymm13,%ymm11
+ vpaddd %ymm5,%ymm15,%ymm15
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm11,%ymm11
+ vpaddd %ymm7,%ymm11,%ymm11
+ vmovd 20(%r12),%xmm5
+ vmovd 20(%r8),%xmm0
+ vmovd 20(%r13),%xmm1
+ vmovd 20(%r9),%xmm2
+ vpinsrd $1,20(%r14),%xmm5,%xmm5
+ vpinsrd $1,20(%r10),%xmm0,%xmm0
+ vpinsrd $1,20(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,20(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm15,%ymm7
+ vpslld $26,%ymm15,%ymm2
+ vmovdqu %ymm5,160-128(%rax)
+ vpaddd %ymm10,%ymm5,%ymm5
+
+ vpsrld $11,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm15,%ymm2
+ vpaddd 32(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm15,%ymm2
+ vpandn %ymm9,%ymm15,%ymm0
+ vpand %ymm8,%ymm15,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm11,%ymm10
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm11,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm11,%ymm12,%ymm4
+
+ vpxor %ymm1,%ymm10,%ymm10
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm11,%ymm1
+
+ vpslld $19,%ymm11,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm10,%ymm7
+
+ vpsrld $22,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm11,%ymm2
+ vpxor %ymm3,%ymm12,%ymm10
+ vpaddd %ymm5,%ymm14,%ymm14
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm10,%ymm10
+ vpaddd %ymm7,%ymm10,%ymm10
+ vmovd 24(%r12),%xmm5
+ vmovd 24(%r8),%xmm0
+ vmovd 24(%r13),%xmm1
+ vmovd 24(%r9),%xmm2
+ vpinsrd $1,24(%r14),%xmm5,%xmm5
+ vpinsrd $1,24(%r10),%xmm0,%xmm0
+ vpinsrd $1,24(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,24(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm14,%ymm7
+ vpslld $26,%ymm14,%ymm2
+ vmovdqu %ymm5,192-128(%rax)
+ vpaddd %ymm9,%ymm5,%ymm5
+
+ vpsrld $11,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm14,%ymm2
+ vpaddd 64(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm14,%ymm2
+ vpandn %ymm8,%ymm14,%ymm0
+ vpand %ymm15,%ymm14,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm10,%ymm9
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm10,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm10,%ymm11,%ymm3
+
+ vpxor %ymm1,%ymm9,%ymm9
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm10,%ymm1
+
+ vpslld $19,%ymm10,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm9,%ymm7
+
+ vpsrld $22,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm10,%ymm2
+ vpxor %ymm4,%ymm11,%ymm9
+ vpaddd %ymm5,%ymm13,%ymm13
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm9,%ymm9
+ vpaddd %ymm7,%ymm9,%ymm9
+ vmovd 28(%r12),%xmm5
+ vmovd 28(%r8),%xmm0
+ vmovd 28(%r13),%xmm1
+ vmovd 28(%r9),%xmm2
+ vpinsrd $1,28(%r14),%xmm5,%xmm5
+ vpinsrd $1,28(%r10),%xmm0,%xmm0
+ vpinsrd $1,28(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,28(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm13,%ymm7
+ vpslld $26,%ymm13,%ymm2
+ vmovdqu %ymm5,224-128(%rax)
+ vpaddd %ymm8,%ymm5,%ymm5
+
+ vpsrld $11,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm13,%ymm2
+ vpaddd 96(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm13,%ymm2
+ vpandn %ymm15,%ymm13,%ymm0
+ vpand %ymm14,%ymm13,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm9,%ymm8
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm9,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm9,%ymm10,%ymm4
+
+ vpxor %ymm1,%ymm8,%ymm8
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm9,%ymm1
+
+ vpslld $19,%ymm9,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm8,%ymm7
+
+ vpsrld $22,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm9,%ymm2
+ vpxor %ymm3,%ymm10,%ymm8
+ vpaddd %ymm5,%ymm12,%ymm12
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm8,%ymm8
+ vpaddd %ymm7,%ymm8,%ymm8
+ addq $256,%rbp
+ vmovd 32(%r12),%xmm5
+ vmovd 32(%r8),%xmm0
+ vmovd 32(%r13),%xmm1
+ vmovd 32(%r9),%xmm2
+ vpinsrd $1,32(%r14),%xmm5,%xmm5
+ vpinsrd $1,32(%r10),%xmm0,%xmm0
+ vpinsrd $1,32(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,32(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm12,%ymm7
+ vpslld $26,%ymm12,%ymm2
+ vmovdqu %ymm5,256-256-128(%rbx)
+ vpaddd %ymm15,%ymm5,%ymm5
+
+ vpsrld $11,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm12,%ymm2
+ vpaddd -128(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm12,%ymm2
+ vpandn %ymm14,%ymm12,%ymm0
+ vpand %ymm13,%ymm12,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm8,%ymm15
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm8,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm8,%ymm9,%ymm3
+
+ vpxor %ymm1,%ymm15,%ymm15
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm8,%ymm1
+
+ vpslld $19,%ymm8,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm15,%ymm7
+
+ vpsrld $22,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm8,%ymm2
+ vpxor %ymm4,%ymm9,%ymm15
+ vpaddd %ymm5,%ymm11,%ymm11
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm15,%ymm15
+ vpaddd %ymm7,%ymm15,%ymm15
+ vmovd 36(%r12),%xmm5
+ vmovd 36(%r8),%xmm0
+ vmovd 36(%r13),%xmm1
+ vmovd 36(%r9),%xmm2
+ vpinsrd $1,36(%r14),%xmm5,%xmm5
+ vpinsrd $1,36(%r10),%xmm0,%xmm0
+ vpinsrd $1,36(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,36(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm11,%ymm7
+ vpslld $26,%ymm11,%ymm2
+ vmovdqu %ymm5,288-256-128(%rbx)
+ vpaddd %ymm14,%ymm5,%ymm5
+
+ vpsrld $11,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm11,%ymm2
+ vpaddd -96(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm11,%ymm2
+ vpandn %ymm13,%ymm11,%ymm0
+ vpand %ymm12,%ymm11,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm15,%ymm14
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm15,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm15,%ymm8,%ymm4
+
+ vpxor %ymm1,%ymm14,%ymm14
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm15,%ymm1
+
+ vpslld $19,%ymm15,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm14,%ymm7
+
+ vpsrld $22,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm15,%ymm2
+ vpxor %ymm3,%ymm8,%ymm14
+ vpaddd %ymm5,%ymm10,%ymm10
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm14,%ymm14
+ vpaddd %ymm7,%ymm14,%ymm14
+ vmovd 40(%r12),%xmm5
+ vmovd 40(%r8),%xmm0
+ vmovd 40(%r13),%xmm1
+ vmovd 40(%r9),%xmm2
+ vpinsrd $1,40(%r14),%xmm5,%xmm5
+ vpinsrd $1,40(%r10),%xmm0,%xmm0
+ vpinsrd $1,40(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,40(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm10,%ymm7
+ vpslld $26,%ymm10,%ymm2
+ vmovdqu %ymm5,320-256-128(%rbx)
+ vpaddd %ymm13,%ymm5,%ymm5
+
+ vpsrld $11,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm10,%ymm2
+ vpaddd -64(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm10,%ymm2
+ vpandn %ymm12,%ymm10,%ymm0
+ vpand %ymm11,%ymm10,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm14,%ymm13
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm14,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm14,%ymm15,%ymm3
+
+ vpxor %ymm1,%ymm13,%ymm13
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm14,%ymm1
+
+ vpslld $19,%ymm14,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm13,%ymm7
+
+ vpsrld $22,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm14,%ymm2
+ vpxor %ymm4,%ymm15,%ymm13
+ vpaddd %ymm5,%ymm9,%ymm9
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm13,%ymm13
+ vpaddd %ymm7,%ymm13,%ymm13
+ vmovd 44(%r12),%xmm5
+ vmovd 44(%r8),%xmm0
+ vmovd 44(%r13),%xmm1
+ vmovd 44(%r9),%xmm2
+ vpinsrd $1,44(%r14),%xmm5,%xmm5
+ vpinsrd $1,44(%r10),%xmm0,%xmm0
+ vpinsrd $1,44(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,44(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm9,%ymm7
+ vpslld $26,%ymm9,%ymm2
+ vmovdqu %ymm5,352-256-128(%rbx)
+ vpaddd %ymm12,%ymm5,%ymm5
+
+ vpsrld $11,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm9,%ymm2
+ vpaddd -32(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm9,%ymm2
+ vpandn %ymm11,%ymm9,%ymm0
+ vpand %ymm10,%ymm9,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm13,%ymm12
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm13,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm13,%ymm14,%ymm4
+
+ vpxor %ymm1,%ymm12,%ymm12
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm13,%ymm1
+
+ vpslld $19,%ymm13,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm12,%ymm7
+
+ vpsrld $22,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm13,%ymm2
+ vpxor %ymm3,%ymm14,%ymm12
+ vpaddd %ymm5,%ymm8,%ymm8
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm12,%ymm12
+ vpaddd %ymm7,%ymm12,%ymm12
+ vmovd 48(%r12),%xmm5
+ vmovd 48(%r8),%xmm0
+ vmovd 48(%r13),%xmm1
+ vmovd 48(%r9),%xmm2
+ vpinsrd $1,48(%r14),%xmm5,%xmm5
+ vpinsrd $1,48(%r10),%xmm0,%xmm0
+ vpinsrd $1,48(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,48(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm8,%ymm7
+ vpslld $26,%ymm8,%ymm2
+ vmovdqu %ymm5,384-256-128(%rbx)
+ vpaddd %ymm11,%ymm5,%ymm5
+
+ vpsrld $11,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm8,%ymm2
+ vpaddd 0(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm8,%ymm2
+ vpandn %ymm10,%ymm8,%ymm0
+ vpand %ymm9,%ymm8,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm12,%ymm11
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm12,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm12,%ymm13,%ymm3
+
+ vpxor %ymm1,%ymm11,%ymm11
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm12,%ymm1
+
+ vpslld $19,%ymm12,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm11,%ymm7
+
+ vpsrld $22,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm12,%ymm2
+ vpxor %ymm4,%ymm13,%ymm11
+ vpaddd %ymm5,%ymm15,%ymm15
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm11,%ymm11
+ vpaddd %ymm7,%ymm11,%ymm11
+ vmovd 52(%r12),%xmm5
+ vmovd 52(%r8),%xmm0
+ vmovd 52(%r13),%xmm1
+ vmovd 52(%r9),%xmm2
+ vpinsrd $1,52(%r14),%xmm5,%xmm5
+ vpinsrd $1,52(%r10),%xmm0,%xmm0
+ vpinsrd $1,52(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,52(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm15,%ymm7
+ vpslld $26,%ymm15,%ymm2
+ vmovdqu %ymm5,416-256-128(%rbx)
+ vpaddd %ymm10,%ymm5,%ymm5
+
+ vpsrld $11,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm15,%ymm2
+ vpaddd 32(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm15,%ymm2
+ vpandn %ymm9,%ymm15,%ymm0
+ vpand %ymm8,%ymm15,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm11,%ymm10
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm11,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm11,%ymm12,%ymm4
+
+ vpxor %ymm1,%ymm10,%ymm10
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm11,%ymm1
+
+ vpslld $19,%ymm11,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm10,%ymm7
+
+ vpsrld $22,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm11,%ymm2
+ vpxor %ymm3,%ymm12,%ymm10
+ vpaddd %ymm5,%ymm14,%ymm14
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm10,%ymm10
+ vpaddd %ymm7,%ymm10,%ymm10
+ vmovd 56(%r12),%xmm5
+ vmovd 56(%r8),%xmm0
+ vmovd 56(%r13),%xmm1
+ vmovd 56(%r9),%xmm2
+ vpinsrd $1,56(%r14),%xmm5,%xmm5
+ vpinsrd $1,56(%r10),%xmm0,%xmm0
+ vpinsrd $1,56(%r15),%xmm1,%xmm1
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,56(%r11),%xmm2,%xmm2
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm14,%ymm7
+ vpslld $26,%ymm14,%ymm2
+ vmovdqu %ymm5,448-256-128(%rbx)
+ vpaddd %ymm9,%ymm5,%ymm5
+
+ vpsrld $11,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm14,%ymm2
+ vpaddd 64(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm14,%ymm2
+ vpandn %ymm8,%ymm14,%ymm0
+ vpand %ymm15,%ymm14,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm10,%ymm9
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm10,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm10,%ymm11,%ymm3
+
+ vpxor %ymm1,%ymm9,%ymm9
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm10,%ymm1
+
+ vpslld $19,%ymm10,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm9,%ymm7
+
+ vpsrld $22,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm10,%ymm2
+ vpxor %ymm4,%ymm11,%ymm9
+ vpaddd %ymm5,%ymm13,%ymm13
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm9,%ymm9
+ vpaddd %ymm7,%ymm9,%ymm9
+ vmovd 60(%r12),%xmm5
+ leaq 64(%r12),%r12
+ vmovd 60(%r8),%xmm0
+ leaq 64(%r8),%r8
+ vmovd 60(%r13),%xmm1
+ leaq 64(%r13),%r13
+ vmovd 60(%r9),%xmm2
+ leaq 64(%r9),%r9
+ vpinsrd $1,60(%r14),%xmm5,%xmm5
+ leaq 64(%r14),%r14
+ vpinsrd $1,60(%r10),%xmm0,%xmm0
+ leaq 64(%r10),%r10
+ vpinsrd $1,60(%r15),%xmm1,%xmm1
+ leaq 64(%r15),%r15
+ vpunpckldq %ymm1,%ymm5,%ymm5
+ vpinsrd $1,60(%r11),%xmm2,%xmm2
+ leaq 64(%r11),%r11
+ vpunpckldq %ymm2,%ymm0,%ymm0
+ vinserti128 $1,%xmm0,%ymm5,%ymm5
+ vpshufb %ymm6,%ymm5,%ymm5
+ vpsrld $6,%ymm13,%ymm7
+ vpslld $26,%ymm13,%ymm2
+ vmovdqu %ymm5,480-256-128(%rbx)
+ vpaddd %ymm8,%ymm5,%ymm5
+
+ vpsrld $11,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm13,%ymm2
+ vpaddd 96(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ prefetcht0 63(%r12)
+ vpslld $7,%ymm13,%ymm2
+ vpandn %ymm15,%ymm13,%ymm0
+ vpand %ymm14,%ymm13,%ymm4
+ prefetcht0 63(%r13)
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm9,%ymm8
+ vpxor %ymm2,%ymm7,%ymm7
+ prefetcht0 63(%r14)
+ vpslld $30,%ymm9,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm9,%ymm10,%ymm4
+ prefetcht0 63(%r15)
+ vpxor %ymm1,%ymm8,%ymm8
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm9,%ymm1
+ prefetcht0 63(%r8)
+ vpslld $19,%ymm9,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm4,%ymm3,%ymm3
+ prefetcht0 63(%r9)
+ vpxor %ymm1,%ymm8,%ymm7
+
+ vpsrld $22,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ prefetcht0 63(%r10)
+ vpslld $10,%ymm9,%ymm2
+ vpxor %ymm3,%ymm10,%ymm8
+ vpaddd %ymm5,%ymm12,%ymm12
+ prefetcht0 63(%r11)
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm8,%ymm8
+ vpaddd %ymm7,%ymm8,%ymm8
+ addq $256,%rbp
+ vmovdqu 0-128(%rax),%ymm5
+ movl $3,%ecx
+ jmp .Loop_16_xx_avx2
+.align 32
+.Loop_16_xx_avx2:
+ vmovdqu 32-128(%rax),%ymm6
+ vpaddd 288-256-128(%rbx),%ymm5,%ymm5
+
+ vpsrld $3,%ymm6,%ymm7
+ vpsrld $7,%ymm6,%ymm1
+ vpslld $25,%ymm6,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm6,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm6,%ymm2
+ vmovdqu 448-256-128(%rbx),%ymm0
+ vpsrld $10,%ymm0,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpxor %ymm1,%ymm3,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpsrld $6,%ymm12,%ymm7
+ vpslld $26,%ymm12,%ymm2
+ vmovdqu %ymm5,0-128(%rax)
+ vpaddd %ymm15,%ymm5,%ymm5
+
+ vpsrld $11,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm12,%ymm2
+ vpaddd -128(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm12,%ymm2
+ vpandn %ymm14,%ymm12,%ymm0
+ vpand %ymm13,%ymm12,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm8,%ymm15
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm8,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm8,%ymm9,%ymm3
+
+ vpxor %ymm1,%ymm15,%ymm15
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm8,%ymm1
+
+ vpslld $19,%ymm8,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm15,%ymm7
+
+ vpsrld $22,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm8,%ymm2
+ vpxor %ymm4,%ymm9,%ymm15
+ vpaddd %ymm5,%ymm11,%ymm11
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm15,%ymm15
+ vpaddd %ymm7,%ymm15,%ymm15
+ vmovdqu 64-128(%rax),%ymm5
+ vpaddd 320-256-128(%rbx),%ymm6,%ymm6
+
+ vpsrld $3,%ymm5,%ymm7
+ vpsrld $7,%ymm5,%ymm1
+ vpslld $25,%ymm5,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm5,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm5,%ymm2
+ vmovdqu 480-256-128(%rbx),%ymm0
+ vpsrld $10,%ymm0,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm1,%ymm4,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpsrld $6,%ymm11,%ymm7
+ vpslld $26,%ymm11,%ymm2
+ vmovdqu %ymm6,32-128(%rax)
+ vpaddd %ymm14,%ymm6,%ymm6
+
+ vpsrld $11,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm11,%ymm2
+ vpaddd -96(%rbp),%ymm6,%ymm6
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm11,%ymm2
+ vpandn %ymm13,%ymm11,%ymm0
+ vpand %ymm12,%ymm11,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm15,%ymm14
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm15,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm15,%ymm8,%ymm4
+
+ vpxor %ymm1,%ymm14,%ymm14
+ vpaddd %ymm7,%ymm6,%ymm6
+
+ vpsrld $13,%ymm15,%ymm1
+
+ vpslld $19,%ymm15,%ymm2
+ vpaddd %ymm0,%ymm6,%ymm6
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm14,%ymm7
+
+ vpsrld $22,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm15,%ymm2
+ vpxor %ymm3,%ymm8,%ymm14
+ vpaddd %ymm6,%ymm10,%ymm10
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm6,%ymm14,%ymm14
+ vpaddd %ymm7,%ymm14,%ymm14
+ vmovdqu 96-128(%rax),%ymm6
+ vpaddd 352-256-128(%rbx),%ymm5,%ymm5
+
+ vpsrld $3,%ymm6,%ymm7
+ vpsrld $7,%ymm6,%ymm1
+ vpslld $25,%ymm6,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm6,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm6,%ymm2
+ vmovdqu 0-128(%rax),%ymm0
+ vpsrld $10,%ymm0,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpxor %ymm1,%ymm3,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpsrld $6,%ymm10,%ymm7
+ vpslld $26,%ymm10,%ymm2
+ vmovdqu %ymm5,64-128(%rax)
+ vpaddd %ymm13,%ymm5,%ymm5
+
+ vpsrld $11,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm10,%ymm2
+ vpaddd -64(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm10,%ymm2
+ vpandn %ymm12,%ymm10,%ymm0
+ vpand %ymm11,%ymm10,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm14,%ymm13
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm14,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm14,%ymm15,%ymm3
+
+ vpxor %ymm1,%ymm13,%ymm13
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm14,%ymm1
+
+ vpslld $19,%ymm14,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm13,%ymm7
+
+ vpsrld $22,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm14,%ymm2
+ vpxor %ymm4,%ymm15,%ymm13
+ vpaddd %ymm5,%ymm9,%ymm9
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm13,%ymm13
+ vpaddd %ymm7,%ymm13,%ymm13
+ vmovdqu 128-128(%rax),%ymm5
+ vpaddd 384-256-128(%rbx),%ymm6,%ymm6
+
+ vpsrld $3,%ymm5,%ymm7
+ vpsrld $7,%ymm5,%ymm1
+ vpslld $25,%ymm5,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm5,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm5,%ymm2
+ vmovdqu 32-128(%rax),%ymm0
+ vpsrld $10,%ymm0,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm1,%ymm4,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpsrld $6,%ymm9,%ymm7
+ vpslld $26,%ymm9,%ymm2
+ vmovdqu %ymm6,96-128(%rax)
+ vpaddd %ymm12,%ymm6,%ymm6
+
+ vpsrld $11,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm9,%ymm2
+ vpaddd -32(%rbp),%ymm6,%ymm6
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm9,%ymm2
+ vpandn %ymm11,%ymm9,%ymm0
+ vpand %ymm10,%ymm9,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm13,%ymm12
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm13,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm13,%ymm14,%ymm4
+
+ vpxor %ymm1,%ymm12,%ymm12
+ vpaddd %ymm7,%ymm6,%ymm6
+
+ vpsrld $13,%ymm13,%ymm1
+
+ vpslld $19,%ymm13,%ymm2
+ vpaddd %ymm0,%ymm6,%ymm6
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm12,%ymm7
+
+ vpsrld $22,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm13,%ymm2
+ vpxor %ymm3,%ymm14,%ymm12
+ vpaddd %ymm6,%ymm8,%ymm8
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm6,%ymm12,%ymm12
+ vpaddd %ymm7,%ymm12,%ymm12
+ vmovdqu 160-128(%rax),%ymm6
+ vpaddd 416-256-128(%rbx),%ymm5,%ymm5
+
+ vpsrld $3,%ymm6,%ymm7
+ vpsrld $7,%ymm6,%ymm1
+ vpslld $25,%ymm6,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm6,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm6,%ymm2
+ vmovdqu 64-128(%rax),%ymm0
+ vpsrld $10,%ymm0,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpxor %ymm1,%ymm3,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpsrld $6,%ymm8,%ymm7
+ vpslld $26,%ymm8,%ymm2
+ vmovdqu %ymm5,128-128(%rax)
+ vpaddd %ymm11,%ymm5,%ymm5
+
+ vpsrld $11,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm8,%ymm2
+ vpaddd 0(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm8,%ymm2
+ vpandn %ymm10,%ymm8,%ymm0
+ vpand %ymm9,%ymm8,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm12,%ymm11
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm12,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm12,%ymm13,%ymm3
+
+ vpxor %ymm1,%ymm11,%ymm11
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm12,%ymm1
+
+ vpslld $19,%ymm12,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm11,%ymm7
+
+ vpsrld $22,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm12,%ymm2
+ vpxor %ymm4,%ymm13,%ymm11
+ vpaddd %ymm5,%ymm15,%ymm15
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm11,%ymm11
+ vpaddd %ymm7,%ymm11,%ymm11
+ vmovdqu 192-128(%rax),%ymm5
+ vpaddd 448-256-128(%rbx),%ymm6,%ymm6
+
+ vpsrld $3,%ymm5,%ymm7
+ vpsrld $7,%ymm5,%ymm1
+ vpslld $25,%ymm5,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm5,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm5,%ymm2
+ vmovdqu 96-128(%rax),%ymm0
+ vpsrld $10,%ymm0,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm1,%ymm4,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpsrld $6,%ymm15,%ymm7
+ vpslld $26,%ymm15,%ymm2
+ vmovdqu %ymm6,160-128(%rax)
+ vpaddd %ymm10,%ymm6,%ymm6
+
+ vpsrld $11,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm15,%ymm2
+ vpaddd 32(%rbp),%ymm6,%ymm6
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm15,%ymm2
+ vpandn %ymm9,%ymm15,%ymm0
+ vpand %ymm8,%ymm15,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm11,%ymm10
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm11,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm11,%ymm12,%ymm4
+
+ vpxor %ymm1,%ymm10,%ymm10
+ vpaddd %ymm7,%ymm6,%ymm6
+
+ vpsrld $13,%ymm11,%ymm1
+
+ vpslld $19,%ymm11,%ymm2
+ vpaddd %ymm0,%ymm6,%ymm6
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm10,%ymm7
+
+ vpsrld $22,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm11,%ymm2
+ vpxor %ymm3,%ymm12,%ymm10
+ vpaddd %ymm6,%ymm14,%ymm14
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm6,%ymm10,%ymm10
+ vpaddd %ymm7,%ymm10,%ymm10
+ vmovdqu 224-128(%rax),%ymm6
+ vpaddd 480-256-128(%rbx),%ymm5,%ymm5
+
+ vpsrld $3,%ymm6,%ymm7
+ vpsrld $7,%ymm6,%ymm1
+ vpslld $25,%ymm6,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm6,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm6,%ymm2
+ vmovdqu 128-128(%rax),%ymm0
+ vpsrld $10,%ymm0,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpxor %ymm1,%ymm3,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpsrld $6,%ymm14,%ymm7
+ vpslld $26,%ymm14,%ymm2
+ vmovdqu %ymm5,192-128(%rax)
+ vpaddd %ymm9,%ymm5,%ymm5
+
+ vpsrld $11,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm14,%ymm2
+ vpaddd 64(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm14,%ymm2
+ vpandn %ymm8,%ymm14,%ymm0
+ vpand %ymm15,%ymm14,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm10,%ymm9
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm10,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm10,%ymm11,%ymm3
+
+ vpxor %ymm1,%ymm9,%ymm9
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm10,%ymm1
+
+ vpslld $19,%ymm10,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm9,%ymm7
+
+ vpsrld $22,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm10,%ymm2
+ vpxor %ymm4,%ymm11,%ymm9
+ vpaddd %ymm5,%ymm13,%ymm13
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm9,%ymm9
+ vpaddd %ymm7,%ymm9,%ymm9
+ vmovdqu 256-256-128(%rbx),%ymm5
+ vpaddd 0-128(%rax),%ymm6,%ymm6
+
+ vpsrld $3,%ymm5,%ymm7
+ vpsrld $7,%ymm5,%ymm1
+ vpslld $25,%ymm5,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm5,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm5,%ymm2
+ vmovdqu 160-128(%rax),%ymm0
+ vpsrld $10,%ymm0,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm1,%ymm4,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpsrld $6,%ymm13,%ymm7
+ vpslld $26,%ymm13,%ymm2
+ vmovdqu %ymm6,224-128(%rax)
+ vpaddd %ymm8,%ymm6,%ymm6
+
+ vpsrld $11,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm13,%ymm2
+ vpaddd 96(%rbp),%ymm6,%ymm6
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm13,%ymm2
+ vpandn %ymm15,%ymm13,%ymm0
+ vpand %ymm14,%ymm13,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm9,%ymm8
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm9,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm9,%ymm10,%ymm4
+
+ vpxor %ymm1,%ymm8,%ymm8
+ vpaddd %ymm7,%ymm6,%ymm6
+
+ vpsrld $13,%ymm9,%ymm1
+
+ vpslld $19,%ymm9,%ymm2
+ vpaddd %ymm0,%ymm6,%ymm6
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm8,%ymm7
+
+ vpsrld $22,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm9,%ymm2
+ vpxor %ymm3,%ymm10,%ymm8
+ vpaddd %ymm6,%ymm12,%ymm12
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm6,%ymm8,%ymm8
+ vpaddd %ymm7,%ymm8,%ymm8
+ addq $256,%rbp
+ vmovdqu 288-256-128(%rbx),%ymm6
+ vpaddd 32-128(%rax),%ymm5,%ymm5
+
+ vpsrld $3,%ymm6,%ymm7
+ vpsrld $7,%ymm6,%ymm1
+ vpslld $25,%ymm6,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm6,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm6,%ymm2
+ vmovdqu 192-128(%rax),%ymm0
+ vpsrld $10,%ymm0,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpxor %ymm1,%ymm3,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpsrld $6,%ymm12,%ymm7
+ vpslld $26,%ymm12,%ymm2
+ vmovdqu %ymm5,256-256-128(%rbx)
+ vpaddd %ymm15,%ymm5,%ymm5
+
+ vpsrld $11,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm12,%ymm2
+ vpaddd -128(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm12,%ymm2
+ vpandn %ymm14,%ymm12,%ymm0
+ vpand %ymm13,%ymm12,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm8,%ymm15
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm8,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm8,%ymm9,%ymm3
+
+ vpxor %ymm1,%ymm15,%ymm15
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm8,%ymm1
+
+ vpslld $19,%ymm8,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm15,%ymm7
+
+ vpsrld $22,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm8,%ymm2
+ vpxor %ymm4,%ymm9,%ymm15
+ vpaddd %ymm5,%ymm11,%ymm11
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm15,%ymm15
+ vpaddd %ymm7,%ymm15,%ymm15
+ vmovdqu 320-256-128(%rbx),%ymm5
+ vpaddd 64-128(%rax),%ymm6,%ymm6
+
+ vpsrld $3,%ymm5,%ymm7
+ vpsrld $7,%ymm5,%ymm1
+ vpslld $25,%ymm5,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm5,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm5,%ymm2
+ vmovdqu 224-128(%rax),%ymm0
+ vpsrld $10,%ymm0,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm1,%ymm4,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpsrld $6,%ymm11,%ymm7
+ vpslld $26,%ymm11,%ymm2
+ vmovdqu %ymm6,288-256-128(%rbx)
+ vpaddd %ymm14,%ymm6,%ymm6
+
+ vpsrld $11,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm11,%ymm2
+ vpaddd -96(%rbp),%ymm6,%ymm6
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm11,%ymm2
+ vpandn %ymm13,%ymm11,%ymm0
+ vpand %ymm12,%ymm11,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm15,%ymm14
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm15,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm15,%ymm8,%ymm4
+
+ vpxor %ymm1,%ymm14,%ymm14
+ vpaddd %ymm7,%ymm6,%ymm6
+
+ vpsrld $13,%ymm15,%ymm1
+
+ vpslld $19,%ymm15,%ymm2
+ vpaddd %ymm0,%ymm6,%ymm6
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm14,%ymm7
+
+ vpsrld $22,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm15,%ymm2
+ vpxor %ymm3,%ymm8,%ymm14
+ vpaddd %ymm6,%ymm10,%ymm10
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm6,%ymm14,%ymm14
+ vpaddd %ymm7,%ymm14,%ymm14
+ vmovdqu 352-256-128(%rbx),%ymm6
+ vpaddd 96-128(%rax),%ymm5,%ymm5
+
+ vpsrld $3,%ymm6,%ymm7
+ vpsrld $7,%ymm6,%ymm1
+ vpslld $25,%ymm6,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm6,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm6,%ymm2
+ vmovdqu 256-256-128(%rbx),%ymm0
+ vpsrld $10,%ymm0,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpxor %ymm1,%ymm3,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpsrld $6,%ymm10,%ymm7
+ vpslld $26,%ymm10,%ymm2
+ vmovdqu %ymm5,320-256-128(%rbx)
+ vpaddd %ymm13,%ymm5,%ymm5
+
+ vpsrld $11,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm10,%ymm2
+ vpaddd -64(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm10,%ymm2
+ vpandn %ymm12,%ymm10,%ymm0
+ vpand %ymm11,%ymm10,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm14,%ymm13
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm14,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm14,%ymm15,%ymm3
+
+ vpxor %ymm1,%ymm13,%ymm13
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm14,%ymm1
+
+ vpslld $19,%ymm14,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm13,%ymm7
+
+ vpsrld $22,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm14,%ymm2
+ vpxor %ymm4,%ymm15,%ymm13
+ vpaddd %ymm5,%ymm9,%ymm9
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm13,%ymm13
+ vpaddd %ymm7,%ymm13,%ymm13
+ vmovdqu 384-256-128(%rbx),%ymm5
+ vpaddd 128-128(%rax),%ymm6,%ymm6
+
+ vpsrld $3,%ymm5,%ymm7
+ vpsrld $7,%ymm5,%ymm1
+ vpslld $25,%ymm5,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm5,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm5,%ymm2
+ vmovdqu 288-256-128(%rbx),%ymm0
+ vpsrld $10,%ymm0,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm1,%ymm4,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpsrld $6,%ymm9,%ymm7
+ vpslld $26,%ymm9,%ymm2
+ vmovdqu %ymm6,352-256-128(%rbx)
+ vpaddd %ymm12,%ymm6,%ymm6
+
+ vpsrld $11,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm9,%ymm2
+ vpaddd -32(%rbp),%ymm6,%ymm6
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm9,%ymm2
+ vpandn %ymm11,%ymm9,%ymm0
+ vpand %ymm10,%ymm9,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm13,%ymm12
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm13,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm13,%ymm14,%ymm4
+
+ vpxor %ymm1,%ymm12,%ymm12
+ vpaddd %ymm7,%ymm6,%ymm6
+
+ vpsrld $13,%ymm13,%ymm1
+
+ vpslld $19,%ymm13,%ymm2
+ vpaddd %ymm0,%ymm6,%ymm6
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm12,%ymm7
+
+ vpsrld $22,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm13,%ymm2
+ vpxor %ymm3,%ymm14,%ymm12
+ vpaddd %ymm6,%ymm8,%ymm8
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm6,%ymm12,%ymm12
+ vpaddd %ymm7,%ymm12,%ymm12
+ vmovdqu 416-256-128(%rbx),%ymm6
+ vpaddd 160-128(%rax),%ymm5,%ymm5
+
+ vpsrld $3,%ymm6,%ymm7
+ vpsrld $7,%ymm6,%ymm1
+ vpslld $25,%ymm6,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm6,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm6,%ymm2
+ vmovdqu 320-256-128(%rbx),%ymm0
+ vpsrld $10,%ymm0,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpxor %ymm1,%ymm3,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpsrld $6,%ymm8,%ymm7
+ vpslld $26,%ymm8,%ymm2
+ vmovdqu %ymm5,384-256-128(%rbx)
+ vpaddd %ymm11,%ymm5,%ymm5
+
+ vpsrld $11,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm8,%ymm2
+ vpaddd 0(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm8,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm8,%ymm2
+ vpandn %ymm10,%ymm8,%ymm0
+ vpand %ymm9,%ymm8,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm12,%ymm11
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm12,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm12,%ymm13,%ymm3
+
+ vpxor %ymm1,%ymm11,%ymm11
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm12,%ymm1
+
+ vpslld $19,%ymm12,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm11,%ymm7
+
+ vpsrld $22,%ymm12,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm12,%ymm2
+ vpxor %ymm4,%ymm13,%ymm11
+ vpaddd %ymm5,%ymm15,%ymm15
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm11,%ymm11
+ vpaddd %ymm7,%ymm11,%ymm11
+ vmovdqu 448-256-128(%rbx),%ymm5
+ vpaddd 192-128(%rax),%ymm6,%ymm6
+
+ vpsrld $3,%ymm5,%ymm7
+ vpsrld $7,%ymm5,%ymm1
+ vpslld $25,%ymm5,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm5,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm5,%ymm2
+ vmovdqu 352-256-128(%rbx),%ymm0
+ vpsrld $10,%ymm0,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm1,%ymm4,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpsrld $6,%ymm15,%ymm7
+ vpslld $26,%ymm15,%ymm2
+ vmovdqu %ymm6,416-256-128(%rbx)
+ vpaddd %ymm10,%ymm6,%ymm6
+
+ vpsrld $11,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm15,%ymm2
+ vpaddd 32(%rbp),%ymm6,%ymm6
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm15,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm15,%ymm2
+ vpandn %ymm9,%ymm15,%ymm0
+ vpand %ymm8,%ymm15,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm11,%ymm10
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm11,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm11,%ymm12,%ymm4
+
+ vpxor %ymm1,%ymm10,%ymm10
+ vpaddd %ymm7,%ymm6,%ymm6
+
+ vpsrld $13,%ymm11,%ymm1
+
+ vpslld $19,%ymm11,%ymm2
+ vpaddd %ymm0,%ymm6,%ymm6
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm10,%ymm7
+
+ vpsrld $22,%ymm11,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm11,%ymm2
+ vpxor %ymm3,%ymm12,%ymm10
+ vpaddd %ymm6,%ymm14,%ymm14
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm6,%ymm10,%ymm10
+ vpaddd %ymm7,%ymm10,%ymm10
+ vmovdqu 480-256-128(%rbx),%ymm6
+ vpaddd 224-128(%rax),%ymm5,%ymm5
+
+ vpsrld $3,%ymm6,%ymm7
+ vpsrld $7,%ymm6,%ymm1
+ vpslld $25,%ymm6,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm6,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm6,%ymm2
+ vmovdqu 384-256-128(%rbx),%ymm0
+ vpsrld $10,%ymm0,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpxor %ymm1,%ymm3,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm5,%ymm5
+ vpsrld $6,%ymm14,%ymm7
+ vpslld $26,%ymm14,%ymm2
+ vmovdqu %ymm5,448-256-128(%rbx)
+ vpaddd %ymm9,%ymm5,%ymm5
+
+ vpsrld $11,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm14,%ymm2
+ vpaddd 64(%rbp),%ymm5,%ymm5
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm14,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm14,%ymm2
+ vpandn %ymm8,%ymm14,%ymm0
+ vpand %ymm15,%ymm14,%ymm3
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm10,%ymm9
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm10,%ymm1
+ vpxor %ymm3,%ymm0,%ymm0
+ vpxor %ymm10,%ymm11,%ymm3
+
+ vpxor %ymm1,%ymm9,%ymm9
+ vpaddd %ymm7,%ymm5,%ymm5
+
+ vpsrld $13,%ymm10,%ymm1
+
+ vpslld $19,%ymm10,%ymm2
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpand %ymm3,%ymm4,%ymm4
+
+ vpxor %ymm1,%ymm9,%ymm7
+
+ vpsrld $22,%ymm10,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm10,%ymm2
+ vpxor %ymm4,%ymm11,%ymm9
+ vpaddd %ymm5,%ymm13,%ymm13
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm5,%ymm9,%ymm9
+ vpaddd %ymm7,%ymm9,%ymm9
+ vmovdqu 0-128(%rax),%ymm5
+ vpaddd 256-256-128(%rbx),%ymm6,%ymm6
+
+ vpsrld $3,%ymm5,%ymm7
+ vpsrld $7,%ymm5,%ymm1
+ vpslld $25,%ymm5,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $18,%ymm5,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $14,%ymm5,%ymm2
+ vmovdqu 416-256-128(%rbx),%ymm0
+ vpsrld $10,%ymm0,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpsrld $17,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $15,%ymm0,%ymm2
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm1,%ymm4,%ymm7
+ vpsrld $19,%ymm0,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $13,%ymm0,%ymm2
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpsrld $6,%ymm13,%ymm7
+ vpslld $26,%ymm13,%ymm2
+ vmovdqu %ymm6,480-256-128(%rbx)
+ vpaddd %ymm8,%ymm6,%ymm6
+
+ vpsrld $11,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+ vpslld $21,%ymm13,%ymm2
+ vpaddd 96(%rbp),%ymm6,%ymm6
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $25,%ymm13,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $7,%ymm13,%ymm2
+ vpandn %ymm15,%ymm13,%ymm0
+ vpand %ymm14,%ymm13,%ymm4
+
+ vpxor %ymm1,%ymm7,%ymm7
+
+ vpsrld $2,%ymm9,%ymm8
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $30,%ymm9,%ymm1
+ vpxor %ymm4,%ymm0,%ymm0
+ vpxor %ymm9,%ymm10,%ymm4
+
+ vpxor %ymm1,%ymm8,%ymm8
+ vpaddd %ymm7,%ymm6,%ymm6
+
+ vpsrld $13,%ymm9,%ymm1
+
+ vpslld $19,%ymm9,%ymm2
+ vpaddd %ymm0,%ymm6,%ymm6
+ vpand %ymm4,%ymm3,%ymm3
+
+ vpxor %ymm1,%ymm8,%ymm7
+
+ vpsrld $22,%ymm9,%ymm1
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpslld $10,%ymm9,%ymm2
+ vpxor %ymm3,%ymm10,%ymm8
+ vpaddd %ymm6,%ymm12,%ymm12
+
+ vpxor %ymm1,%ymm7,%ymm7
+ vpxor %ymm2,%ymm7,%ymm7
+
+ vpaddd %ymm6,%ymm8,%ymm8
+ vpaddd %ymm7,%ymm8,%ymm8
+ addq $256,%rbp
+ decl %ecx
+ jnz .Loop_16_xx_avx2
+
+ movl $1,%ecx
+ leaq 512(%rsp),%rbx
+ leaq K256+128(%rip),%rbp
+ cmpl 0(%rbx),%ecx
+ cmovgeq %rbp,%r12
+ cmpl 4(%rbx),%ecx
+ cmovgeq %rbp,%r13
+ cmpl 8(%rbx),%ecx
+ cmovgeq %rbp,%r14
+ cmpl 12(%rbx),%ecx
+ cmovgeq %rbp,%r15
+ cmpl 16(%rbx),%ecx
+ cmovgeq %rbp,%r8
+ cmpl 20(%rbx),%ecx
+ cmovgeq %rbp,%r9
+ cmpl 24(%rbx),%ecx
+ cmovgeq %rbp,%r10
+ cmpl 28(%rbx),%ecx
+ cmovgeq %rbp,%r11
+ vmovdqa (%rbx),%ymm7
+ vpxor %ymm0,%ymm0,%ymm0
+ vmovdqa %ymm7,%ymm6
+ vpcmpgtd %ymm0,%ymm6,%ymm6
+ vpaddd %ymm6,%ymm7,%ymm7
+
+ vmovdqu 0-128(%rdi),%ymm0
+ vpand %ymm6,%ymm8,%ymm8
+ vmovdqu 32-128(%rdi),%ymm1
+ vpand %ymm6,%ymm9,%ymm9
+ vmovdqu 64-128(%rdi),%ymm2
+ vpand %ymm6,%ymm10,%ymm10
+ vmovdqu 96-128(%rdi),%ymm5
+ vpand %ymm6,%ymm11,%ymm11
+ vpaddd %ymm0,%ymm8,%ymm8
+ vmovdqu 128-128(%rdi),%ymm0
+ vpand %ymm6,%ymm12,%ymm12
+ vpaddd %ymm1,%ymm9,%ymm9
+ vmovdqu 160-128(%rdi),%ymm1
+ vpand %ymm6,%ymm13,%ymm13
+ vpaddd %ymm2,%ymm10,%ymm10
+ vmovdqu 192-128(%rdi),%ymm2
+ vpand %ymm6,%ymm14,%ymm14
+ vpaddd %ymm5,%ymm11,%ymm11
+ vmovdqu 224-128(%rdi),%ymm5
+ vpand %ymm6,%ymm15,%ymm15
+ vpaddd %ymm0,%ymm12,%ymm12
+ vpaddd %ymm1,%ymm13,%ymm13
+ vmovdqu %ymm8,0-128(%rdi)
+ vpaddd %ymm2,%ymm14,%ymm14
+ vmovdqu %ymm9,32-128(%rdi)
+ vpaddd %ymm5,%ymm15,%ymm15
+ vmovdqu %ymm10,64-128(%rdi)
+ vmovdqu %ymm11,96-128(%rdi)
+ vmovdqu %ymm12,128-128(%rdi)
+ vmovdqu %ymm13,160-128(%rdi)
+ vmovdqu %ymm14,192-128(%rdi)
+ vmovdqu %ymm15,224-128(%rdi)
+
+ vmovdqu %ymm7,(%rbx)
+ leaq 256+128(%rsp),%rbx
+ vmovdqu .Lpbswap(%rip),%ymm6
+ decl %edx
+ jnz .Loop_avx2
+
+
+
+
+
+
+
+.Ldone_avx2:
+ movq 544(%rsp),%rax
+ vzeroupper
+ movq -48(%rax),%r15
+ movq -40(%rax),%r14
+ movq -32(%rax),%r13
+ movq -24(%rax),%r12
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+.Lepilogue_avx2:
+ .byte 0xf3,0xc3
+.size sha256_multi_block_avx2,.-sha256_multi_block_avx2
+.align 256
+K256:
+.long 1116352408,1116352408,1116352408,1116352408
+.long 1116352408,1116352408,1116352408,1116352408
+.long 1899447441,1899447441,1899447441,1899447441
+.long 1899447441,1899447441,1899447441,1899447441
+.long 3049323471,3049323471,3049323471,3049323471
+.long 3049323471,3049323471,3049323471,3049323471
+.long 3921009573,3921009573,3921009573,3921009573
+.long 3921009573,3921009573,3921009573,3921009573
+.long 961987163,961987163,961987163,961987163
+.long 961987163,961987163,961987163,961987163
+.long 1508970993,1508970993,1508970993,1508970993
+.long 1508970993,1508970993,1508970993,1508970993
+.long 2453635748,2453635748,2453635748,2453635748
+.long 2453635748,2453635748,2453635748,2453635748
+.long 2870763221,2870763221,2870763221,2870763221
+.long 2870763221,2870763221,2870763221,2870763221
+.long 3624381080,3624381080,3624381080,3624381080
+.long 3624381080,3624381080,3624381080,3624381080
+.long 310598401,310598401,310598401,310598401
+.long 310598401,310598401,310598401,310598401
+.long 607225278,607225278,607225278,607225278
+.long 607225278,607225278,607225278,607225278
+.long 1426881987,1426881987,1426881987,1426881987
+.long 1426881987,1426881987,1426881987,1426881987
+.long 1925078388,1925078388,1925078388,1925078388
+.long 1925078388,1925078388,1925078388,1925078388
+.long 2162078206,2162078206,2162078206,2162078206
+.long 2162078206,2162078206,2162078206,2162078206
+.long 2614888103,2614888103,2614888103,2614888103
+.long 2614888103,2614888103,2614888103,2614888103
+.long 3248222580,3248222580,3248222580,3248222580
+.long 3248222580,3248222580,3248222580,3248222580
+.long 3835390401,3835390401,3835390401,3835390401
+.long 3835390401,3835390401,3835390401,3835390401
+.long 4022224774,4022224774,4022224774,4022224774
+.long 4022224774,4022224774,4022224774,4022224774
+.long 264347078,264347078,264347078,264347078
+.long 264347078,264347078,264347078,264347078
+.long 604807628,604807628,604807628,604807628
+.long 604807628,604807628,604807628,604807628
+.long 770255983,770255983,770255983,770255983
+.long 770255983,770255983,770255983,770255983
+.long 1249150122,1249150122,1249150122,1249150122
+.long 1249150122,1249150122,1249150122,1249150122
+.long 1555081692,1555081692,1555081692,1555081692
+.long 1555081692,1555081692,1555081692,1555081692
+.long 1996064986,1996064986,1996064986,1996064986
+.long 1996064986,1996064986,1996064986,1996064986
+.long 2554220882,2554220882,2554220882,2554220882
+.long 2554220882,2554220882,2554220882,2554220882
+.long 2821834349,2821834349,2821834349,2821834349
+.long 2821834349,2821834349,2821834349,2821834349
+.long 2952996808,2952996808,2952996808,2952996808
+.long 2952996808,2952996808,2952996808,2952996808
+.long 3210313671,3210313671,3210313671,3210313671
+.long 3210313671,3210313671,3210313671,3210313671
+.long 3336571891,3336571891,3336571891,3336571891
+.long 3336571891,3336571891,3336571891,3336571891
+.long 3584528711,3584528711,3584528711,3584528711
+.long 3584528711,3584528711,3584528711,3584528711
+.long 113926993,113926993,113926993,113926993
+.long 113926993,113926993,113926993,113926993
+.long 338241895,338241895,338241895,338241895
+.long 338241895,338241895,338241895,338241895
+.long 666307205,666307205,666307205,666307205
+.long 666307205,666307205,666307205,666307205
+.long 773529912,773529912,773529912,773529912
+.long 773529912,773529912,773529912,773529912
+.long 1294757372,1294757372,1294757372,1294757372
+.long 1294757372,1294757372,1294757372,1294757372
+.long 1396182291,1396182291,1396182291,1396182291
+.long 1396182291,1396182291,1396182291,1396182291
+.long 1695183700,1695183700,1695183700,1695183700
+.long 1695183700,1695183700,1695183700,1695183700
+.long 1986661051,1986661051,1986661051,1986661051
+.long 1986661051,1986661051,1986661051,1986661051
+.long 2177026350,2177026350,2177026350,2177026350
+.long 2177026350,2177026350,2177026350,2177026350
+.long 2456956037,2456956037,2456956037,2456956037
+.long 2456956037,2456956037,2456956037,2456956037
+.long 2730485921,2730485921,2730485921,2730485921
+.long 2730485921,2730485921,2730485921,2730485921
+.long 2820302411,2820302411,2820302411,2820302411
+.long 2820302411,2820302411,2820302411,2820302411
+.long 3259730800,3259730800,3259730800,3259730800
+.long 3259730800,3259730800,3259730800,3259730800
+.long 3345764771,3345764771,3345764771,3345764771
+.long 3345764771,3345764771,3345764771,3345764771
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3600352804,3600352804,3600352804,3600352804
+.long 3600352804,3600352804,3600352804,3600352804
+.long 4094571909,4094571909,4094571909,4094571909
+.long 4094571909,4094571909,4094571909,4094571909
+.long 275423344,275423344,275423344,275423344
+.long 275423344,275423344,275423344,275423344
+.long 430227734,430227734,430227734,430227734
+.long 430227734,430227734,430227734,430227734
+.long 506948616,506948616,506948616,506948616
+.long 506948616,506948616,506948616,506948616
+.long 659060556,659060556,659060556,659060556
+.long 659060556,659060556,659060556,659060556
+.long 883997877,883997877,883997877,883997877
+.long 883997877,883997877,883997877,883997877
+.long 958139571,958139571,958139571,958139571
+.long 958139571,958139571,958139571,958139571
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1955562222,1955562222,1955562222,1955562222
+.long 1955562222,1955562222,1955562222,1955562222
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2756734187,2756734187,2756734187,2756734187
+.long 2756734187,2756734187,2756734187,2756734187
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3329325298,3329325298,3329325298,3329325298
+.long 3329325298,3329325298,3329325298,3329325298
+.Lpbswap:
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+K256_shaext:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.byte 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
Property changes on: trunk/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/lib4758cca/Makefile.depend
===================================================================
--- trunk/secure/lib/libcrypto/engines/lib4758cca/Makefile.depend (rev 0)
+++ trunk/secure/lib/libcrypto/engines/lib4758cca/Makefile.depend 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,20 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/lib4758cca/Makefile.depend 284345 2015-06-13 19:20:56Z sjg $
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ gnu/lib/libgcc \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
Property changes on: trunk/secure/lib/libcrypto/engines/lib4758cca/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/libaep/Makefile.depend
===================================================================
--- trunk/secure/lib/libcrypto/engines/libaep/Makefile.depend (rev 0)
+++ trunk/secure/lib/libcrypto/engines/libaep/Makefile.depend 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,20 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/libaep/Makefile.depend 284345 2015-06-13 19:20:56Z sjg $
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ gnu/lib/libgcc \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
Property changes on: trunk/secure/lib/libcrypto/engines/libaep/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/libatalla/Makefile.depend
===================================================================
--- trunk/secure/lib/libcrypto/engines/libatalla/Makefile.depend (rev 0)
+++ trunk/secure/lib/libcrypto/engines/libatalla/Makefile.depend 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,20 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/libatalla/Makefile.depend 284345 2015-06-13 19:20:56Z sjg $
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ gnu/lib/libgcc \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
Property changes on: trunk/secure/lib/libcrypto/engines/libatalla/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/libcapi/Makefile
===================================================================
--- trunk/secure/lib/libcrypto/engines/libcapi/Makefile (rev 0)
+++ trunk/secure/lib/libcrypto/engines/libcapi/Makefile 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,7 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/libcapi/Makefile 290207 2015-10-30 20:51:33Z jkim $
+
+SHLIB_NAME?= libcapi.so
+SRCS= e_capi.c
+
+.include <bsd.lib.mk>
Property changes on: trunk/secure/lib/libcrypto/engines/libcapi/Makefile
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/libcapi/Makefile.depend
===================================================================
--- trunk/secure/lib/libcrypto/engines/libcapi/Makefile.depend (rev 0)
+++ trunk/secure/lib/libcrypto/engines/libcapi/Makefile.depend 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,20 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/libcapi/Makefile.depend 291563 2015-12-01 05:23:19Z bdrewery $
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ gnu/lib/libgcc \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
Property changes on: trunk/secure/lib/libcrypto/engines/libcapi/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/libchil/Makefile.depend
===================================================================
--- trunk/secure/lib/libcrypto/engines/libchil/Makefile.depend (rev 0)
+++ trunk/secure/lib/libcrypto/engines/libchil/Makefile.depend 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,20 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/libchil/Makefile.depend 284345 2015-06-13 19:20:56Z sjg $
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ gnu/lib/libgcc \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
Property changes on: trunk/secure/lib/libcrypto/engines/libchil/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/libcswift/Makefile.depend
===================================================================
--- trunk/secure/lib/libcrypto/engines/libcswift/Makefile.depend (rev 0)
+++ trunk/secure/lib/libcrypto/engines/libcswift/Makefile.depend 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,20 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/libcswift/Makefile.depend 284345 2015-06-13 19:20:56Z sjg $
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ gnu/lib/libgcc \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
Property changes on: trunk/secure/lib/libcrypto/engines/libcswift/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/libgost/Makefile.depend
===================================================================
--- trunk/secure/lib/libcrypto/engines/libgost/Makefile.depend (rev 0)
+++ trunk/secure/lib/libcrypto/engines/libgost/Makefile.depend 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,20 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/libgost/Makefile.depend 284345 2015-06-13 19:20:56Z sjg $
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ gnu/lib/libgcc \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
Property changes on: trunk/secure/lib/libcrypto/engines/libgost/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/libnuron/Makefile.depend
===================================================================
--- trunk/secure/lib/libcrypto/engines/libnuron/Makefile.depend (rev 0)
+++ trunk/secure/lib/libcrypto/engines/libnuron/Makefile.depend 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,20 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/libnuron/Makefile.depend 284345 2015-06-13 19:20:56Z sjg $
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ gnu/lib/libgcc \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
Property changes on: trunk/secure/lib/libcrypto/engines/libnuron/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/libsureware/Makefile.depend
===================================================================
--- trunk/secure/lib/libcrypto/engines/libsureware/Makefile.depend (rev 0)
+++ trunk/secure/lib/libcrypto/engines/libsureware/Makefile.depend 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,20 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/libsureware/Makefile.depend 284345 2015-06-13 19:20:56Z sjg $
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ gnu/lib/libgcc \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
Property changes on: trunk/secure/lib/libcrypto/engines/libsureware/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/engines/libubsec/Makefile.depend
===================================================================
--- trunk/secure/lib/libcrypto/engines/libubsec/Makefile.depend (rev 0)
+++ trunk/secure/lib/libcrypto/engines/libubsec/Makefile.depend 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,20 @@
+# $MidnightBSD$
+# $FreeBSD: stable/11/secure/lib/libcrypto/engines/libubsec/Makefile.depend 284345 2015-06-13 19:20:56Z sjg $
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+ gnu/lib/csu \
+ gnu/lib/libgcc \
+ include \
+ include/xlocale \
+ lib/${CSU_DIR} \
+ lib/libc \
+ lib/libcompiler_rt \
+ secure/lib/libcrypto \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
Property changes on: trunk/secure/lib/libcrypto/engines/libubsec/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/ASN1_TIME_set.3
===================================================================
--- trunk/secure/lib/libcrypto/man/ASN1_TIME_set.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/ASN1_TIME_set.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,261 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "ASN1_TIME_set 3"
+.TH ASN1_TIME_set 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+ASN1_TIME_set, ASN1_TIME_adj, ASN1_TIME_check, ASN1_TIME_set_string,
+ASN1_TIME_print, ASN1_TIME_diff \- ASN.1 Time functions.
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 6
+\& ASN1_TIME *ASN1_TIME_set(ASN1_TIME *s, time_t t);
+\& ASN1_TIME *ASN1_TIME_adj(ASN1_TIME *s, time_t t,
+\& int offset_day, long offset_sec);
+\& int ASN1_TIME_set_string(ASN1_TIME *s, const char *str);
+\& int ASN1_TIME_check(const ASN1_TIME *t);
+\& int ASN1_TIME_print(BIO *b, const ASN1_TIME *s);
+\&
+\& int ASN1_TIME_diff(int *pday, int *psec,
+\& const ASN1_TIME *from, const ASN1_TIME *to);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+The function \fIASN1_TIME_set()\fR sets the \s-1ASN1_TIME\s0 structure \fBs\fR to the
+time represented by the time_t value \fBt\fR. If \fBs\fR is \s-1NULL\s0 a new \s-1ASN1_TIME\s0
+structure is allocated and returned.
+.PP
+\&\fIASN1_TIME_adj()\fR sets the \s-1ASN1_TIME\s0 structure \fBs\fR to the time represented
+by the time \fBoffset_day\fR and \fBoffset_sec\fR after the time_t value \fBt\fR.
+The values of \fBoffset_day\fR or \fBoffset_sec\fR can be negative to set a
+time before \fBt\fR. The \fBoffset_sec\fR value can also exceed the number of
+seconds in a day. If \fBs\fR is \s-1NULL\s0 a new \s-1ASN1_TIME\s0 structure is allocated
+and returned.
+.PP
+\&\fIASN1_TIME_set_string()\fR sets \s-1ASN1_TIME\s0 structure \fBs\fR to the time
+represented by string \fBstr\fR which must be in appropriate \s-1ASN.1\s0 time
+format (for example \s-1YYMMDDHHMMSSZ\s0 or \s-1YYYYMMDDHHMMSSZ\s0).
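+.PP
+For illustration only (a minimal sketch, not part of the upstream manual; the
+string below is an arbitrary GeneralizedTime value chosen as an example):
+.PP
+.Vb 8
+\& #include <stdio.h>
+\& #include <openssl/asn1.h>
+\&
+\& ASN1_TIME *tm = ASN1_TIME_new();
+\&
+\& if (!ASN1_TIME_set_string(tm, "20190120054035Z"))
+\&     fprintf(stderr, "invalid time string\en");
+\& ASN1_STRING_free(tm);
+.Ve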
+.PP
+\&\fIASN1_TIME_check()\fR checks the syntax of \s-1ASN1_TIME\s0 structure \fBs\fR.
+.PP
+\&\fIASN1_TIME_print()\fR prints out the time \fBs\fR to \s-1BIO\s0 \fBb\fR in human readable
+format. It will be of the format \s-1MMM DD HH:MM:SS YYYY\s0 [\s-1GMT\s0], for example
+\&\*(L"Feb 3 00:55:52 2015 \s-1GMT\*(R"\s0 it does not include a newline. If the time
+structure has invalid format it prints out \*(L"Bad time value\*(R" and returns
+an error.
+.PP
+\&\fIASN1_TIME_diff()\fR sets \fB*pday\fR and \fB*psec\fR to the time difference between
+\&\fBfrom\fR and \fBto\fR. If \fBto\fR represents a time later than \fBfrom\fR then
+one or both (depending on the time difference) of \fB*pday\fR and \fB*psec\fR
+will be positive. If \fBto\fR represents a time earlier than \fBfrom\fR then
+one or both of \fB*pday\fR and \fB*psec\fR will be negative. If \fBto\fR and \fBfrom\fR
+represent the same time then \fB*pday\fR and \fB*psec\fR will both be zero.
+If both \fB*pday\fR and \fB*psec\fR are non-zero they will always have the same
+sign. The value of \fB*psec\fR will always be less than the number of seconds
+in a day. If \fBfrom\fR or \fBto\fR is \s-1NULL\s0 the current time is used.
+.SH "NOTES"
+.IX Header "NOTES"
+The \s-1ASN1_TIME\s0 structure corresponds to the \s-1ASN.1\s0 structure \fBTime\fR
+defined in \s-1RFC5280\s0 et al. The time setting functions obey the rules outlined
+in \s-1RFC5280:\s0 if the date can be represented by UTCTime it is used, else
+GeneralizedTime is used.
+.PP
+The \s-1ASN1_TIME\s0 structure is represented as an \s-1ASN1_STRING\s0 internally and can
+be freed up using \fIASN1_STRING_free()\fR.
+.PP
+The \s-1ASN1_TIME\s0 structure can represent years from 0000 to 9999 but no attempt
+is made to correct ancient calendar changes (for example from Julian to
+Gregorian calendars).
+.PP
+Some applications add offset times directly to a time_t value and pass the
+results to \fIASN1_TIME_set()\fR (or equivalent). This can cause problems as the
+time_t value can overflow on some systems resulting in unexpected results.
+New applications should use \fIASN1_TIME_adj()\fR instead and pass the offset value
+in the \fBoffset_sec\fR and \fBoffset_day\fR parameters instead of directly
+manipulating a time_t value.
+.SH "BUGS"
+.IX Header "BUGS"
+\&\fIASN1_TIME_print()\fR currently does not print out the time zone: it either prints
+out \*(L"\s-1GMT\*(R"\s0 or nothing. But all certificates complying with \s-1RFC5280\s0 et al use \s-1GMT\s0
+anyway.
+.SH "EXAMPLES"
+.IX Header "EXAMPLES"
+Set a time structure to one hour after the current time and print it out:
+.PP
+.Vb 11
+\& #include <time.h>
+\& #include <openssl/asn1.h>
+\& ASN1_TIME *tm;
+\& time_t t;
+\& BIO *b;
+\& t = time(NULL);
+\& tm = ASN1_TIME_adj(NULL, t, 0, 60 * 60);
+\& b = BIO_new_fp(stdout, BIO_NOCLOSE);
+\& ASN1_TIME_print(b, tm);
+\& ASN1_STRING_free(tm);
+\& BIO_free(b);
+.Ve
+.PP
+Determine if one time is later or sooner than the current time:
+.PP
+.Vb 1
+\& int day, sec;
+\&
+\& if (!ASN1_TIME_diff(&day, &sec, NULL, to))
+\& /* Invalid time format */
+\&
+\& if (day > 0 || sec > 0)
+\& printf("Later\en");
+\& else if (day < 0 || sec < 0)
+\& printf("Sooner\en");
+\& else
+\& printf("Same\en");
+.Ve
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+\&\fIASN1_TIME_set()\fR and \fIASN1_TIME_adj()\fR return a pointer to an \s-1ASN1_TIME\s0 structure
+or \s-1NULL\s0 if an error occurred.
+.PP
+\&\fIASN1_TIME_set_string()\fR returns 1 if the time value is successfully set and
+0 otherwise.
+.PP
+\&\fIASN1_TIME_check()\fR returns 1 if the structure is syntactically correct and 0
+otherwise.
+.PP
+\&\fIASN1_TIME_print()\fR returns 1 if the time is successfully printed out and 0 if
+an error occurred (I/O error or invalid time format).
+.PP
+\&\fIASN1_TIME_diff()\fR returns 1 for success and 0 for failure. It can fail if, for
+example, the passed \s-1ASN1_TIME\s0 structure has invalid syntax.
Property changes on: trunk/secure/lib/libcrypto/man/ASN1_TIME_set.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/EC_GFp_simple_method.3
===================================================================
--- trunk/secure/lib/libcrypto/man/EC_GFp_simple_method.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/EC_GFp_simple_method.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,190 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "EC_GFp_simple_method 3"
+.TH EC_GFp_simple_method 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+EC_GFp_simple_method, EC_GFp_mont_method, EC_GFp_nist_method, EC_GFp_nistp224_method, EC_GFp_nistp256_method, EC_GFp_nistp521_method, EC_GF2m_simple_method, EC_METHOD_get_field_type \- Functions for obtaining EC_METHOD objects.
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 1
+\& #include <openssl/ec.h>
+\&
+\& const EC_METHOD *EC_GFp_simple_method(void);
+\& const EC_METHOD *EC_GFp_mont_method(void);
+\& const EC_METHOD *EC_GFp_nist_method(void);
+\& const EC_METHOD *EC_GFp_nistp224_method(void);
+\& const EC_METHOD *EC_GFp_nistp256_method(void);
+\& const EC_METHOD *EC_GFp_nistp521_method(void);
+\&
+\& const EC_METHOD *EC_GF2m_simple_method(void);
+\&
+\& int EC_METHOD_get_field_type(const EC_METHOD *meth);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+The Elliptic Curve library provides a number of different implementations through a single common interface.
+When constructing a curve using EC_GROUP_new (see \fIEC_GROUP_new\fR\|(3)) an
+implementation method must be provided. The functions described here all return a const pointer to an
+\&\fB\s-1EC_METHOD\s0\fR structure that can be passed to EC_GROUP_new. It is important that the correct implementation
+type for the form of curve selected is used.
+.PP
+For F2^m curves there is only one implementation choice, i.e. EC_GF2m_simple_method.
+.PP
+For Fp curves the lowest common denominator implementation is the EC_GFp_simple_method implementation. All
+other implementations are based on this one. EC_GFp_mont_method builds on EC_GFp_simple_method but adds the
+use of Montgomery multiplication (see \fIBN_mod_mul_montgomery\fR\|(3)). EC_GFp_nist_method
+offers an implementation optimised for use with \s-1NIST\s0 recommended curves (\s-1NIST\s0 curves are available through
+EC_GROUP_new_by_curve_name as described in \fIEC_GROUP_new\fR\|(3)).
+.PP
+The functions EC_GFp_nistp224_method, EC_GFp_nistp256_method and EC_GFp_nistp521_method offer 64 bit
+optimised implementations for the \s-1NIST P224, P256\s0 and P521 curves respectively. Note, however, that these
+implementations are not available on all platforms.
+.PP
+EC_METHOD_get_field_type identifies what type of field the \s-1EC_METHOD\s0 structure supports, which will be either
+F2^m or Fp. If the field type is Fp then the value \fBNID_X9_62_prime_field\fR is returned. If the field type is
+F2^m then the value \fBNID_X9_62_characteristic_two_field\fR is returned. These values are defined in the
+obj_mac.h header file.
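+.PP
+As a minimal sketch (the choice of method here is arbitrary), the field type of
+a method can be inspected as follows:
+.PP
+.Vb 7
+\& #include <openssl/ec.h>
+\& #include <openssl/obj_mac.h>
+\&
+\& const EC_METHOD *meth = EC_GFp_mont_method();
+\&
+\& if (EC_METHOD_get_field_type(meth) == NID_X9_62_prime_field)
+\&     /* meth implements curves over a prime field Fp */;
+.Ve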
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+All EC_GFp* functions and EC_GF2m_simple_method always return a const pointer to an \s-1EC_METHOD\s0 structure.
+.PP
+EC_METHOD_get_field_type returns an integer that identifies the type of field the \s-1EC_METHOD\s0 structure supports.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fIcrypto\fR\|(3), \fIec\fR\|(3), \fIEC_GROUP_new\fR\|(3), \fIEC_GROUP_copy\fR\|(3),
+\&\fIEC_POINT_new\fR\|(3), \fIEC_POINT_add\fR\|(3), \fIEC_KEY_new\fR\|(3),
+\&\fId2i_ECPKParameters\fR\|(3),
+\&\fIBN_mod_mul_montgomery\fR\|(3)
Property changes on: trunk/secure/lib/libcrypto/man/EC_GFp_simple_method.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/EC_GROUP_copy.3
===================================================================
--- trunk/secure/lib/libcrypto/man/EC_GROUP_copy.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/EC_GROUP_copy.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,305 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "EC_GROUP_copy 3"
+.TH EC_GROUP_copy 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+EC_GROUP_copy, EC_GROUP_dup, EC_GROUP_method_of, EC_GROUP_set_generator, EC_GROUP_get0_generator, EC_GROUP_get_order, EC_GROUP_get_cofactor, EC_GROUP_set_curve_name, EC_GROUP_get_curve_name, EC_GROUP_set_asn1_flag, EC_GROUP_get_asn1_flag, EC_GROUP_set_point_conversion_form, EC_GROUP_get_point_conversion_form, EC_GROUP_get0_seed, EC_GROUP_get_seed_len, EC_GROUP_set_seed, EC_GROUP_get_degree, EC_GROUP_check, EC_GROUP_check_discriminant, EC_GROUP_cmp, EC_GROUP_get_basis_type, EC_GROUP_get_trinomial_basis, EC_GROUP_get_pentanomial_basis \- Functions for manipulating EC_GROUP objects.
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 2
+\& #include <openssl/ec.h>
+\& #include <openssl/bn.h>
+\&
+\& int EC_GROUP_copy(EC_GROUP *dst, const EC_GROUP *src);
+\& EC_GROUP *EC_GROUP_dup(const EC_GROUP *src);
+\&
+\& const EC_METHOD *EC_GROUP_method_of(const EC_GROUP *group);
+\&
+\& int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, const BIGNUM *order, const BIGNUM *cofactor);
+\& const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group);
+\&
+\& int EC_GROUP_get_order(const EC_GROUP *group, BIGNUM *order, BN_CTX *ctx);
+\& int EC_GROUP_get_cofactor(const EC_GROUP *group, BIGNUM *cofactor, BN_CTX *ctx);
+\&
+\& void EC_GROUP_set_curve_name(EC_GROUP *group, int nid);
+\& int EC_GROUP_get_curve_name(const EC_GROUP *group);
+\&
+\& void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag);
+\& int EC_GROUP_get_asn1_flag(const EC_GROUP *group);
+\&
+\& void EC_GROUP_set_point_conversion_form(EC_GROUP *group, point_conversion_form_t form);
+\& point_conversion_form_t EC_GROUP_get_point_conversion_form(const EC_GROUP *);
+\&
+\& unsigned char *EC_GROUP_get0_seed(const EC_GROUP *x);
+\& size_t EC_GROUP_get_seed_len(const EC_GROUP *);
+\& size_t EC_GROUP_set_seed(EC_GROUP *, const unsigned char *, size_t len);
+\&
+\& int EC_GROUP_get_degree(const EC_GROUP *group);
+\&
+\& int EC_GROUP_check(const EC_GROUP *group, BN_CTX *ctx);
+\&
+\& int EC_GROUP_check_discriminant(const EC_GROUP *group, BN_CTX *ctx);
+\&
+\& int EC_GROUP_cmp(const EC_GROUP *a, const EC_GROUP *b, BN_CTX *ctx);
+\&
+\& int EC_GROUP_get_basis_type(const EC_GROUP *);
+\& int EC_GROUP_get_trinomial_basis(const EC_GROUP *, unsigned int *k);
+\& int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1,
+\& unsigned int *k2, unsigned int *k3);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+EC_GROUP_copy copies the curve \fBsrc\fR into \fBdst\fR. Both \fBsrc\fR and \fBdst\fR must use the same \s-1EC_METHOD.\s0
+.PP
+EC_GROUP_dup creates a new \s-1EC_GROUP\s0 object and copies the content from \fBsrc\fR to the newly created
+\&\s-1EC_GROUP\s0 object.
+.PP
+EC_GROUP_method_of obtains the \s-1EC_METHOD\s0 of \fBgroup\fR.
+.PP
+EC_GROUP_set_generator sets curve parameters that must be agreed by all participants using the curve. These
+parameters include the \fBgenerator\fR, the \fBorder\fR and the \fBcofactor\fR. The \fBgenerator\fR is a well defined point on the
+curve chosen for cryptographic operations. Integers used for point multiplications will be between 0 and
+n\-1 where n is the \fBorder\fR. The \fBorder\fR multiplied by the \fBcofactor\fR gives the number of points on the curve.
+.PP
+EC_GROUP_get0_generator returns the generator for the identified \fBgroup\fR.
+.PP
+The functions EC_GROUP_get_order and EC_GROUP_get_cofactor populate the provided \fBorder\fR and \fBcofactor\fR parameters
+with the respective order and cofactors for the \fBgroup\fR.
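+.PP
+A minimal sketch (assuming \fBgroup\fR has already been created as described in
+\&\fIEC_GROUP_new\fR\|(3); error handling abbreviated):
+.PP
+.Vb 11
+\& BN_CTX *ctx = BN_CTX_new();
+\& BIGNUM *order = BN_new();
+\& BIGNUM *cofactor = BN_new();
+\&
+\& if (EC_GROUP_get_order(group, order, ctx)
+\&         && EC_GROUP_get_cofactor(group, cofactor, ctx))
+\&     /* order and cofactor now hold the group parameters */;
+\&
+\& BN_free(order);
+\& BN_free(cofactor);
+\& BN_CTX_free(ctx);
+.Ve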
+.PP
+The functions EC_GROUP_set_curve_name and EC_GROUP_get_curve_name, set and get the \s-1NID\s0 for the curve respectively
+(see \fIEC_GROUP_new\fR\|(3)). If a curve does not have a \s-1NID\s0 associated with it, then EC_GROUP_get_curve_name
+will return 0.
+.PP
+The asn1_flag value on a curve is used to determine whether there is a specific \s-1ASN1 OID\s0 to describe the curve or not.
+If the asn1_flag is 1 then this is a named curve with an associated \s-1ASN1 OID.\s0 If not then asn1_flag is 0. The functions
+EC_GROUP_get_asn1_flag and EC_GROUP_set_asn1_flag get and set the status of the asn1_flag for the curve. If set then
+the curve_name must also be set.
+.PP
+The point_conversion_form for a curve controls how \s-1EC_POINT\s0 data is encoded as \s-1ASN1\s0 as defined in X9.62 (\s-1ECDSA\s0).
+point_conversion_form_t is an enum defined as follows:
+.PP
+.Vb 10
+\& typedef enum {
+\& /** the point is encoded as z||x, where the octet z specifies
+\& * which solution of the quadratic equation y is */
+\& POINT_CONVERSION_COMPRESSED = 2,
+\& /** the point is encoded as z||x||y, where z is the octet 0x04 */
+\& POINT_CONVERSION_UNCOMPRESSED = 4,
+\& /** the point is encoded as z||x||y, where the octet z specifies
+\& * which solution of the quadratic equation y is */
+\& POINT_CONVERSION_HYBRID = 6
+\& } point_conversion_form_t;
+.Ve
+.PP
+For \s-1POINT_CONVERSION_UNCOMPRESSED\s0 the point is encoded as an octet signifying the \s-1UNCOMPRESSED\s0 form has been used followed by
+the octets for x, followed by the octets for y.
+.PP
+For any given x co-ordinate for a point on a curve it is possible to derive two possible y values. For
+\&\s-1POINT_CONVERSION_COMPRESSED\s0 the point is encoded as an octet signifying that the \s-1COMPRESSED\s0 form has been used \s-1AND\s0 which of
+the two possible solutions for y has been used, followed by the octets for x.
+.PP
+For \s-1POINT_CONVERSION_HYBRID\s0 the point is encoded as an octet signifying the \s-1HYBRID\s0 form has been used \s-1AND\s0 which of the two
+possible solutions for y has been used, followed by the octets for x, followed by the octets for y.
+.PP
+The functions EC_GROUP_set_point_conversion_form and EC_GROUP_get_point_conversion_form set and get the point_conversion_form
+for the curve respectively.
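+.PP
+For example, as a minimal sketch, compressed encoding could be requested for a
+curve as follows:
+.PP
+.Vb 2
+\& EC_GROUP_set_point_conversion_form(group, POINT_CONVERSION_COMPRESSED);
+\& point_conversion_form_t form = EC_GROUP_get_point_conversion_form(group);
+.Ve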
+.PP
+\&\s-1ANSI X9.62\s0 (\s-1ECDSA\s0 standard) defines a method of generating the curve parameter b from a random number. This provides advantages
+in that a parameter obtained in this way is highly unlikely to be susceptible to special purpose attacks, or have any trapdoors in it.
+If the seed is present for a curve then the b parameter was generated in a verifiable fashion using that seed. The OpenSSL \s-1EC\s0 library
+does not use this seed value but does enable you to inspect it using EC_GROUP_get0_seed. This returns a pointer to a memory block
+containing the seed that was used. The length of the memory block can be obtained using EC_GROUP_get_seed_len. A number of the
+builtin curves within the library provide seed values that can be obtained. It is also possible to set a custom seed using
+EC_GROUP_set_seed and passing a pointer to a memory block, along with the length of the seed. Again, the \s-1EC\s0 library will not use
+this seed value, although it will be preserved in any \s-1ASN1\s0 based communications.
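+.PP
+A minimal sketch (assuming \fBgroup\fR already exists) of inspecting the seed:
+.PP
+.Vb 6
+\& size_t seed_len = EC_GROUP_get_seed_len(group);
+\&
+\& if (seed_len > 0) {
+\&     unsigned char *seed = EC_GROUP_get0_seed(group);
+\&     /* seed points at seed_len bytes used to generate the parameter b */
+\& }
+.Ve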
+.PP
+EC_GROUP_get_degree gets the degree of the field. For Fp fields this will be the number of bits in p. For F2^m fields this will be
+the value m.
+.PP
+The function EC_GROUP_check_discriminant calculates the discriminant for the curve and verifies that it is valid.
+For a curve defined over Fp the discriminant is given by the formula 4*a^3 + 27*b^2 whilst for F2^m curves the discriminant is
+simply b. In either case, for the curve to be valid the discriminant must be non-zero.
+.PP
+The function EC_GROUP_check performs a number of checks on a curve to verify that it is valid. Checks performed include
+verifying that the discriminant is non-zero; that a generator has been defined; that the generator is on the curve and has
+the correct order.
+.PP
+EC_GROUP_cmp compares \fBa\fR and \fBb\fR to determine whether they represent the same curve or not.
+.PP
+The functions EC_GROUP_get_basis_type, EC_GROUP_get_trinomial_basis and EC_GROUP_get_pentanomial_basis should only be called for curves
+defined over an F2^m field. Addition and multiplication operations within an F2^m field are performed using an irreducible polynomial
+function f(x). This function is either a trinomial of the form:
+.PP
+f(x) = x^m + x^k + 1 with m > k >= 1
+.PP
+or a pentanomial of the form:
+.PP
+f(x) = x^m + x^k3 + x^k2 + x^k1 + 1 with m > k3 > k2 > k1 >= 1
+.PP
+The function EC_GROUP_get_basis_type returns a \s-1NID\s0 identifying whether a trinomial or pentanomial is in use for the field. The
+function EC_GROUP_get_trinomial_basis must only be called where f(x) is of the trinomial form, and returns the value of \fBk\fR. Similarly
+the function EC_GROUP_get_pentanomial_basis must only be called where f(x) is of the pentanomial form, and returns the values of \fBk1\fR,
+\&\fBk2\fR and \fBk3\fR respectively.
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+The following functions return 1 on success or 0 on error: EC_GROUP_copy, EC_GROUP_set_generator, EC_GROUP_check,
+EC_GROUP_check_discriminant, EC_GROUP_get_trinomial_basis and EC_GROUP_get_pentanomial_basis.
+.PP
+EC_GROUP_dup returns a pointer to the duplicated curve, or \s-1NULL\s0 on error.
+.PP
+EC_GROUP_method_of returns the \s-1EC_METHOD\s0 implementation in use for the given curve or \s-1NULL\s0 on error.
+.PP
+EC_GROUP_get0_generator returns the generator for the given curve or \s-1NULL\s0 on error.
+.PP
+EC_GROUP_get_order, EC_GROUP_get_cofactor, EC_GROUP_get_curve_name, EC_GROUP_get_asn1_flag, EC_GROUP_get_point_conversion_form
+and EC_GROUP_get_degree return the order, cofactor, curve name (\s-1NID\s0), \s-1ASN1\s0 flag, point_conversion_form and degree for the
+specified curve respectively. If there is no curve name associated with a curve then EC_GROUP_get_curve_name will return 0.
+.PP
+EC_GROUP_get0_seed returns a pointer to the seed that was used to generate the parameter b, or \s-1NULL\s0 if the seed is not
+specified. EC_GROUP_get_seed_len returns the length of the seed or 0 if the seed is not specified.
+.PP
+EC_GROUP_set_seed returns the length of the seed that has been set. If the supplied seed is \s-1NULL,\s0 or the supplied seed length is
+0, the return value will be 1. On error 0 is returned.
+.PP
+EC_GROUP_cmp returns 0 if the curves are equal, 1 if they are not equal, or \-1 on error.
+.PP
+EC_GROUP_get_basis_type returns the values NID_X9_62_tpBasis or NID_X9_62_ppBasis (as defined in <openssl/obj_mac.h>) for a
+trinomial or pentanomial respectively. Alternatively in the event of an error a 0 is returned.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fIcrypto\fR\|(3), \fIec\fR\|(3), \fIEC_GROUP_new\fR\|(3),
+\&\fIEC_POINT_new\fR\|(3), \fIEC_POINT_add\fR\|(3), \fIEC_KEY_new\fR\|(3),
+\&\fIEC_GFp_simple_method\fR\|(3), \fId2i_ECPKParameters\fR\|(3)
Property changes on: trunk/secure/lib/libcrypto/man/EC_GROUP_copy.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/EC_GROUP_new.3
===================================================================
--- trunk/secure/lib/libcrypto/man/EC_GROUP_new.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/EC_GROUP_new.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,227 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "EC_GROUP_new 3"
+.TH EC_GROUP_new 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+EC_GROUP_new, EC_GROUP_free, EC_GROUP_clear_free, EC_GROUP_new_curve_GFp, EC_GROUP_new_curve_GF2m, EC_GROUP_new_by_curve_name, EC_GROUP_set_curve_GFp, EC_GROUP_get_curve_GFp, EC_GROUP_set_curve_GF2m, EC_GROUP_get_curve_GF2m, EC_get_builtin_curves \- Functions for creating and destroying EC_GROUP objects.
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 2
+\& #include <openssl/ec.h>
+\& #include <openssl/bn.h>
+\&
+\& EC_GROUP *EC_GROUP_new(const EC_METHOD *meth);
+\& void EC_GROUP_free(EC_GROUP *group);
+\& void EC_GROUP_clear_free(EC_GROUP *group);
+\&
+\& EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+\& EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+\& EC_GROUP *EC_GROUP_new_by_curve_name(int nid);
+\&
+\& int EC_GROUP_set_curve_GFp(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+\& int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx);
+\& int EC_GROUP_set_curve_GF2m(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+\& int EC_GROUP_get_curve_GF2m(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx);
+\&
+\& size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+Within the library there are two forms of elliptic curve that are of interest. The first form is those defined over the
+prime field Fp. The elements of Fp are the integers 0 to p\-1, where p is a prime number. This gives us a revised
+elliptic curve equation as follows:
+.PP
+y^2 mod p = x^3 + ax + b mod p
+.PP
+The second form is those defined over a binary field F2^m where the elements of the field are integers of length at
+most m bits. For this form the elliptic curve equation is modified to:
+.PP
+y^2 + xy = x^3 + ax^2 + b (where b != 0)
+.PP
+Operations in a binary field are performed relative to an \fBirreducible polynomial\fR. All such curves used with OpenSSL
+employ either a trinomial or a pentanomial for this parameter.
+.PP
+A new curve can be constructed by calling EC_GROUP_new, using the implementation provided by \fBmeth\fR (see
+\&\fIEC_GFp_simple_method\fR\|(3)). It is then necessary to call either EC_GROUP_set_curve_GFp or
+EC_GROUP_set_curve_GF2m as appropriate to create a curve defined over Fp or over F2^m respectively.
+.PP
+EC_GROUP_set_curve_GFp sets the curve parameters \fBp\fR, \fBa\fR and \fBb\fR for a curve over Fp stored in \fBgroup\fR.
+EC_GROUP_get_curve_GFp obtains the previously set curve parameters.
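+.PP
+A minimal sketch (using small toy parameters chosen purely for illustration and
+not for cryptographic use; error handling abbreviated):
+.PP
+.Vb 14
+\& BN_CTX *ctx = BN_CTX_new();
+\& BIGNUM *p = NULL, *a = NULL, *b = NULL;
+\&
+\& BN_dec2bn(&p, "23");
+\& BN_dec2bn(&a, "1");
+\& BN_dec2bn(&b, "1");
+\&
+\& EC_GROUP *group = EC_GROUP_new(EC_GFp_simple_method());
+\& if (!EC_GROUP_set_curve_GFp(group, p, a, b, ctx))
+\&     /* setting the curve parameters failed */;
+\&
+\& EC_GROUP_free(group);
+\& BN_free(p); BN_free(a); BN_free(b);
+\& BN_CTX_free(ctx);
+.Ve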
+.PP
+EC_GROUP_set_curve_GF2m sets the equivalent curve parameters for a curve over F2^m. In this case \fBp\fR represents
+the irreducible polynomial \- each bit represents a term in the polynomial. Therefore there will either be three
+or five bits set, depending on whether the polynomial is a trinomial or a pentanomial.
+EC_GROUP_get_curve_GF2m obtains the previously set curve parameters.
+.PP
+The functions EC_GROUP_new_curve_GFp and EC_GROUP_new_curve_GF2m are shortcuts for calling EC_GROUP_new and the
+appropriate EC_GROUP_set_curve function. A suitable default implementation method will be used.
+.PP
+Whilst the library can be used to create any curve using the functions described above, there are also a number of
+predefined curves that are available. In order to obtain a list of all of the predefined curves, call the function
+EC_get_builtin_curves. The parameter \fBr\fR should be an array of EC_builtin_curve structures of size \fBnitems\fR. The function
+will populate the \fBr\fR array with information about the builtin curves. If \fBnitems\fR is less than the total number of
+curves available, then the first \fBnitems\fR curves will be returned. Otherwise all of the available curves will be
+provided. The return value is the total number of curves available, regardless of how many have been populated in \fBr\fR.
+Passing a \s-1NULL\s0 \fBr\fR, or setting \fBnitems\fR to 0, will do nothing other than return the total number of curves available.
+The EC_builtin_curve structure is defined as follows:
+.PP
+.Vb 4
+\& typedef struct {
+\& int nid;
+\& const char *comment;
+\& } EC_builtin_curve;
+.Ve
+.PP
+Each EC_builtin_curve item has a unique integer id (\fBnid\fR), and a human readable comment string describing the curve.
+.PP
+In order to construct a builtin curve use the function EC_GROUP_new_by_curve_name and provide the \fBnid\fR of the curve to
+be constructed.
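+.PP
+A minimal sketch (error handling abbreviated) of listing the builtin curves and
+constructing the first one by name:
+.PP
+.Vb 8
+\& size_t n = EC_get_builtin_curves(NULL, 0);
+\& EC_builtin_curve *curves = OPENSSL_malloc(n * sizeof(*curves));
+\&
+\& EC_get_builtin_curves(curves, n);
+\& EC_GROUP *group = EC_GROUP_new_by_curve_name(curves[0].nid);
+\&
+\& EC_GROUP_free(group);
+\& OPENSSL_free(curves);
+.Ve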
+.PP
+EC_GROUP_free frees the memory associated with the \s-1EC_GROUP.\s0
+.PP
+EC_GROUP_clear_free destroys any sensitive data held within the \s-1EC_GROUP\s0 and then frees its memory.
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+All EC_GROUP_new* functions return a pointer to the newly constructed group, or \s-1NULL\s0 on error.
+.PP
+EC_get_builtin_curves returns the number of builtin curves that are available.
+.PP
+EC_GROUP_set_curve_GFp, EC_GROUP_get_curve_GFp, EC_GROUP_set_curve_GF2m, EC_GROUP_get_curve_GF2m return 1 on success or 0 on error.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fIcrypto\fR\|(3), \fIec\fR\|(3), \fIEC_GROUP_copy\fR\|(3),
+\&\fIEC_POINT_new\fR\|(3), \fIEC_POINT_add\fR\|(3), \fIEC_KEY_new\fR\|(3),
+\&\fIEC_GFp_simple_method\fR\|(3), \fId2i_ECPKParameters\fR\|(3)
Property changes on: trunk/secure/lib/libcrypto/man/EC_GROUP_new.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/EC_KEY_new.3
===================================================================
--- trunk/secure/lib/libcrypto/man/EC_KEY_new.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/EC_KEY_new.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,236 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "EC_KEY_new 3"
+.TH EC_KEY_new 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+EC_KEY_new, EC_KEY_get_flags, EC_KEY_set_flags, EC_KEY_clear_flags, EC_KEY_new_by_curve_name, EC_KEY_free, EC_KEY_copy, EC_KEY_dup, EC_KEY_up_ref, EC_KEY_get0_group, EC_KEY_set_group, EC_KEY_get0_private_key, EC_KEY_set_private_key, EC_KEY_get0_public_key, EC_KEY_set_public_key, EC_KEY_get_enc_flags, EC_KEY_set_enc_flags, EC_KEY_get_conv_form, EC_KEY_set_conv_form, EC_KEY_get_key_method_data, EC_KEY_insert_key_method_data, EC_KEY_set_asn1_flag, EC_KEY_precompute_mult, EC_KEY_generate_key, EC_KEY_check_key, EC_KEY_set_public_key_affine_coordinates \- Functions for creating, destroying and manipulating EC_KEY objects.
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 2
+\& #include <openssl/ec.h>
+\& #include <openssl/bn.h>
+\&
+\& EC_KEY *EC_KEY_new(void);
+\& int EC_KEY_get_flags(const EC_KEY *key);
+\& void EC_KEY_set_flags(EC_KEY *key, int flags);
+\& void EC_KEY_clear_flags(EC_KEY *key, int flags);
+\& EC_KEY *EC_KEY_new_by_curve_name(int nid);
+\& void EC_KEY_free(EC_KEY *key);
+\& EC_KEY *EC_KEY_copy(EC_KEY *dst, const EC_KEY *src);
+\& EC_KEY *EC_KEY_dup(const EC_KEY *src);
+\& int EC_KEY_up_ref(EC_KEY *key);
+\& const EC_GROUP *EC_KEY_get0_group(const EC_KEY *key);
+\& int EC_KEY_set_group(EC_KEY *key, const EC_GROUP *group);
+\& const BIGNUM *EC_KEY_get0_private_key(const EC_KEY *key);
+\& int EC_KEY_set_private_key(EC_KEY *key, const BIGNUM *prv);
+\& const EC_POINT *EC_KEY_get0_public_key(const EC_KEY *key);
+\& int EC_KEY_set_public_key(EC_KEY *key, const EC_POINT *pub);
+\& point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key);
+\& void EC_KEY_set_conv_form(EC_KEY *eckey, point_conversion_form_t cform);
+\& void *EC_KEY_get_key_method_data(EC_KEY *key,
+\& void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *));
+\& void EC_KEY_insert_key_method_data(EC_KEY *key, void *data,
+\& void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *));
+\& void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag);
+\& int EC_KEY_precompute_mult(EC_KEY *key, BN_CTX *ctx);
+\& int EC_KEY_generate_key(EC_KEY *key);
+\& int EC_KEY_check_key(const EC_KEY *key);
+\& int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+An \s-1EC_KEY\s0 represents a public key and (optionally) an associated private key. A new \s-1EC_KEY\s0 (with no associated curve) can be constructed by calling EC_KEY_new.
+The reference count for the newly created \s-1EC_KEY\s0 is initially set to 1. A curve can be associated with the \s-1EC_KEY\s0 by calling
+EC_KEY_set_group.
+.PP
+Alternatively a new \s-1EC_KEY\s0 can be constructed by calling EC_KEY_new_by_curve_name and supplying the nid of the associated curve. Refer to \fIEC_GROUP_new\fR\|(3) for a description of curve names. This function simply wraps calls to EC_KEY_new and
+EC_GROUP_new_by_curve_name.
+.PP
+Calling EC_KEY_free decrements the reference count for the \s-1EC_KEY\s0 object, and if it has dropped to zero then frees the memory associated
+with it.
+.PP
+EC_KEY_copy copies the contents of the \s-1EC_KEY\s0 in \fBsrc\fR into \fBdst\fR.
+.PP
+EC_KEY_dup creates a new \s-1EC_KEY\s0 object and copies the contents of \fBsrc\fR into it.
+.PP
+EC_KEY_up_ref increments the reference count associated with the \s-1EC_KEY\s0 object.
+.PP
+EC_KEY_generate_key generates a new public and private key for the supplied \fBeckey\fR object. \fBeckey\fR must have an \s-1EC_GROUP\s0 object
+associated with it before calling this function. The private key is a random integer (0 < priv_key < order, where order is the order
+of the \s-1EC_GROUP\s0 object). The public key is an \s-1EC_POINT\s0 on the curve calculated by multiplying the generator for the curve by the
+private key.
+.PP
+EC_KEY_check_key performs various sanity checks on the \s-1EC_KEY\s0 object to confirm that it is valid.
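+.PP
+A minimal sketch (using the builtin prime256v1 curve; error handling
+abbreviated) of generating and checking a key:
+.PP
+.Vb 7
+\& EC_KEY *key = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1);
+\&
+\& if (key == NULL || !EC_KEY_generate_key(key))
+\&     /* key generation failed */;
+\& if (!EC_KEY_check_key(key))
+\&     /* the generated key failed the sanity checks */;
+\& EC_KEY_free(key);
+.Ve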
+.PP
+EC_KEY_set_public_key_affine_coordinates sets the public key for \fBkey\fR based on its affine co-ordinates, i.e. it constructs an \s-1EC_POINT\s0
+object based on the supplied \fBx\fR and \fBy\fR values and sets the public key to be this \s-1EC_POINT.\s0 It will also perform certain sanity checks
+on the key to confirm that it is valid.
+.PP
+The functions EC_KEY_get0_group, EC_KEY_set_group, EC_KEY_get0_private_key, EC_KEY_set_private_key, EC_KEY_get0_public_key, and EC_KEY_set_public_key get and set the \s-1EC_GROUP\s0 object, the private key and the \s-1EC_POINT\s0 public key for the \fBkey\fR respectively.
+.PP
+The functions EC_KEY_get_conv_form and EC_KEY_set_conv_form get and set the point_conversion_form for the \fBkey\fR. For a description
+of point_conversion_forms please refer to \fIEC_POINT_new\fR\|(3).
+.PP
+EC_KEY_insert_key_method_data and EC_KEY_get_key_method_data enable the caller to associate arbitrary additional data specific to the
+elliptic curve scheme being used with the \s-1EC_KEY\s0 object. This data is treated as a \*(L"black box\*(R" by the ec library. The data to be stored by EC_KEY_insert_key_method_data is provided in the \fBdata\fR parameter, which must have associated functions for duplicating, freeing and \*(L"clear_freeing\*(R" the data item. If a subsequent EC_KEY_get_key_method_data call is issued, the functions for duplicating, freeing and \*(L"clear_freeing\*(R" the data item must be provided again, and they must be the same as they were when the data item was inserted.
+.PP
+EC_KEY_set_flags sets the flags in the \fBflags\fR parameter on the \s-1EC_KEY\s0 object. Any flags that are already set are left set. The currently defined standard flags are \s-1EC_FLAG_NON_FIPS_ALLOW\s0 and \s-1EC_FLAG_FIPS_CHECKED.\s0 In addition there is the flag \s-1EC_FLAG_COFACTOR_ECDH\s0 which is specific to \s-1ECDH\s0 and is defined in ecdh.h. EC_KEY_get_flags returns the current flags that are set for this \s-1EC_KEY.\s0 EC_KEY_clear_flags clears the flags indicated by the \fBflags\fR parameter. All other flags are left in their existing state.
+.PP
+EC_KEY_set_asn1_flag sets the asn1_flag on the underlying \s-1EC_GROUP\s0 object (if set). Refer to \fIEC_GROUP_copy\fR\|(3) for further information on the asn1_flag.
+.PP
+EC_KEY_precompute_mult stores multiples of the underlying \s-1EC_GROUP\s0 generator for faster point multiplication. See also \fIEC_POINT_add\fR\|(3).
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+EC_KEY_new, EC_KEY_new_by_curve_name and EC_KEY_dup return a pointer to the newly created \s-1EC_KEY\s0 object, or \s-1NULL\s0 on error.
+.PP
+EC_KEY_get_flags returns the flags associated with the \s-1EC_KEY\s0 object as an integer.
+.PP
+EC_KEY_copy returns a pointer to the destination key, or \s-1NULL\s0 on error.
+.PP
+EC_KEY_up_ref, EC_KEY_set_group, EC_KEY_set_private_key, EC_KEY_set_public_key, EC_KEY_precompute_mult, EC_KEY_generate_key, EC_KEY_check_key and EC_KEY_set_public_key_affine_coordinates return 1 on success or 0 on error.
+.PP
+EC_KEY_get0_group returns the \s-1EC_GROUP\s0 associated with the \s-1EC_KEY.\s0
+.PP
+EC_KEY_get0_private_key returns the private key associated with the \s-1EC_KEY.\s0
+.PP
+EC_KEY_get_conv_form returns the point_conversion_form for the \s-1EC_KEY.\s0
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fIcrypto\fR\|(3), \fIec\fR\|(3), \fIEC_GROUP_new\fR\|(3),
+\&\fIEC_GROUP_copy\fR\|(3), \fIEC_POINT_new\fR\|(3),
+\&\fIEC_POINT_add\fR\|(3),
+\&\fIEC_GFp_simple_method\fR\|(3),
+\&\fId2i_ECPKParameters\fR\|(3)
Property changes on: trunk/secure/lib/libcrypto/man/EC_KEY_new.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/EC_POINT_add.3
===================================================================
--- trunk/secure/lib/libcrypto/man/EC_POINT_add.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/EC_POINT_add.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,200 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "EC_POINT_add 3"
+.TH EC_POINT_add 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+EC_POINT_add, EC_POINT_dbl, EC_POINT_invert, EC_POINT_is_at_infinity, EC_POINT_is_on_curve, EC_POINT_cmp, EC_POINT_make_affine, EC_POINTs_make_affine, EC_POINTs_mul, EC_POINT_mul, EC_GROUP_precompute_mult, EC_GROUP_have_precompute_mult \- Functions for performing mathematical operations and tests on EC_POINT objects.
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 2
+\& #include <openssl/ec.h>
+\& #include <openssl/bn.h>
+\&
+\& int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx);
+\& int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, BN_CTX *ctx);
+\& int EC_POINT_invert(const EC_GROUP *group, EC_POINT *a, BN_CTX *ctx);
+\& int EC_POINT_is_at_infinity(const EC_GROUP *group, const EC_POINT *p);
+\& int EC_POINT_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_CTX *ctx);
+\& int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx);
+\& int EC_POINT_make_affine(const EC_GROUP *group, EC_POINT *point, BN_CTX *ctx);
+\& int EC_POINTs_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx);
+\& int EC_POINTs_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, size_t num, const EC_POINT *p[], const BIGNUM *m[], BN_CTX *ctx);
+\& int EC_POINT_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, const EC_POINT *q, const BIGNUM *m, BN_CTX *ctx);
+\& int EC_GROUP_precompute_mult(EC_GROUP *group, BN_CTX *ctx);
+\& int EC_GROUP_have_precompute_mult(const EC_GROUP *group);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+EC_POINT_add adds the two points \fBa\fR and \fBb\fR and places the result in \fBr\fR. Similarly EC_POINT_dbl doubles the point \fBa\fR and places the
+result in \fBr\fR. In both cases it is valid for \fBr\fR to be one of \fBa\fR or \fBb\fR.
+.PP
+EC_POINT_invert calculates the inverse of the supplied point \fBa\fR. The result is placed back in \fBa\fR.
+.PP
+The function EC_POINT_is_at_infinity tests whether the supplied point is at infinity or not.
+.PP
+EC_POINT_is_on_curve tests whether the supplied point is on the curve or not.
+.PP
+EC_POINT_cmp compares the two supplied points and tests whether or not they are equal.
+.PP
+The functions EC_POINT_make_affine and EC_POINTs_make_affine force the internal representation of the \s-1EC_POINT\s0(s) into the affine
+co-ordinate system. In the case of EC_POINTs_make_affine the value \fBnum\fR provides the number of points in the array \fBpoints\fR to be
+forced.
+.PP
+EC_POINT_mul calculates the value generator * \fBn\fR + \fBq\fR * \fBm\fR and stores the result in \fBr\fR. The value \fBn\fR may be \s-1NULL\s0 in which case the result is just \fBq\fR * \fBm\fR.
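+.PP
+A minimal sketch (assuming \fBgroup\fR, a scalar \fBn\fR and a \fBBN_CTX\fR named
+\&\fBctx\fR already exist) of computing generator * \fBn\fR:
+.PP
+.Vb 5
+\& EC_POINT *r = EC_POINT_new(group);
+\&
+\& if (!EC_POINT_mul(group, r, n, NULL, NULL, ctx))
+\&     /* the point multiplication failed */;
+\& EC_POINT_free(r);
+.Ve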
+.PP
+EC_POINTs_mul calculates the value generator * \fBn\fR + \fBq[0]\fR * \fBm[0]\fR + ... + \fBq[num\-1]\fR * \fBm[num\-1]\fR. As for EC_POINT_mul the value
+\&\fBn\fR may be \s-1NULL.\s0
+.PP
+The function EC_GROUP_precompute_mult stores multiples of the generator for faster point multiplication, whilst
+EC_GROUP_have_precompute_mult tests whether precomputation has already been done. See \fIEC_GROUP_copy\fR\|(3) for information
+about the generator.
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+The following functions return 1 on success or 0 on error: EC_POINT_add, EC_POINT_dbl, EC_POINT_invert, EC_POINT_make_affine,
+EC_POINTs_make_affine, EC_POINT_mul, EC_POINTs_mul and EC_GROUP_precompute_mult.
+.PP
+EC_POINT_is_at_infinity returns 1 if the point is at infinity, or 0 otherwise.
+.PP
+EC_POINT_is_on_curve returns 1 if the point is on the curve, 0 if not, or \-1 on error.
+.PP
+EC_POINT_cmp returns 1 if the points are not equal, 0 if they are, or \-1 on error.
+.PP
+EC_GROUP_have_precompute_mult returns 1 if a precomputation has been done, or 0 if not.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fIcrypto\fR\|(3), \fIec\fR\|(3), \fIEC_GROUP_new\fR\|(3), \fIEC_GROUP_copy\fR\|(3),
+\&\fIEC_POINT_new\fR\|(3), \fIEC_KEY_new\fR\|(3),
+\&\fIEC_GFp_simple_method\fR\|(3), \fId2i_ECPKParameters\fR\|(3)
Property changes on: trunk/secure/lib/libcrypto/man/EC_POINT_add.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/EC_POINT_new.3
===================================================================
--- trunk/secure/lib/libcrypto/man/EC_POINT_new.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/EC_POINT_new.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,257 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "EC_POINT_new 3"
+.TH EC_POINT_new 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+EC_POINT_new, EC_POINT_free, EC_POINT_clear_free, EC_POINT_copy, EC_POINT_dup, EC_POINT_method_of, EC_POINT_set_to_infinity, EC_POINT_set_Jprojective_coordinates, EC_POINT_get_Jprojective_coordinates_GFp, EC_POINT_set_affine_coordinates_GFp, EC_POINT_get_affine_coordinates_GFp, EC_POINT_set_compressed_coordinates_GFp, EC_POINT_set_affine_coordinates_GF2m, EC_POINT_get_affine_coordinates_GF2m, EC_POINT_set_compressed_coordinates_GF2m, EC_POINT_point2oct, EC_POINT_oct2point, EC_POINT_point2bn, EC_POINT_bn2point, EC_POINT_point2hex, EC_POINT_hex2point \- Functions for creating, destroying and manipulating EC_POINT objects.
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 2
+\& #include <openssl/ec.h>
+\& #include <openssl/bn.h>
+\&
+\& EC_POINT *EC_POINT_new(const EC_GROUP *group);
+\& void EC_POINT_free(EC_POINT *point);
+\& void EC_POINT_clear_free(EC_POINT *point);
+\& int EC_POINT_copy(EC_POINT *dst, const EC_POINT *src);
+\& EC_POINT *EC_POINT_dup(const EC_POINT *src, const EC_GROUP *group);
+\& const EC_METHOD *EC_POINT_method_of(const EC_POINT *point);
+\& int EC_POINT_set_to_infinity(const EC_GROUP *group, EC_POINT *point);
+\& int EC_POINT_set_Jprojective_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+\& const BIGNUM *x, const BIGNUM *y, const BIGNUM *z, BN_CTX *ctx);
+\& int EC_POINT_get_Jprojective_coordinates_GFp(const EC_GROUP *group,
+\& const EC_POINT *p, BIGNUM *x, BIGNUM *y, BIGNUM *z, BN_CTX *ctx);
+\& int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+\& const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx);
+\& int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group,
+\& const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx);
+\& int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+\& const BIGNUM *x, int y_bit, BN_CTX *ctx);
+\& int EC_POINT_set_affine_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p,
+\& const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx);
+\& int EC_POINT_get_affine_coordinates_GF2m(const EC_GROUP *group,
+\& const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx);
+\& int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p,
+\& const BIGNUM *x, int y_bit, BN_CTX *ctx);
+\& size_t EC_POINT_point2oct(const EC_GROUP *group, const EC_POINT *p,
+\& point_conversion_form_t form,
+\& unsigned char *buf, size_t len, BN_CTX *ctx);
+\& int EC_POINT_oct2point(const EC_GROUP *group, EC_POINT *p,
+\& const unsigned char *buf, size_t len, BN_CTX *ctx);
+\& BIGNUM *EC_POINT_point2bn(const EC_GROUP *, const EC_POINT *,
+\& point_conversion_form_t form, BIGNUM *, BN_CTX *);
+\& EC_POINT *EC_POINT_bn2point(const EC_GROUP *, const BIGNUM *,
+\& EC_POINT *, BN_CTX *);
+\& char *EC_POINT_point2hex(const EC_GROUP *, const EC_POINT *,
+\& point_conversion_form_t form, BN_CTX *);
+\& EC_POINT *EC_POINT_hex2point(const EC_GROUP *, const char *,
+\& EC_POINT *, BN_CTX *);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+An \s-1EC_POINT\s0 represents a point on a curve. A new point is constructed by calling the function EC_POINT_new and providing the \fBgroup\fR
+object that the point relates to.
+.PP
+EC_POINT_free frees the memory associated with the \s-1EC_POINT.\s0
+.PP
+EC_POINT_clear_free destroys any sensitive data held within the \s-1EC_POINT\s0 and then frees its memory.
+.PP
+EC_POINT_copy copies the point \fBsrc\fR into \fBdst\fR. Both \fBsrc\fR and \fBdst\fR must use the same \s-1EC_METHOD.\s0
+.PP
+EC_POINT_dup creates a new \s-1EC_POINT\s0 object and copies the content from \fBsrc\fR to the newly created
+\&\s-1EC_POINT\s0 object.
+.PP
+EC_POINT_method_of obtains the \s-1EC_METHOD\s0 associated with \fBpoint\fR.
+.PP
+The special point at infinity is a valid point on any curve. A point is set to be at infinity by calling EC_POINT_set_to_infinity.
+.PP
+The affine co-ordinates for a point describe a point in terms of its x and y position. The functions
+EC_POINT_set_affine_coordinates_GFp and EC_POINT_set_affine_coordinates_GF2m set the \fBx\fR and \fBy\fR co-ordinates for the point
+\&\fBp\fR defined over the curve given in \fBgroup\fR.
+.PP
+As well as the affine co-ordinates, a point can alternatively be described in terms of its Jacobian
+projective co-ordinates (for Fp curves only). Jacobian projective co-ordinates are expressed as three values x, y and z. Working in
+this co-ordinate system provides more efficient point multiplication operations.
+A mapping exists between Jacobian projective co-ordinates and affine co-ordinates. A Jacobian projective co-ordinate (x, y, z) can be written as an affine co-ordinate as (x/(z^2), y/(z^3)). Conversion from affine to Jacobian projective co-ordinates is simple: the co-ordinate (x, y) is
+mapped to (x, y, 1). To set or get the projective co-ordinates use EC_POINT_set_Jprojective_coordinates_GFp and
+EC_POINT_get_Jprojective_coordinates_GFp respectively.
+.PP
+Points can also be described in terms of their compressed co-ordinates. For a point (x, y), for any given value for x such that the point is
+on the curve there will only ever be two possible values for y. Therefore a point can be set using the EC_POINT_set_compressed_coordinates_GFp
+and EC_POINT_set_compressed_coordinates_GF2m functions where \fBx\fR is the x co-ordinate and \fBy_bit\fR is a value 0 or 1 to identify which of
+the two possible values for y should be used.
+.PP
+In addition EC_POINTs can be converted to and from various external
+representations. Supported representations are octet strings, BIGNUMs and
+hexadecimal. Octet strings are stored in a buffer along with an associated
+buffer length. A point held in a \s-1BIGNUM\s0 is calculated by converting the point to
+an octet string and then converting that octet string into a \s-1BIGNUM\s0 integer.
+Points in hexadecimal format are stored in a \s-1NULL\s0 terminated character string
+where each character is one of the printable values 0\-9 or A\-F (or a\-f).
+.PP
+The function pairs EC_POINT_point2oct/EC_POINT_oct2point, EC_POINT_point2bn/EC_POINT_bn2point and EC_POINT_point2hex/EC_POINT_hex2point
+convert an \s-1EC_POINT\s0 to and from the octet string, \s-1BIGNUM\s0 and hexadecimal formats respectively.
+.PP
+The function EC_POINT_point2oct must be supplied with a buffer long enough to store the octet string. The return value provides the number of
+octets stored. Calling the function with a \s-1NULL\s0 buffer will not perform the conversion but will still return the required buffer length.
+.PP
+The function EC_POINT_point2hex will allocate sufficient memory to store the hexadecimal string. It is the caller's responsibility to free
+this memory with a subsequent call to \fIOPENSSL_free()\fR.
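+.PP
+As a purely illustrative sketch (error handling is reduced to \s-1NULL\s0 returns, and
+the caller is assumed to free the result with \fIOPENSSL_free()\fR), the following
+fragment shows the usual two-call pattern for EC_POINT_point2oct: a first call
+with a \s-1NULL\s0 buffer to obtain the required length, then a second call to perform
+the conversion:
+.PP
+.Vb 10
+\& #include <openssl/ec.h>
+\& #include <openssl/crypto.h>
+\&
+\& unsigned char *point_to_octets(const EC_GROUP *group, const EC_POINT *point,
+\&                                BN_CTX *ctx, size_t *outlen)
+\& {
+\&     unsigned char *buf;
+\&     size_t len = EC_POINT_point2oct(group, point,
+\&                                     POINT_CONVERSION_UNCOMPRESSED,
+\&                                     NULL, 0, ctx);      /* query required length */
+\&
+\&     if (len == 0)
+\&         return NULL;
+\&     buf = OPENSSL_malloc(len);
+\&     if (buf == NULL)
+\&         return NULL;
+\&     if (EC_POINT_point2oct(group, point, POINT_CONVERSION_UNCOMPRESSED,
+\&                            buf, len, ctx) != len) {     /* do the conversion */
+\&         OPENSSL_free(buf);
+\&         return NULL;
+\&     }
+\&     *outlen = len;
+\&     return buf;
+\& }
+.Ve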
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+EC_POINT_new and EC_POINT_dup return the newly allocated \s-1EC_POINT\s0 or \s-1NULL\s0 on error.
+.PP
+The following functions return 1 on success or 0 on error: EC_POINT_copy, EC_POINT_set_to_infinity, EC_POINT_set_Jprojective_coordinates_GFp,
+EC_POINT_get_Jprojective_coordinates_GFp, EC_POINT_set_affine_coordinates_GFp, EC_POINT_get_affine_coordinates_GFp,
+EC_POINT_set_compressed_coordinates_GFp, EC_POINT_set_affine_coordinates_GF2m, EC_POINT_get_affine_coordinates_GF2m,
+EC_POINT_set_compressed_coordinates_GF2m and EC_POINT_oct2point.
+.PP
+EC_POINT_method_of returns the \s-1EC_METHOD\s0 associated with the supplied \s-1EC_POINT.\s0
+.PP
+EC_POINT_point2oct returns the length of the required buffer, or 0 on error.
+.PP
+EC_POINT_point2bn returns the pointer to the \s-1BIGNUM\s0 supplied, or \s-1NULL\s0 on error.
+.PP
+EC_POINT_bn2point returns the pointer to the \s-1EC_POINT\s0 supplied, or \s-1NULL\s0 on error.
+.PP
+EC_POINT_point2hex returns a pointer to the hex string, or \s-1NULL\s0 on error.
+.PP
+EC_POINT_hex2point returns the pointer to the \s-1EC_POINT\s0 supplied, or \s-1NULL\s0 on error.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fIcrypto\fR\|(3), \fIec\fR\|(3), \fIEC_GROUP_new\fR\|(3), \fIEC_GROUP_copy\fR\|(3),
+\&\fIEC_POINT_add\fR\|(3), \fIEC_KEY_new\fR\|(3),
+\&\fIEC_GFp_simple_method\fR\|(3), \fId2i_ECPKParameters\fR\|(3)
Property changes on: trunk/secure/lib/libcrypto/man/EC_POINT_new.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/EVP_PKEY_meth_new.3
===================================================================
--- trunk/secure/lib/libcrypto/man/EVP_PKEY_meth_new.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/EVP_PKEY_meth_new.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,532 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "EVP_PKEY_meth_new 3"
+.TH EVP_PKEY_meth_new 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+EVP_PKEY_meth_new, EVP_PKEY_meth_free, EVP_PKEY_meth_copy, EVP_PKEY_meth_find,
+EVP_PKEY_meth_add0, EVP_PKEY_METHOD,
+EVP_PKEY_meth_set_init, EVP_PKEY_meth_set_copy, EVP_PKEY_meth_set_cleanup,
+EVP_PKEY_meth_set_paramgen, EVP_PKEY_meth_set_keygen, EVP_PKEY_meth_set_sign,
+EVP_PKEY_meth_set_verify, EVP_PKEY_meth_set_verify_recover, EVP_PKEY_meth_set_signctx,
+EVP_PKEY_meth_set_verifyctx, EVP_PKEY_meth_set_encrypt, EVP_PKEY_meth_set_decrypt,
+EVP_PKEY_meth_set_derive, EVP_PKEY_meth_set_ctrl,
+EVP_PKEY_meth_get_init, EVP_PKEY_meth_get_copy, EVP_PKEY_meth_get_cleanup,
+EVP_PKEY_meth_get_paramgen, EVP_PKEY_meth_get_keygen, EVP_PKEY_meth_get_sign,
+EVP_PKEY_meth_get_verify, EVP_PKEY_meth_get_verify_recover, EVP_PKEY_meth_get_signctx,
+EVP_PKEY_meth_get_verifyctx, EVP_PKEY_meth_get_encrypt, EVP_PKEY_meth_get_decrypt,
+EVP_PKEY_meth_get_derive, EVP_PKEY_meth_get_ctrl
+\&\- manipulating EVP_PKEY_METHOD structure
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 1
+\& #include <openssl/evp.h>
+\&
+\& typedef struct evp_pkey_method_st EVP_PKEY_METHOD;
+\&
+\& EVP_PKEY_METHOD *EVP_PKEY_meth_new(int id, int flags);
+\& void EVP_PKEY_meth_free(EVP_PKEY_METHOD *pmeth);
+\& void EVP_PKEY_meth_copy(EVP_PKEY_METHOD *dst, const EVP_PKEY_METHOD *src);
+\& const EVP_PKEY_METHOD *EVP_PKEY_meth_find(int type);
+\& int EVP_PKEY_meth_add0(const EVP_PKEY_METHOD *pmeth);
+\&
+\& void EVP_PKEY_meth_set_init(EVP_PKEY_METHOD *pmeth,
+\& int (*init) (EVP_PKEY_CTX *ctx));
+\& void EVP_PKEY_meth_set_copy(EVP_PKEY_METHOD *pmeth,
+\& int (*copy) (EVP_PKEY_CTX *dst,
+\& EVP_PKEY_CTX *src));
+\& void EVP_PKEY_meth_set_cleanup(EVP_PKEY_METHOD *pmeth,
+\& void (*cleanup) (EVP_PKEY_CTX *ctx));
+\& void EVP_PKEY_meth_set_paramgen(EVP_PKEY_METHOD *pmeth,
+\& int (*paramgen_init) (EVP_PKEY_CTX *ctx),
+\& int (*paramgen) (EVP_PKEY_CTX *ctx,
+\& EVP_PKEY *pkey));
+\& void EVP_PKEY_meth_set_keygen(EVP_PKEY_METHOD *pmeth,
+\& int (*keygen_init) (EVP_PKEY_CTX *ctx),
+\& int (*keygen) (EVP_PKEY_CTX *ctx,
+\& EVP_PKEY *pkey));
+\& void EVP_PKEY_meth_set_sign(EVP_PKEY_METHOD *pmeth,
+\& int (*sign_init) (EVP_PKEY_CTX *ctx),
+\& int (*sign) (EVP_PKEY_CTX *ctx,
+\& unsigned char *sig, size_t *siglen,
+\& const unsigned char *tbs,
+\& size_t tbslen));
+\& void EVP_PKEY_meth_set_verify(EVP_PKEY_METHOD *pmeth,
+\& int (*verify_init) (EVP_PKEY_CTX *ctx),
+\& int (*verify) (EVP_PKEY_CTX *ctx,
+\& const unsigned char *sig,
+\& size_t siglen,
+\& const unsigned char *tbs,
+\& size_t tbslen));
+\& void EVP_PKEY_meth_set_verify_recover(EVP_PKEY_METHOD *pmeth,
+\& int (*verify_recover_init) (EVP_PKEY_CTX
+\& *ctx),
+\& int (*verify_recover) (EVP_PKEY_CTX
+\& *ctx,
+\& unsigned char
+\& *sig,
+\& size_t *siglen,
+\& const unsigned
+\& char *tbs,
+\& size_t tbslen));
+\& void EVP_PKEY_meth_set_signctx(EVP_PKEY_METHOD *pmeth,
+\& int (*signctx_init) (EVP_PKEY_CTX *ctx,
+\& EVP_MD_CTX *mctx),
+\& int (*signctx) (EVP_PKEY_CTX *ctx,
+\& unsigned char *sig,
+\& size_t *siglen,
+\& EVP_MD_CTX *mctx));
+\& void EVP_PKEY_meth_set_verifyctx(EVP_PKEY_METHOD *pmeth,
+\& int (*verifyctx_init) (EVP_PKEY_CTX *ctx,
+\& EVP_MD_CTX *mctx),
+\& int (*verifyctx) (EVP_PKEY_CTX *ctx,
+\& const unsigned char *sig,
+\& int siglen,
+\& EVP_MD_CTX *mctx));
+\& void EVP_PKEY_meth_set_encrypt(EVP_PKEY_METHOD *pmeth,
+\& int (*encrypt_init) (EVP_PKEY_CTX *ctx),
+\& int (*encryptfn) (EVP_PKEY_CTX *ctx,
+\& unsigned char *out,
+\& size_t *outlen,
+\& const unsigned char *in,
+\& size_t inlen));
+\& void EVP_PKEY_meth_set_decrypt(EVP_PKEY_METHOD *pmeth,
+\& int (*decrypt_init) (EVP_PKEY_CTX *ctx),
+\& int (*decrypt) (EVP_PKEY_CTX *ctx,
+\& unsigned char *out,
+\& size_t *outlen,
+\& const unsigned char *in,
+\& size_t inlen));
+\& void EVP_PKEY_meth_set_derive(EVP_PKEY_METHOD *pmeth,
+\& int (*derive_init) (EVP_PKEY_CTX *ctx),
+\& int (*derive) (EVP_PKEY_CTX *ctx,
+\& unsigned char *key,
+\& size_t *keylen));
+\& void EVP_PKEY_meth_set_ctrl(EVP_PKEY_METHOD *pmeth,
+\& int (*ctrl) (EVP_PKEY_CTX *ctx, int type, int p1,
+\& void *p2),
+\& int (*ctrl_str) (EVP_PKEY_CTX *ctx,
+\& const char *type,
+\& const char *value));
+\&
+\& void EVP_PKEY_meth_get_init(EVP_PKEY_METHOD *pmeth,
+\& int (**pinit) (EVP_PKEY_CTX *ctx));
+\& void EVP_PKEY_meth_get_copy(EVP_PKEY_METHOD *pmeth,
+\& int (**pcopy) (EVP_PKEY_CTX *dst,
+\& EVP_PKEY_CTX *src));
+\& void EVP_PKEY_meth_get_cleanup(EVP_PKEY_METHOD *pmeth,
+\& void (**pcleanup) (EVP_PKEY_CTX *ctx));
+\& void EVP_PKEY_meth_get_paramgen(EVP_PKEY_METHOD *pmeth,
+\& int (**pparamgen_init) (EVP_PKEY_CTX *ctx),
+\& int (**pparamgen) (EVP_PKEY_CTX *ctx,
+\& EVP_PKEY *pkey));
+\& void EVP_PKEY_meth_get_keygen(EVP_PKEY_METHOD *pmeth,
+\& int (**pkeygen_init) (EVP_PKEY_CTX *ctx),
+\& int (**pkeygen) (EVP_PKEY_CTX *ctx,
+\& EVP_PKEY *pkey));
+\& void EVP_PKEY_meth_get_sign(EVP_PKEY_METHOD *pmeth,
+\& int (**psign_init) (EVP_PKEY_CTX *ctx),
+\& int (**psign) (EVP_PKEY_CTX *ctx,
+\& unsigned char *sig, size_t *siglen,
+\& const unsigned char *tbs,
+\& size_t tbslen));
+\& void EVP_PKEY_meth_get_verify(EVP_PKEY_METHOD *pmeth,
+\& int (**pverify_init) (EVP_PKEY_CTX *ctx),
+\& int (**pverify) (EVP_PKEY_CTX *ctx,
+\& const unsigned char *sig,
+\& size_t siglen,
+\& const unsigned char *tbs,
+\& size_t tbslen));
+\& void EVP_PKEY_meth_get_verify_recover(EVP_PKEY_METHOD *pmeth,
+\& int (**pverify_recover_init) (EVP_PKEY_CTX
+\& *ctx),
+\& int (**pverify_recover) (EVP_PKEY_CTX
+\& *ctx,
+\& unsigned char
+\& *sig,
+\& size_t *siglen,
+\& const unsigned
+\& char *tbs,
+\& size_t tbslen));
+\& void EVP_PKEY_meth_get_signctx(EVP_PKEY_METHOD *pmeth,
+\& int (**psignctx_init) (EVP_PKEY_CTX *ctx,
+\& EVP_MD_CTX *mctx),
+\& int (**psignctx) (EVP_PKEY_CTX *ctx,
+\& unsigned char *sig,
+\& size_t *siglen,
+\& EVP_MD_CTX *mctx));
+\& void EVP_PKEY_meth_get_verifyctx(EVP_PKEY_METHOD *pmeth,
+\& int (**pverifyctx_init) (EVP_PKEY_CTX *ctx,
+\& EVP_MD_CTX *mctx),
+\& int (**pverifyctx) (EVP_PKEY_CTX *ctx,
+\& const unsigned char *sig,
+\& int siglen,
+\& EVP_MD_CTX *mctx));
+\& void EVP_PKEY_meth_get_encrypt(EVP_PKEY_METHOD *pmeth,
+\& int (**pencrypt_init) (EVP_PKEY_CTX *ctx),
+\& int (**pencryptfn) (EVP_PKEY_CTX *ctx,
+\& unsigned char *out,
+\& size_t *outlen,
+\& const unsigned char *in,
+\& size_t inlen));
+\& void EVP_PKEY_meth_get_decrypt(EVP_PKEY_METHOD *pmeth,
+\& int (**pdecrypt_init) (EVP_PKEY_CTX *ctx),
+\& int (**pdecrypt) (EVP_PKEY_CTX *ctx,
+\& unsigned char *out,
+\& size_t *outlen,
+\& const unsigned char *in,
+\& size_t inlen));
+\& void EVP_PKEY_meth_get_derive(EVP_PKEY_METHOD *pmeth,
+\& int (**pderive_init) (EVP_PKEY_CTX *ctx),
+\& int (**pderive) (EVP_PKEY_CTX *ctx,
+\& unsigned char *key,
+\& size_t *keylen));
+\& void EVP_PKEY_meth_get_ctrl(EVP_PKEY_METHOD *pmeth,
+\& int (**pctrl) (EVP_PKEY_CTX *ctx, int type, int p1,
+\& void *p2),
+\& int (**pctrl_str) (EVP_PKEY_CTX *ctx,
+\& const char *type,
+\& const char *value));
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+\&\fB\s-1EVP_PKEY_METHOD\s0\fR is a structure which holds a set of methods for a
+specific public key cryptographic algorithm. Those methods are usually
+used to perform different jobs, such as generating a key, signing or
+verifying, encrypting or decrypting, etc.
+.PP
+There are two places where the \fB\s-1EVP_PKEY_METHOD\s0\fR objects are stored: one
+is a built-in static array representing the standard methods for different
+algorithms, and the other one is a stack of user-defined application-specific
+methods, which can be manipulated by using \fIEVP_PKEY_meth_add0\fR\|(3).
+.PP
+The \fB\s-1EVP_PKEY_METHOD\s0\fR objects are usually referenced by \fB\s-1EVP_PKEY_CTX\s0\fR
+objects.
+.SS "Methods"
+.IX Subsection "Methods"
+The methods are the underlying implementations of a particular public key
+algorithm represented by the \fB\s-1EVP_PKEY_CTX\s0\fR object.
+.PP
+.Vb 3
+\& int (*init) (EVP_PKEY_CTX *ctx);
+\& int (*copy) (EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src);
+\& void (*cleanup) (EVP_PKEY_CTX *ctx);
+.Ve
+.PP
+The \fIinit()\fR method is called to initialize algorithm-specific data when a new
+\&\fB\s-1EVP_PKEY_CTX\s0\fR is created. As opposed to \fIinit()\fR, the \fIcleanup()\fR method is called
+when an \fB\s-1EVP_PKEY_CTX\s0\fR is freed. The \fIcopy()\fR method is called when an \fB\s-1EVP_PKEY_CTX\s0\fR
+is being duplicated. Refer to \fIEVP_PKEY_CTX_new\fR\|(3), \fIEVP_PKEY_CTX_new_id\fR\|(3),
+\&\fIEVP_PKEY_CTX_free\fR\|(3) and \fIEVP_PKEY_CTX_dup\fR\|(3).
+.PP
+.Vb 2
+\& int (*paramgen_init) (EVP_PKEY_CTX *ctx);
+\& int (*paramgen) (EVP_PKEY_CTX *ctx, EVP_PKEY *pkey);
+.Ve
+.PP
+The \fIparamgen_init()\fR and \fIparamgen()\fR methods deal with key parameter generation.
+They are called by \fIEVP_PKEY_paramgen_init\fR\|(3) and \fIEVP_PKEY_paramgen\fR\|(3) to
+handle the parameter generation process.
+.PP
+.Vb 2
+\& int (*keygen_init) (EVP_PKEY_CTX *ctx);
+\& int (*keygen) (EVP_PKEY_CTX *ctx, EVP_PKEY *pkey);
+.Ve
+.PP
+The \fIkeygen_init()\fR and \fIkeygen()\fR methods are used to generate the actual key for
+the specified algorithm. They are called by \fIEVP_PKEY_keygen_init\fR\|(3) and
+\&\fIEVP_PKEY_keygen\fR\|(3).
+.PP
+.Vb 3
+\& int (*sign_init) (EVP_PKEY_CTX *ctx);
+\& int (*sign) (EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen,
+\& const unsigned char *tbs, size_t tbslen);
+.Ve
+.PP
+The \fIsign_init()\fR and \fIsign()\fR methods are used to generate the signature of a
+piece of data using a private key. They are called by \fIEVP_PKEY_sign_init\fR\|(3)
+and \fIEVP_PKEY_sign\fR\|(3).
+.PP
+.Vb 4
+\& int (*verify_init) (EVP_PKEY_CTX *ctx);
+\& int (*verify) (EVP_PKEY_CTX *ctx,
+\& const unsigned char *sig, size_t siglen,
+\& const unsigned char *tbs, size_t tbslen);
+.Ve
+.PP
+The \fIverify_init()\fR and \fIverify()\fR methods are used to verify whether a signature is
+valid. They are called by \fIEVP_PKEY_verify_init\fR\|(3) and \fIEVP_PKEY_verify\fR\|(3).
+.PP
+.Vb 4
+\& int (*verify_recover_init) (EVP_PKEY_CTX *ctx);
+\& int (*verify_recover) (EVP_PKEY_CTX *ctx,
+\& unsigned char *rout, size_t *routlen,
+\& const unsigned char *sig, size_t siglen);
+.Ve
+.PP
+The \fIverify_recover_init()\fR and \fIverify_recover()\fR methods are used to verify a
+signature and then recover the digest from the signature (for instance, a
+signature that was generated by the \s-1RSA\s0 signing algorithm). They are called by
+\&\fIEVP_PKEY_verify_recover_init\fR\|(3) and \fIEVP_PKEY_verify_recover\fR\|(3).
+.PP
+.Vb 3
+\& int (*signctx_init) (EVP_PKEY_CTX *ctx, EVP_MD_CTX *mctx);
+\& int (*signctx) (EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen,
+\& EVP_MD_CTX *mctx);
+.Ve
+.PP
+The \fIsignctx_init()\fR and \fIsignctx()\fR methods are used to sign a digest represented by
+an \fB\s-1EVP_MD_CTX\s0\fR object. They are called by the EVP_DigestSign functions. See
+\&\fIEVP_DigestSignInit\fR\|(3) for detail.
+.PP
+.Vb 3
+\& int (*verifyctx_init) (EVP_PKEY_CTX *ctx, EVP_MD_CTX *mctx);
+\& int (*verifyctx) (EVP_PKEY_CTX *ctx, const unsigned char *sig, int siglen,
+\& EVP_MD_CTX *mctx);
+.Ve
+.PP
+The \fIverifyctx_init()\fR and \fIverifyctx()\fR methods are used to verify a signature
+against the data in an \fB\s-1EVP_MD_CTX\s0\fR object. They are called by the various
+EVP_DigestVerify functions. See \fIEVP_DigestVerifyInit\fR\|(3) for detail.
+.PP
+.Vb 3
+\& int (*encrypt_init) (EVP_PKEY_CTX *ctx);
+\& int (*encrypt) (EVP_PKEY_CTX *ctx, unsigned char *out, size_t *outlen,
+\& const unsigned char *in, size_t inlen);
+.Ve
+.PP
+The \fIencrypt_init()\fR and \fIencrypt()\fR methods are used to encrypt a piece of data.
+They are called by \fIEVP_PKEY_encrypt_init\fR\|(3) and \fIEVP_PKEY_encrypt\fR\|(3).
+.PP
+.Vb 3
+\& int (*decrypt_init) (EVP_PKEY_CTX *ctx);
+\& int (*decrypt) (EVP_PKEY_CTX *ctx, unsigned char *out, size_t *outlen,
+\& const unsigned char *in, size_t inlen);
+.Ve
+.PP
+The \fIdecrypt_init()\fR and \fIdecrypt()\fR methods are used to decrypt a piece of data.
+They are called by \fIEVP_PKEY_decrypt_init\fR\|(3) and \fIEVP_PKEY_decrypt\fR\|(3).
+.PP
+.Vb 2
+\& int (*derive_init) (EVP_PKEY_CTX *ctx);
+\& int (*derive) (EVP_PKEY_CTX *ctx, unsigned char *key, size_t *keylen);
+.Ve
+.PP
+The \fIderive_init()\fR and \fIderive()\fR methods are used to derive the shared secret
+from a public key algorithm (for instance, the \s-1DH\s0 algorithm). They are called by
+\&\fIEVP_PKEY_derive_init\fR\|(3) and \fIEVP_PKEY_derive\fR\|(3).
+.PP
+.Vb 2
+\& int (*ctrl) (EVP_PKEY_CTX *ctx, int type, int p1, void *p2);
+\& int (*ctrl_str) (EVP_PKEY_CTX *ctx, const char *type, const char *value);
+.Ve
+.PP
+The \fIctrl()\fR and \fIctrl_str()\fR methods are used to adjust algorithm-specific
+settings. See \fIEVP_PKEY_CTX_ctrl\fR\|(3) and related functions for detail.
+.PP
+.Vb 5
+\& int (*digestsign) (EVP_MD_CTX *ctx, unsigned char *sig, size_t *siglen,
+\& const unsigned char *tbs, size_t tbslen);
+\& int (*digestverify) (EVP_MD_CTX *ctx, const unsigned char *sig,
+\& size_t siglen, const unsigned char *tbs,
+\& size_t tbslen);
+.Ve
+.PP
+The \fIdigestsign()\fR and \fIdigestverify()\fR methods are used to generate or verify
+a signature in a one-shot mode. They could be called by \fIEVP_DigestSign\fR\|(3)
+and \fIEVP_DigestVerify\fR\|(3).
+.SS "Functions"
+.IX Subsection "Functions"
+\&\fIEVP_PKEY_meth_new()\fR creates and returns a new \fB\s-1EVP_PKEY_METHOD\s0\fR object,
+and associates the given \fBid\fR and \fBflags\fR. The following flags are
+supported:
+.PP
+.Vb 2
+\& EVP_PKEY_FLAG_AUTOARGLEN
+\& EVP_PKEY_FLAG_SIGCTX_CUSTOM
+.Ve
+.PP
+If an \fB\s-1EVP_PKEY_METHOD\s0\fR is set with the \fB\s-1EVP_PKEY_FLAG_AUTOARGLEN\s0\fR flag, the
+maximum size of the output buffer will be automatically calculated or checked
+in corresponding \s-1EVP\s0 methods by the \s-1EVP\s0 framework. The implementations of
+these methods therefore do not need to handle the output buffer size query
+themselves. For details on the output buffer size, refer to
+\&\fIEVP_PKEY_sign\fR\|(3).
+.PP
+The \fB\s-1EVP_PKEY_FLAG_SIGCTX_CUSTOM\s0\fR flag is used to indicate that the \fIsignctx()\fR method
+of an \fB\s-1EVP_PKEY_METHOD\s0\fR is always called by the \s-1EVP\s0 framework while doing a
+digest signing operation by calling \fIEVP_DigestSignFinal\fR\|(3).
+.PP
+\&\fIEVP_PKEY_meth_free()\fR frees an existing \fB\s-1EVP_PKEY_METHOD\s0\fR pointed to by
+\&\fBpmeth\fR.
+.PP
+\&\fIEVP_PKEY_meth_copy()\fR copies an \fB\s-1EVP_PKEY_METHOD\s0\fR object from \fBsrc\fR
+to \fBdst\fR.
+.PP
+\&\fIEVP_PKEY_meth_find()\fR finds an \fB\s-1EVP_PKEY_METHOD\s0\fR object matching the given \fBid\fR.
+This function first searches through the user-defined method objects and
+then the built-in objects.
+.PP
+\&\fIEVP_PKEY_meth_add0()\fR adds \fBpmeth\fR to the user-defined stack of methods.
+.PP
+The EVP_PKEY_meth_set functions set the corresponding fields of the
+\&\fB\s-1EVP_PKEY_METHOD\s0\fR structure with the arguments passed.
+.PP
+The EVP_PKEY_meth_get functions retrieve the corresponding fields of the
+\&\fB\s-1EVP_PKEY_METHOD\s0\fR structure into the arguments provided.
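+.PP
+As a purely illustrative sketch (the \fBapp_nid\fR value is assumed to have been
+obtained elsewhere, for example via \fIOBJ_create()\fR, and the callback bodies are
+placeholders rather than a real algorithm), the following fragment creates a
+method object, installs a minimal \fIsign()\fR implementation and registers it:
+.PP
+.Vb 10
+\& #include <openssl/evp.h>
+\&
+\& static int app_sign_init(EVP_PKEY_CTX *ctx)
+\& {
+\&     return 1;                      /* nothing to set up in this sketch */
+\& }
+\&
+\& static int app_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen,
+\&                     const unsigned char *tbs, size_t tbslen)
+\& {
+\&     /* a real method would produce a signature here; report failure for now */
+\&     return 0;
+\& }
+\&
+\& int register_app_method(int app_nid)
+\& {
+\&     EVP_PKEY_METHOD *pmeth = EVP_PKEY_meth_new(app_nid, 0);
+\&
+\&     if (pmeth == NULL)
+\&         return 0;
+\&     EVP_PKEY_meth_set_sign(pmeth, app_sign_init, app_sign);
+\&     return EVP_PKEY_meth_add0(pmeth); /* the method stack now owns pmeth */
+\& }
+.Ve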
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+\&\fIEVP_PKEY_meth_new()\fR returns a pointer to a new \fB\s-1EVP_PKEY_METHOD\s0\fR
+object or returns \s-1NULL\s0 on error.
+.PP
+\&\fIEVP_PKEY_meth_free()\fR and \fIEVP_PKEY_meth_copy()\fR do not return values.
+.PP
+\&\fIEVP_PKEY_meth_find()\fR returns a pointer to the found \fB\s-1EVP_PKEY_METHOD\s0\fR
+object or returns \s-1NULL\s0 if not found.
+.PP
+\&\fIEVP_PKEY_meth_add0()\fR returns 1 if the method is added successfully or 0
+if an error occurred.
+.PP
+All EVP_PKEY_meth_set and EVP_PKEY_meth_get functions have no return
+values. For the 'get' functions, function pointers are returned by
+arguments.
+.SH "COPYRIGHT"
+.IX Header "COPYRIGHT"
+Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
+.PP
+Licensed under the OpenSSL license (the \*(L"License\*(R"). You may not use
+this file except in compliance with the License. You can obtain a copy
+in the file \s-1LICENSE\s0 in the source distribution or at
+<https://www.openssl.org/source/license.html>.
Property changes on: trunk/secure/lib/libcrypto/man/EVP_PKEY_meth_new.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/OPENSSL_instrument_bus.3
===================================================================
--- trunk/secure/lib/libcrypto/man/OPENSSL_instrument_bus.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/OPENSSL_instrument_bus.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,175 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "OPENSSL_instrument_bus 3"
+.TH OPENSSL_instrument_bus 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+OPENSSL_instrument_bus, OPENSSL_instrument_bus2 \- instrument references to memory bus
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 4
+\& #ifdef OPENSSL_CPUID_OBJ
+\& size_t OPENSSL_instrument_bus (int *vector,size_t num);
+\& size_t OPENSSL_instrument_bus2(int *vector,size_t num,size_t max);
+\& #endif
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+It was empirically found that timings of references to primary memory
+are subject to irregular, apparently non-deterministic variations. The
+subroutines in question instrument these references for the purpose of
+gathering entropy for the random number generator. In order to make the
+measurement bus-bound, a 'flush cache line' instruction is issued between
+probes. In addition, probes are added to the \fBvector\fR elements in an
+atomic or interlocked manner, which should contribute additional noise on
+multi-processor systems. This also means that \fBvector[num]\fR should be
+zeroed upon invocation (if you want to retrieve actual probe values).
+.PP
+OPENSSL_instrument_bus performs \fBnum\fR probes and records the number of
+oscillator cycles every probe took.
+.PP
+OPENSSL_instrument_bus2, on the other hand, \fBaccumulates\fR consecutive
+probes with the same value, i.e. it effectively records the duration of
+periods when the probe values appeared deterministic. The subroutine
+performs at most \fBmax\fR probes in an attempt to fill \fBvector[num]\fR,
+with a \fBmax\fR value of 0 meaning \*(L"as many as it takes.\*(R"
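+.PP
+As a purely illustrative sketch (it assumes a build with \fBOPENSSL_CPUID_OBJ\fR
+defined and hardware that supports the benchmark; the vector size of 64 is
+arbitrary), the following fragment zeroes a probe vector and collects the probe
+values:
+.PP
+.Vb 10
+\& #ifdef OPENSSL_CPUID_OBJ
+\& #include <string.h>
+\&
+\& size_t OPENSSL_instrument_bus(int *vector, size_t num);  /* as in the SYNOPSIS */
+\&
+\& size_t collect_probes(void)
+\& {
+\&     int vector[64];
+\&
+\&     memset(vector, 0, sizeof(vector));          /* zero so probe values can be read back */
+\&     return OPENSSL_instrument_bus(vector, 64);  /* 0 means the benchmark is unsupported */
+\& }
+\& #endif
+.Ve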
+.SH "RETURN VALUE"
+.IX Header "RETURN VALUE"
+A return value of 0 indicates that the \s-1CPU\s0 is not capable of performing the
+benchmark, either because the oscillator counter or the 'flush cache line'
+instruction is not available on the current platform. For reference, on x86 the
+\&'flush cache line' instruction was introduced with the \s-1SSE2\s0 extensions.
+.PP
+Otherwise the number of recorded values is returned.
Property changes on: trunk/secure/lib/libcrypto/man/OPENSSL_instrument_bus.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/X509_check_host.3
===================================================================
--- trunk/secure/lib/libcrypto/man/X509_check_host.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/X509_check_host.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,266 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "X509_check_host 3"
+.TH X509_check_host 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+X509_check_host, X509_check_email, X509_check_ip, X509_check_ip_asc \- X.509 certificate matching
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 1
+\& #include <openssl/x509.h>
+\&
+\& int X509_check_host(X509 *, const char *name, size_t namelen,
+\& unsigned int flags, char **peername);
+\& int X509_check_email(X509 *, const char *address, size_t addresslen,
+\& unsigned int flags);
+\& int X509_check_ip(X509 *, const unsigned char *address, size_t addresslen,
+\& unsigned int flags);
+\& int X509_check_ip_asc(X509 *, const char *address, unsigned int flags);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+The certificate matching functions are used to check whether a
+certificate matches a given host name, email address, or \s-1IP\s0 address.
+The validity of the certificate and its trust level has to be checked by
+other means.
+.PP
+\&\fIX509_check_host()\fR checks if the certificate Subject Alternative
+Name (\s-1SAN\s0) or Subject CommonName (\s-1CN\s0) matches the specified host
+name, which must be encoded in the preferred name syntax described
+in section 3.5 of \s-1RFC 1034.\s0 By default, wildcards are supported
+and they match only in the left-most label; but they may match
+part of that label with an explicit prefix or suffix. For example,
+by default, the host \fBname\fR \*(L"www.example.com\*(R" would match a
+certificate with a \s-1SAN\s0 or \s-1CN\s0 value of \*(L"*.example.com\*(R", \*(L"w*.example.com\*(R"
+or \*(L"*w.example.com\*(R".
+.PP
+Per section 6.4.2 of \s-1RFC 6125,\s0 \fBname\fR values representing international
+domain names must be given in A\-label form. The \fBnamelen\fR argument
+must be the number of characters in the name string or zero in which
+case the length is calculated with strlen(\fBname\fR). When \fBname\fR starts
+with a dot (e.g. \*(L".example.com\*(R"), it will be matched by a certificate
+valid for any sub-domain of \fBname\fR (see also
+\&\fBX509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS\fR below).
+.PP
+When the certificate is matched, and \fBpeername\fR is not \s-1NULL,\s0 a
+pointer to a copy of the matching \s-1SAN\s0 or \s-1CN\s0 from the peer certificate
+is stored at the address passed in \fBpeername\fR. The application
+is responsible for freeing the peername via \fIOPENSSL_free()\fR when it
+is no longer needed.
+.PP
+\&\fIX509_check_email()\fR checks if the certificate matches the specified
+email \fBaddress\fR. Only the mailbox syntax of \s-1RFC 822\s0 is supported,
+comments are not allowed, and no attempt is made to normalize quoted
+characters. The \fBaddresslen\fR argument must be the number of
+characters in the address string or zero in which case the length
+is calculated with strlen(\fBaddress\fR).
+.PP
+\&\fIX509_check_ip()\fR checks if the certificate matches a specified IPv4 or
+IPv6 address. The \fBaddress\fR array is in binary format, in network
+byte order. The length is either 4 (IPv4) or 16 (IPv6). Only
+explicitly marked addresses in the certificates are considered; \s-1IP\s0
+addresses stored in \s-1DNS\s0 names and Common Names are ignored.
+.PP
+\&\fIX509_check_ip_asc()\fR is similar, except that the NUL-terminated
+string \fBaddress\fR is first converted to the internal representation.
+.PP
+The \fBflags\fR argument is usually 0. It can be the bitwise \s-1OR\s0 of the
+flags:
+.IP "\fBX509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT\fR," 4
+.IX Item "X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT,"
+.PD 0
+.IP "\fBX509_CHECK_FLAG_NO_WILDCARDS\fR," 4
+.IX Item "X509_CHECK_FLAG_NO_WILDCARDS,"
+.IP "\fBX509_CHECK_FLAG_NO_PARTIAL_WILDCARDS\fR," 4
+.IX Item "X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS,"
+.IP "\fBX509_CHECK_FLAG_MULTI_LABEL_WILDCARDS\fR." 4
+.IX Item "X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS."
+.IP "\fBX509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS\fR." 4
+.IX Item "X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS."
+.PD
+.PP
+The \fBX509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT\fR flag causes the function
+to consider the subject \s-1DN\s0 even if the certificate contains at least
+one subject alternative name of the right type (\s-1DNS\s0 name or email
+address as appropriate); the default is to ignore the subject \s-1DN\s0
+when at least one corresponding subject alternative name is present.
+.PP
+If set, \fBX509_CHECK_FLAG_NO_WILDCARDS\fR disables wildcard
+expansion; this only applies to \fBX509_check_host\fR.
+.PP
+If set, \fBX509_CHECK_FLAG_NO_PARTIAL_WILDCARDS\fR suppresses support
+for \*(L"*\*(R" as wildcard pattern in labels that have a prefix or suffix,
+such as: \*(L"www*\*(R" or \*(L"*www\*(R"; this only applies to \fBX509_check_host\fR.
+.PP
+If set, \fBX509_CHECK_FLAG_MULTI_LABEL_WILDCARDS\fR allows a \*(L"*\*(R" that
+constitutes the complete label of a \s-1DNS\s0 name (e.g. \*(L"*.example.com\*(R")
+to match more than one label in \fBname\fR; this flag only applies
+to \fBX509_check_host\fR.
+.PP
+If set, \fBX509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS\fR restricts \fBname\fR
+values which start with \*(L".\*(R", that would otherwise match any sub-domain
+in the peer certificate, to only match direct child sub-domains.
+Thus, for instance, with this flag set a \fBname\fR of \*(L".example.com\*(R"
+would match a peer certificate with a \s-1DNS\s0 name of \*(L"www.example.com\*(R",
+but would not match a peer certificate with a \s-1DNS\s0 name of
+\&\*(L"www.sub.example.com\*(R"; this flag only applies to \fBX509_check_host\fR.
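+.PP
+As a purely illustrative sketch (how the certificate is obtained is left to the
+application, and the host name is simply whatever the caller passes in), the
+following fragment checks a certificate against a host name with the default
+flags and releases the matched peer name:
+.PP
+.Vb 10
+\& #include <openssl/x509.h>
+\& #include <openssl/x509v3.h>
+\& #include <openssl/crypto.h>
+\&
+\& /* returns 1 on match, 0 on mismatch and a negative value on error */
+\& int cert_matches_host(X509 *cert, const char *host)
+\& {
+\&     char *peername = NULL;
+\&     int rc = X509_check_host(cert, host, 0, 0, &peername);
+\&
+\&     if (rc == 1 && peername != NULL) {
+\&         /* peername is a copy of the matching SAN or CN entry */
+\&         OPENSSL_free(peername);
+\&     }
+\&     return rc;
+\& }
+.Ve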
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+The functions return 1 for a successful match, 0 for a failed match
+and \-1 for an internal error: typically a memory allocation failure
+or an \s-1ASN.1\s0 decoding error.
+.PP
+All functions can also return \-2 if the input is malformed. For example,
+\&\fIX509_check_host()\fR returns \-2 if the provided \fBname\fR contains embedded
+NULs.
+.SH "NOTES"
+.IX Header "NOTES"
+Applications are encouraged to use \fIX509_VERIFY_PARAM_set1_host()\fR
+rather than explicitly calling \fIX509_check_host\fR\|(3). Host name
+checks are out of scope with the \s-1\fIDANE\-EE\s0\fR\|(3) certificate usage,
+and the internal checks will be suppressed as appropriate when
+\&\s-1DANE\s0 support is added to OpenSSL.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fISSL_get_verify_result\fR\|(3),
+\&\fIX509_VERIFY_PARAM_set1_host\fR\|(3),
+\&\fIX509_VERIFY_PARAM_add1_host\fR\|(3),
+\&\fIX509_VERIFY_PARAM_set1_email\fR\|(3),
+\&\fIX509_VERIFY_PARAM_set1_ip\fR\|(3),
+\&\fIX509_VERIFY_PARAM_set1_ipasc\fR\|(3)
+.SH "HISTORY"
+.IX Header "HISTORY"
+These functions were added in OpenSSL 1.0.2.
Property changes on: trunk/secure/lib/libcrypto/man/X509_check_host.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/X509_check_private_key.3
===================================================================
--- trunk/secure/lib/libcrypto/man/X509_check_private_key.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/X509_check_private_key.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,182 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "X509_check_private_key 3"
+.TH X509_check_private_key 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+X509_check_private_key, X509_REQ_check_private_key \- check the consistency
+of a private key with the public key in an X509 certificate or certificate
+request
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 1
+\& #include <openssl/x509.h>
+\&
+\& int X509_check_private_key(X509 *x, EVP_PKEY *k);
+\&
+\& int X509_REQ_check_private_key(X509_REQ *x, EVP_PKEY *k);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+The \fIX509_check_private_key()\fR function checks the consistency of the private
+key \fBk\fR with the public key in \fBx\fR.
+.PP
+\&\fIX509_REQ_check_private_key()\fR is equivalent to \fIX509_check_private_key()\fR
+except that \fBx\fR represents a certificate request of structure \fBX509_REQ\fR.
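+.PP
+As a purely illustrative sketch (how \fBcert\fR and \fBpkey\fR are obtained is left
+to the application), the following fragment checks that a certificate and a
+private key belong together:
+.PP
+.Vb 8
+\& #include <openssl/x509.h>
+\& #include <openssl/evp.h>
+\&
+\& /* returns 1 if cert and pkey form a consistent pair, 0 otherwise */
+\& int key_matches_cert(X509 *cert, EVP_PKEY *pkey)
+\& {
+\&     return X509_check_private_key(cert, pkey) == 1;
+\& }
+.Ve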
+.SH "RETURN VALUE"
+.IX Header "RETURN VALUE"
+\&\fIX509_check_private_key()\fR and \fIX509_REQ_check_private_key()\fR return 1 if
+the keys match each other, and 0 if not.
+.PP
+If the key is invalid or an error occurred, the reason code can be
+obtained using \fIERR_get_error\fR\|(3).
+.SH "BUGS"
+.IX Header "BUGS"
+The \fBcheck_private_key\fR functions do not check whether \fBk\fR itself is indeed
+a private key. They merely compare the public material (e.g. the exponent
+and modulus of an \s-1RSA\s0 key) and/or the key parameters (e.g. the \s-1EC\s0 params of an \s-1EC\s0 key)
+of a key pair. So if you pass a public key to these functions in \fBk\fR, they will
+report success.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fIERR_get_error\fR\|(3)
+.SH "COPYRIGHT"
+.IX Header "COPYRIGHT"
+Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
+.PP
+Licensed under the OpenSSL license (the \*(L"License\*(R"). You may not use
+this file except in compliance with the License. You can obtain a copy
+in the file \s-1LICENSE\s0 in the source distribution or at
+<https://www.openssl.org/source/license.html>.
Property changes on: trunk/secure/lib/libcrypto/man/X509_check_private_key.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/X509_cmp_time.3
===================================================================
--- trunk/secure/lib/libcrypto/man/X509_cmp_time.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/X509_cmp_time.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,168 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "X509_cmp_time 3"
+.TH X509_cmp_time 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+X509_cmp_time \- X509 time functions
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 3
+\& #include <openssl/x509.h>
+\&
+\& int X509_cmp_time(const ASN1_TIME *asn1_time, time_t *cmp_time);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+\&\fIX509_cmp_time()\fR compares the \s-1ASN1_TIME\s0 in \fBasn1_time\fR with the time in
+\&\fBcmp_time\fR.
+.PP
+\&\fBasn1_time\fR must satisfy the \s-1ASN1_TIME\s0 format mandated by \s-1RFC 5280,\s0 i.e.,
+its format must be either \s-1YYMMDDHHMMSSZ\s0 or \s-1YYYYMMDDHHMMSSZ.\s0
+.PP
+If \fBcmp_time\fR is \s-1NULL\s0 the current time is used.
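+.PP
+Purely as a sketch (assuming \fBcert\fR points to a valid \fBX509\fR structure),
+a small helper can use \fIX509_cmp_time()\fR together with the standard
+\&\fIX509_get_notAfter()\fR accessor to test whether a certificate has already
+expired:
+.PP
+.Vb 12
+\& #include <openssl/x509.h>
+\&
+\& /* Returns 1 if notAfter is still in the future, 0 if it has passed,
+\&  * and -1 on error (X509_cmp_time() returns 0 on error, see BUGS). */
+\& static int cert_not_expired(X509 *cert)
+\& {
+\&     int rc = X509_cmp_time(X509_get_notAfter(cert), NULL); /* NULL = now */
+\&
+\&     if (rc == 0)
+\&         return -1;
+\&     return rc > 0 ? 1 : 0;
+\& }
+.Ve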
+.SH "BUGS"
+.IX Header "BUGS"
+Unlike many standard comparison functions, \fIX509_cmp_time()\fR returns 0 on error.
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+\&\fIX509_cmp_time()\fR returns \-1 if \fBasn1_time\fR is earlier than, or equal to,
+\&\fBcmp_time\fR, and 1 otherwise. It returns 0 on error.
+.SH "COPYRIGHT"
+.IX Header "COPYRIGHT"
+Copyright 2017\-2018 The OpenSSL Project Authors. All Rights Reserved.
+.PP
+Licensed under the OpenSSL license (the \*(L"License\*(R"). You may not use
+this file except in compliance with the License. You can obtain a copy
+in the file \s-1LICENSE\s0 in the source distribution or at
+<https://www.openssl.org/source/license.html>.
Property changes on: trunk/secure/lib/libcrypto/man/X509_cmp_time.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/d2i_ECPKParameters.3
===================================================================
--- trunk/secure/lib/libcrypto/man/d2i_ECPKParameters.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/d2i_ECPKParameters.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,213 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "d2i_ECPKParameters 3"
+.TH d2i_ECPKParameters 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+d2i_ECPKParameters, i2d_ECPKParameters, d2i_ECPKParameters_bio, i2d_ECPKParameters_bio, d2i_ECPKParameters_fp, i2d_ECPKParameters_fp, ECPKParameters_print, ECPKParameters_print_fp \- Functions for decoding and encoding ASN1 representations of elliptic curve entities
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 1
+\& #include <openssl/ec.h>
+\&
+\& EC_GROUP *d2i_ECPKParameters(EC_GROUP **px, const unsigned char **in, long len);
+\& int i2d_ECPKParameters(const EC_GROUP *x, unsigned char **out);
+\& #define d2i_ECPKParameters_bio(bp,x) ASN1_d2i_bio_of(EC_GROUP,NULL,d2i_ECPKParameters,bp,x)
+\& #define i2d_ECPKParameters_bio(bp,x) ASN1_i2d_bio_of_const(EC_GROUP,i2d_ECPKParameters,bp,x)
+\& #define d2i_ECPKParameters_fp(fp,x) (EC_GROUP *)ASN1_d2i_fp(NULL, \e
+\& (char *(*)())d2i_ECPKParameters,(fp),(unsigned char **)(x))
+\& #define i2d_ECPKParameters_fp(fp,x) ASN1_i2d_fp(i2d_ECPKParameters,(fp), \e
+\& (unsigned char *)(x))
+\& int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off);
+\& int ECPKParameters_print_fp(FILE *fp, const EC_GROUP *x, int off);
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+The ECPKParameters routines described here encode and parse the public parameters of an
+\&\fB\s-1EC_GROUP\s0\fR structure, which represents a curve.
+.PP
+\&\fId2i_ECPKParameters()\fR attempts to decode \fBlen\fR bytes at \fB*in\fR. If
+successful a pointer to the \fB\s-1EC_GROUP\s0\fR structure is returned. If an error
+occurred then \fB\s-1NULL\s0\fR is returned. If \fBpx\fR is not \fB\s-1NULL\s0\fR then the
+returned structure is written to \fB*px\fR. If \fB*px\fR is not \fB\s-1NULL\s0\fR
+then it is assumed that \fB*px\fR contains a valid \fB\s-1EC_GROUP\s0\fR
+structure and an attempt is made to reuse it. If the call is
+successful \fB*in\fR is incremented to the byte following the
+parsed data.
+.PP
+\&\fIi2d_ECPKParameters()\fR encodes the structure pointed to by \fBx\fR into \s-1DER\s0 format.
+If \fBout\fR is not \fB\s-1NULL\s0\fR it writes the \s-1DER\s0 encoded data to the buffer
+at \fB*out\fR, and increments it to point after the data just written.
+If the return value is negative an error occurred, otherwise it
+returns the length of the encoded data.
+.PP
+If \fB*out\fR is \fB\s-1NULL\s0\fR memory will be allocated for a buffer and the encoded
+data written to it. In this case \fB*out\fR is not incremented and it points to
+the start of the data just written.
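+.PP
+For illustration, a minimal sketch of a round trip through these two functions,
+using the built-in curve \fBNID_X9_62_prime256v1\fR purely as an example and with
+only basic error handling, could look as follows:
+.PP
+.Vb 23
+\& #include <openssl/crypto.h>
+\& #include <openssl/ec.h>
+\& #include <openssl/objects.h>
+\&
+\& /* Encode the parameters of a named curve to DER and decode them again. */
+\& static EC_GROUP *roundtrip_params(void)
+\& {
+\&     EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
+\&     unsigned char *der = NULL;      /* i2d allocates because *out is NULL */
+\&     EC_GROUP *copy = NULL;
+\&     int len;
+\&
+\&     if (group == NULL)
+\&         return NULL;
+\&     len = i2d_ECPKParameters(group, &der);
+\&     if (len > 0) {
+\&         const unsigned char *p = der;    /* d2i advances this pointer */
+\&         copy = d2i_ECPKParameters(NULL, &p, len);
+\&     }
+\&     OPENSSL_free(der);
+\&     EC_GROUP_free(group);
+\&     return copy;                         /* NULL on any failure */
+\& }
+.Ve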
+.PP
+\&\fId2i_ECPKParameters_bio()\fR is similar to \fId2i_ECPKParameters()\fR except it attempts
+to parse data from \s-1BIO\s0 \fBbp\fR.
+.PP
+\&\fId2i_ECPKParameters_fp()\fR is similar to \fId2i_ECPKParameters()\fR except it attempts
+to parse data from \s-1FILE\s0 pointer \fBfp\fR.
+.PP
+\&\fIi2d_ECPKParameters_bio()\fR is similar to \fIi2d_ECPKParameters()\fR except it writes
+the encoding of the structure \fBx\fR to \s-1BIO\s0 \fBbp\fR and it
+returns 1 for success and 0 for failure.
+.PP
+\&\fIi2d_ECPKParameters_fp()\fR is similar to \fIi2d_ECPKParameters()\fR except it writes
+the encoding of the structure \fBx\fR to \s-1FILE\s0 pointer \fBfp\fR and it
+returns 1 for success and 0 for failure.
+.PP
+These functions are very similar to the X509 functions described in \fId2i_X509\fR\|(3),
+where further notes and examples are available.
+.PP
+\&\fIECPKParameters_print()\fR and \fIECPKParameters_print_fp()\fR print a human-readable
+representation of the public parameters of the \s-1EC_GROUP\s0 to \fBbp\fR or \fBfp\fR,
+respectively. The output lines are indented by \fBoff\fR spaces.
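+.PP
+For example, as a small sketch (assuming \fBgroup\fR points to a valid
+\&\fB\s-1EC_GROUP\s0\fR), the parameters can be written to standard output with a
+two-space indent:
+.PP
+.Vb 9
+\& #include <stdio.h>
+\&
+\& #include <openssl/ec.h>
+\&
+\& /* Print the curve parameters of group to stdout, indented two spaces. */
+\& static int dump_params(const EC_GROUP *group)
+\& {
+\&     return ECPKParameters_print_fp(stdout, group, 2);
+\& }
+.Ve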
+.SH "RETURN VALUES"
+.IX Header "RETURN VALUES"
+\&\fId2i_ECPKParameters()\fR, \fId2i_ECPKParameters_bio()\fR and \fId2i_ECPKParameters_fp()\fR return a valid \fB\s-1EC_GROUP\s0\fR structure
+or \fB\s-1NULL\s0\fR if an error occurs.
+.PP
+\&\fIi2d_ECPKParameters()\fR returns the number of bytes successfully encoded or a negative
+value if an error occurs.
+.PP
+\&\fIi2d_ECPKParameters_bio()\fR, \fIi2d_ECPKParameters_fp()\fR, \fIECPKParameters_print()\fR and \fIECPKParameters_print_fp()\fR
+return 1 for success and 0 if an error occurs.
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fIcrypto\fR\|(3), \fIec\fR\|(3), \fIEC_GROUP_new\fR\|(3), \fIEC_GROUP_copy\fR\|(3),
+\&\fIEC_POINT_new\fR\|(3), \fIEC_POINT_add\fR\|(3), \fIEC_KEY_new\fR\|(3),
+\&\fIEC_GFp_simple_method\fR\|(3), \fId2i_X509\fR\|(3)
Property changes on: trunk/secure/lib/libcrypto/man/d2i_ECPKParameters.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/secure/lib/libcrypto/man/ec.3
===================================================================
--- trunk/secure/lib/libcrypto/man/ec.3 (rev 0)
+++ trunk/secure/lib/libcrypto/man/ec.3 2019-01-20 05:40:35 UTC (rev 12156)
@@ -0,0 +1,330 @@
+.\" $MidnightBSD$
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "ec 3"
+.TH ec 3 "2018-11-20" "1.0.2q" "OpenSSL"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+ec \- Elliptic Curve functions
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 2
+\& #include <openssl/ec.h>
+\& #include <openssl/bn.h>
+\&
+\& const EC_METHOD *EC_GFp_simple_method(void);
+\& const EC_METHOD *EC_GFp_mont_method(void);
+\& const EC_METHOD *EC_GFp_nist_method(void);
+\& const EC_METHOD *EC_GFp_nistp224_method(void);
+\& const EC_METHOD *EC_GFp_nistp256_method(void);
+\& const EC_METHOD *EC_GFp_nistp521_method(void);
+\&
+\& const EC_METHOD *EC_GF2m_simple_method(void);
+\&
+\& EC_GROUP *EC_GROUP_new(const EC_METHOD *meth);
+\& void EC_GROUP_free(EC_GROUP *group);
+\& void EC_GROUP_clear_free(EC_GROUP *group);
+\& int EC_GROUP_copy(EC_GROUP *dst, const EC_GROUP *src);
+\& EC_GROUP *EC_GROUP_dup(const EC_GROUP *src);
+\& const EC_METHOD *EC_GROUP_method_of(const EC_GROUP *group);
+\& int EC_METHOD_get_field_type(const EC_METHOD *meth);
+\& int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, const BIGNUM *order, const BIGNUM *cofactor);
+\& const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group);
+\& int EC_GROUP_get_order(const EC_GROUP *group, BIGNUM *order, BN_CTX *ctx);
+\& int EC_GROUP_get_cofactor(const EC_GROUP *group, BIGNUM *cofactor, BN_CTX *ctx);
+\& void EC_GROUP_set_curve_name(EC_GROUP *group, int nid);
+\& int EC_GROUP_get_curve_name(const EC_GROUP *group);
+\& void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag);
+\& int EC_GROUP_get_asn1_flag(const EC_GROUP *group);
+\& void EC_GROUP_set_point_conversion_form(EC_GROUP *group, point_conversion_form_t form);
+\& point_conversion_form_t EC_GROUP_get_point_conversion_form(const EC_GROUP *);
+\& unsigned char *EC_GROUP_get0_seed(const EC_GROUP *x);
+\& size_t EC_GROUP_get_seed_len(const EC_GROUP *);
+\& size_t EC_GROUP_set_seed(EC_GROUP *, const unsigned char *, size_t len);
+\& int EC_GROUP_set_curve_GFp(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+\& int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx);
+\& int EC_GROUP_set_curve_GF2m(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+\& int EC_GROUP_get_curve_GF2m(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx);
+\& int EC_GROUP_get_degree(const EC_GROUP *group);
+\& int EC_GROUP_check(const EC_GROUP *group, BN_CTX *ctx);
+\& int EC_GROUP_check_discriminant(const EC_GROUP *group, BN_CTX *ctx);
+\& int EC_GROUP_cmp(const EC_GROUP *a, const EC_GROUP *b, BN_CTX *ctx);
+\& EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+\& EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+\& EC_GROUP *EC_GROUP_new_by_curve_name(int nid);
+\&
+\& size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems);
+\&
+\& EC_POINT *EC_POINT_new(const EC_GROUP *group);
+\& void EC_POINT_free(EC_POINT *point);
+\& void EC_POINT_clear_free(EC_POINT *point);
+\& int EC_POINT_copy(EC_POINT *dst, const EC_POINT *src);
+\& EC_POINT *EC_POINT_dup(const EC_POINT *src, const EC_GROUP *group);
+\& const EC_METHOD *EC_POINT_method_of(const EC_POINT *point);
+\& int EC_POINT_set_to_infinity(const EC_GROUP *group, EC_POINT *point);
+\& int EC_POINT_set_Jprojective_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+\& const BIGNUM *x, const BIGNUM *y, const BIGNUM *z, BN_CTX *ctx);
+\& int EC_POINT_get_Jprojective_coordinates_GFp(const EC_GROUP *group,
+\& const EC_POINT *p, BIGNUM *x, BIGNUM *y, BIGNUM *z, BN_CTX *ctx);
+\& int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+\& const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx);
+\& int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group,
+\& const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx);
+\& int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+\& const BIGNUM *x, int y_bit, BN_CTX *ctx);
+\& int EC_POINT_set_affine_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p,
+\& const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx);
+\& int EC_POINT_get_affine_coordinates_GF2m(const EC_GROUP *group,
+\& const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx);
+\& int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p,
+\& const BIGNUM *x, int y_bit, BN_CTX *ctx);
+\& size_t EC_POINT_point2oct(const EC_GROUP *group, const EC_POINT *p,
+\& point_conversion_form_t form,
+\& unsigned char *buf, size_t len, BN_CTX *ctx);
+\& int EC_POINT_oct2point(const EC_GROUP *group, EC_POINT *p,
+\& const unsigned char *buf, size_t len, BN_CTX *ctx);
+\& BIGNUM *EC_POINT_point2bn(const EC_GROUP *, const EC_POINT *,
+\& point_conversion_form_t form, BIGNUM *, BN_CTX *);
+\& EC_POINT *EC_POINT_bn2point(const EC_GROUP *, const BIGNUM *,
+\& EC_POINT *, BN_CTX *);
+\& char *EC_POINT_point2hex(const EC_GROUP *, const EC_POINT *,
+\& point_conversion_form_t form, BN_CTX *);
+\& EC_POINT *EC_POINT_hex2point(const EC_GROUP *, const char *,
+\& EC_POINT *, BN_CTX *);
+\&
+\& int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx);
+\& int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, BN_CTX *ctx);
+\& int EC_POINT_invert(const EC_GROUP *group, EC_POINT *a, BN_CTX *ctx);
+\& int EC_POINT_is_at_infinity(const EC_GROUP *group, const EC_POINT *p);
+\& int EC_POINT_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_CTX *ctx);
+\& int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx);
+\& int EC_POINT_make_affine(const EC_GROUP *group, EC_POINT *point, BN_CTX *ctx);
+\& int EC_POINTs_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx);
+\& int EC_POINTs_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, size_t num, const EC_POINT *p[], const BIGNUM *m[], BN_CTX *ctx);
+\& int EC_POINT_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, const EC_POINT *q, const BIGNUM *m, BN_CTX *ctx);
+\& int EC_GROUP_precompute_mult(EC_GROUP *group, BN_CTX *ctx);
+\& int EC_GROUP_have_precompute_mult(const EC_GROUP *group);
+\&
+\& int EC_GROUP_get_basis_type(const EC_GROUP *);
+\& int EC_GROUP_get_trinomial_basis(const EC_GROUP *, unsigned int *k);
+\& int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1,
+\& unsigned int *k2, unsigned int *k3);
+\& EC_GROUP *d2i_ECPKParameters(EC_GROUP **, const unsigned char **in, long len);
+\& int i2d_ECPKParameters(const EC_GROUP *, unsigned char **out);
+\& #define d2i_ECPKParameters_bio(bp,x) ASN1_d2i_bio_of(EC_GROUP,NULL,d2i_ECPKParameters,bp,x)
+\& #define i2d_ECPKParameters_bio(bp,x) ASN1_i2d_bio_of_const(EC_GROUP,i2d_ECPKParameters,bp,x)
+\& #define d2i_ECPKParameters_fp(fp,x) (EC_GROUP *)ASN1_d2i_fp(NULL, \e
+\& (char *(*)())d2i_ECPKParameters,(fp),(unsigned char **)(x))
+\& #define i2d_ECPKParameters_fp(fp,x) ASN1_i2d_fp(i2d_ECPKParameters,(fp), \e
+\& (unsigned char *)(x))
+\& int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off);
+\& int ECPKParameters_print_fp(FILE *fp, const EC_GROUP *x, int off);
+\&
+\& EC_KEY *EC_KEY_new(void);
+\& int EC_KEY_get_flags(const EC_KEY *key);
+\& void EC_KEY_set_flags(EC_KEY *key, int flags);
+\& void EC_KEY_clear_flags(EC_KEY *key, int flags);
+\& EC_KEY *EC_KEY_new_by_curve_name(int nid);
+\& void EC_KEY_free(EC_KEY *key);
+\& EC_KEY *EC_KEY_copy(EC_KEY *dst, const EC_KEY *src);
+\& EC_KEY *EC_KEY_dup(const EC_KEY *src);
+\& int EC_KEY_up_ref(EC_KEY *key);
+\& const EC_GROUP *EC_KEY_get0_group(const EC_KEY *key);
+\& int EC_KEY_set_group(EC_KEY *key, const EC_GROUP *group);
+\& const BIGNUM *EC_KEY_get0_private_key(const EC_KEY *key);
+\& int EC_KEY_set_private_key(EC_KEY *key, const BIGNUM *prv);
+\& const EC_POINT *EC_KEY_get0_public_key(const EC_KEY *key);
+\& int EC_KEY_set_public_key(EC_KEY *key, const EC_POINT *pub);
+\& unsigned EC_KEY_get_enc_flags(const EC_KEY *key);
+\& void EC_KEY_set_enc_flags(EC_KEY *eckey, unsigned int flags);
+\& point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key);
+\& void EC_KEY_set_conv_form(EC_KEY *eckey, point_conversion_form_t cform);
+\& void *EC_KEY_get_key_method_data(EC_KEY *key,
+\& void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *));
+\& void EC_KEY_insert_key_method_data(EC_KEY *key, void *data,
+\& void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *));
+\& void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag);
+\& int EC_KEY_precompute_mult(EC_KEY *key, BN_CTX *ctx);
+\& int EC_KEY_generate_key(EC_KEY *key);
+\& int EC_KEY_check_key(const EC_KEY *key);
+\& int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y);
+\&
+\& EC_KEY *d2i_ECPrivateKey(EC_KEY **key, const unsigned char **in, long len);
+\& int i2d_ECPrivateKey(EC_KEY *key, unsigned char **out);
+\&
+\& EC_KEY *d2i_ECParameters(EC_KEY **key, const unsigned char **in, long len);
+\& int i2d_ECParameters(EC_KEY *key, unsigned char **out);
+\&
+\& EC_KEY *o2i_ECPublicKey(EC_KEY **key, const unsigned char **in, long len);
+\& int i2o_ECPublicKey(EC_KEY *key, unsigned char **out);
+\& int ECParameters_print(BIO *bp, const EC_KEY *key);
+\& int EC_KEY_print(BIO *bp, const EC_KEY *key, int off);
+\& int ECParameters_print_fp(FILE *fp, const EC_KEY *key);
+\& int EC_KEY_print_fp(FILE *fp, const EC_KEY *key, int off);
+\& #define ECParameters_dup(x) ASN1_dup_of(EC_KEY,i2d_ECParameters,d2i_ECParameters,x)
+\& #define EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx, nid) \e
+\& EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, EVP_PKEY_OP_PARAMGEN, \e
+\& EVP_PKEY_CTRL_EC_PARAMGEN_CURVE_NID, nid, NULL)
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+This library provides an extensive set of functions for performing operations on elliptic curves over finite fields.
+In general, an elliptic curve is defined by an equation of the form:
+.PP
+.Vb 1
+\& y^2 = x^3 + ax + b
+.Ve
+.PP
+An \fB\s-1EC_GROUP\s0\fR structure is used to represent the definition of an elliptic curve. Points on a curve are stored using an
+\&\fB\s-1EC_POINT\s0\fR structure. An \fB\s-1EC_KEY\s0\fR is used to hold a private/public key pair, where a private key is simply a \s-1BIGNUM\s0 and a
+public key is a point on a curve (represented by an \fB\s-1EC_POINT\s0\fR).
+.PP
+The library contains a number of alternative implementations of the different functions. Each implementation is optimised
+for different scenarios. No matter which implementation is being used, the interface remains the same. The library
+handles calling the correct implementation when an interface function is invoked. An implementation is represented by
+an \fB\s-1EC_METHOD\s0\fR structure.
+.PP
+The creation and destruction of \fB\s-1EC_GROUP\s0\fR objects is described in \fIEC_GROUP_new\fR\|(3). Functions for
+manipulating \fB\s-1EC_GROUP\s0\fR objects are described in \fIEC_GROUP_copy\fR\|(3).
+.PP
+Functions for creating, destroying and manipulating \fB\s-1EC_POINT\s0\fR objects are explained in \fIEC_POINT_new\fR\|(3),
+whilst functions for performing mathematical operations and tests on \fBEC_POINTs\fR are covered in \fIEC_POINT_add\fR\|(3).
+.PP
+For working with private and public keys refer to \fIEC_KEY_new\fR\|(3). Implementations are covered in
+\&\fIEC_GFp_simple_method\fR\|(3).
+.PP
+For information on encoding and decoding curve parameters to and from \s-1ASN1\s0 see \fId2i_ECPKParameters\fR\|(3).
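+.PP
+As a purely illustrative sketch, using the built-in curve
+\&\fBNID_X9_62_prime256v1\fR as an arbitrary example, a fresh key pair on a named
+curve can be created and checked as follows:
+.PP
+.Vb 14
+\& #include <openssl/ec.h>
+\& #include <openssl/objects.h>
+\&
+\& /* Create a key pair on the P-256 curve; returns NULL on failure. */
+\& static EC_KEY *make_p256_key(void)
+\& {
+\&     EC_KEY *key = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1);
+\&
+\&     if (key == NULL || !EC_KEY_generate_key(key) || !EC_KEY_check_key(key)) {
+\&         EC_KEY_free(key);          /* EC_KEY_free() tolerates NULL */
+\&         return NULL;
+\&     }
+\&     return key;
+\& }
+.Ve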
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+\&\fIcrypto\fR\|(3), \fIEC_GROUP_new\fR\|(3), \fIEC_GROUP_copy\fR\|(3),
+\&\fIEC_POINT_new\fR\|(3), \fIEC_POINT_add\fR\|(3), \fIEC_KEY_new\fR\|(3),
+\&\fIEC_GFp_simple_method\fR\|(3), \fId2i_ECPKParameters\fR\|(3)
Property changes on: trunk/secure/lib/libcrypto/man/ec.3
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property