/* $OpenBSD: sha1_amd64_shani.S,v 1.1 2024/12/06 11:57:18 jsing Exp $ */
/*
 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/* Without CET support, _CET_ENDBR expands to nothing. */
#if defined(__CET__)
#include <cet.h>
#else
#define _CET_ENDBR
#endif

/*
 * SHA-1 implementation using the Intel SHA extensions:
 *
 * https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
 */

/* Function arguments. */
#define	ctx		%rdi
#define	in		%rsi
#define	num		%rdx

/* Address at which block processing stops (callee-saved register). */
#define	end		%rbx

/* Hash state as it was on entry to the current block. */
#define	xabcd_save	%xmm0
#define	xe_save		%xmm1

/* Working state: a/b/c/d in one register, e alternating between two. */
#define	xabcd		%xmm2
#define	xe0		%xmm3
#define	xe1		%xmm4

/* Four message schedule registers, four 32-bit words each. */
#define	xmsg0		%xmm5
#define	xmsg1		%xmm6
#define	xmsg2		%xmm7
#define	xmsg3		%xmm8

/* Byte shuffle mask applied to every loaded message chunk. */
#define	xshufmask	%xmm9


/*
 * Load the 16 byte chunk at offset blk*16 from src into xw (unaligned
 * load) and permute its bytes with xshufmask.
 */
#define sha1_message_schedule_load(blk, src, xw) \
	movdqu	(blk*16)(src), xw; \
	pshufb	xshufmask, xw;

/*
 * Replace w0 with the next four message schedule words, derived from
 * w0, w1, w2 and w3 via sha1msg1, pxor and sha1msg2.
 */
#define sha1_message_schedule_update(w0, w1, w2, w3) \
	sha1msg1 w1, w0; \
	pxor	w2, w0; \
	sha1msg2 w3, w0;

/*
 * Four rounds: sha1nexte combines xw into e_cur, e_next receives a copy
 * of xabcd taken before sha1rnds4 (with function selector func) updates
 * xabcd in place.
 */
#define sha1_shani_round(func, xw, e_cur, e_next) \
	sha1nexte xw, e_cur; \
	movdqa	xabcd, e_next; \
	sha1rnds4 func, e_cur, xabcd;

/* Four rounds using a message chunk freshly loaded from src. */
#define sha1_shani_round_load(func, blk, src, xw, e_cur, e_next) \
	sha1_message_schedule_load(blk, src, xw); \
	sha1_shani_round(func, xw, e_cur, e_next);
/*
 * Four rounds using message words computed from the schedule: xm0 is
 * first replaced by the next four schedule words, then consumed by
 * sha1_shani_round.
 */
#define sha1_shani_round_update(fn, xm0, xm1, xm2, xm3, xe, xe_next) \
	sha1_message_schedule_update(xm0, xm1, xm2, xm3); \
	sha1_shani_round(fn, xm0, xe, xe_next);


.text

/*
 * void sha1_block_shani(SHA_CTX *ctx, const void *in, size_t num);
 *
 * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num
 *
 * Processes num 64 byte blocks starting at in, updating the hash state
 * held in ctx. Only the first 20 bytes of ctx are accessed: 16 bytes of
 * a/b/c/d at offset 0 and the 32-bit e at offset 16. Both in and ctx are
 * accessed with unaligned-safe instructions.
 *
 * A num of zero is a no-op: nothing is read from in and ctx is left
 * unmodified.
 *
 * Clobbers: rsi, rdx, xmm0-xmm9, flags. rbx is saved and restored on the
 * stack (8 bytes).
 */
.align 16
.globl	sha1_block_shani
.type	sha1_block_shani,@function
sha1_block_shani:
	_CET_ENDBR

	/*
	 * The block loop below tests its condition at the bottom, so it
	 * always runs at least once; bail out early when there is no input.
	 */
	testq	num, num
	jz	.Lshani_block_done

	/* Save callee save registers. */
	pushq	%rbx

	/* Compute end of message: end = in + num * 64. */
	shlq	$6, num
	leaq	(in, num, 1), end

	/* Load endian shuffle mask. */
	movdqa	shufmask(%rip), xshufmask

	/* Load current hash state from context. */
	movdqu	(0*16)(ctx), xabcd
	pshufd	$0x1b, xabcd, xabcd		/* dcba -> abcd */
	pxor	xe0, xe0
	pinsrd	$3, (1*16)(ctx), xe0		/* e */

	jmp	.Lshani_block_loop

.align 16
.Lshani_block_loop:
	/* Save state for accumulation. */
	movdqa	xabcd, xabcd_save
	movdqa	xe0, xe_save

	/*
	 * Rounds 0 through 15 (four rounds at a time). The first group adds
	 * the message words to e directly, since there is no previous round
	 * for sha1nexte to draw on.
	 */
	sha1_message_schedule_load(0, in, xmsg0);
	paddd	xmsg0, xe0
	movdqa	xabcd, xe1
	sha1rnds4 $0, xe0, xabcd

	sha1_shani_round_load($0, 1, in, xmsg1, xe1, xe0);
	sha1_shani_round_load($0, 2, in, xmsg2, xe0, xe1);
	sha1_shani_round_load($0, 3, in, xmsg3, xe1, xe0);

	/*
	 * Rounds 16 through 79 (four rounds at a time). The function
	 * selector advances every 20 rounds: $0 for rounds 0-19, $1 for
	 * 20-39, $2 for 40-59 and $3 for 60-79.
	 */
	sha1_shani_round_update($0, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($1, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($1, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	sha1_shani_round_update($1, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($2, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	sha1_shani_round_update($2, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($2, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	sha1_shani_round_update($3, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($3, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($3, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	/*
	 * Accumulate hash state: add the saved a/b/c/d to the new values,
	 * and combine the saved e with the value derived from xe0.
	 */
	paddd	xabcd_save, xabcd
	sha1nexte xe_save, xe0

	/* Advance to the next block; unsigned compare against end. */
	addq	$64, in
	cmpq	end, in
	jb	.Lshani_block_loop

	/* Update stored hash context. */
	pshufd	$0x1b, xabcd, xabcd		/* abcd -> dcba */
	movdqu	xabcd, (0*16)(ctx)
	pextrd	$3, xe0, (1*16)(ctx)		/* e */

	/* Restore callee save registers. */
	popq	%rbx

.Lshani_block_done:
	ret
.size	sha1_block_shani,.-sha1_block_shani

.rodata

/*
 * Shuffle mask - byte reversal for little endian to big endian word conversion,
 * and reordering to abcd. Used with pshufb, it reverses all 16 bytes of the
 * register.
 */
.align 16
.type	shufmask,@object
shufmask:
.octa	0x000102030405060708090a0b0c0d0e0f
.size	shufmask,.-shufmask