/* $OpenBSD: sha1_amd64_shani.S,v 1.1 2024/12/06 11:57:18 jsing Exp $ */
/*
 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * _CET_ENDBR is placed at the function entry point below. When __CET__ is
 * defined it is expected to come from <cet.h>; otherwise it is defined to
 * nothing so that it can be used unconditionally.
 */
#ifdef __CET__
#include <cet.h>
#else
#define _CET_ENDBR
#endif

/*
 * SHA-1 implementation using the Intel SHA extensions:
 *
 * https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
 */

/* Function arguments (standard x86-64 ABI argument registers). */
#define	ctx		%rdi
#define	in		%rsi
#define	num		%rdx

/* Pointer to the first byte past the input (callee saved register). */
#define	end		%rbx

/* Hash state as it was on entry to the current block, for accumulation. */
#define	xabcd_save	%xmm0
#define	xe_save		%xmm1

/*
 * Working state: xabcd holds a, b, c and d, while xe0 and xe1 alternate
 * between supplying e for the current four rounds and receiving a copy of
 * xabcd for the following four rounds.
 */
#define	xabcd		%xmm2
#define	xe0		%xmm3
#define	xe1		%xmm4

/* Sliding window of sixteen message schedule words, four per register. */
#define	xmsg0		%xmm5
#define	xmsg1		%xmm6
#define	xmsg2		%xmm7
#define	xmsg3		%xmm8

/* Byte shuffle mask applied to message data as it is loaded. */
#define	xshufmask	%xmm9


/*
 * Load 16 bytes of message data from offset idx*16 of m into xmsg (unaligned
 * load) and permute the bytes with xshufmask.
 */
#define sha1_message_schedule_load(idx, m, xmsg) \
	movdqu	(idx*16)(m), xmsg;					\
	pshufb	xshufmask, xmsg;

/*
 * Compute the next four message schedule words in place in xm0. On entry
 * xm0 through xm3 hold the previous sixteen words, oldest (xm0) to newest
 * (xm3); on exit xm0 holds the four new words and is the newest.
 */
#define sha1_message_schedule_update(xm0, xm1, xm2, xm3) \
	sha1msg1 xm1, xm0;						\
	pxor	xm2, xm0;						\
	sha1msg2 xm3, xm0;

/*
 * Perform four rounds using round function/constant selector fn.
 *
 * On entry xe holds the copy of xabcd taken before the previous four rounds.
 * sha1nexte turns that into e (a rotated left by 30) added to the first
 * message word and replaces the remaining words of xe with those of xmsg.
 * The current xabcd is copied to xe_next for the next invocation before
 * sha1rnds4 updates xabcd.
 */
#define sha1_shani_round(fn, xmsg, xe, xe_next) \
	sha1nexte xmsg, xe;						\
	movdqa	xabcd, xe_next;						\
	sha1rnds4 fn, xe, xabcd;

/* Four rounds using message words loaded directly from the input block. */
#define sha1_shani_round_load(fn, idx, m, xmsg, xe, xe_next) \
	sha1_message_schedule_load(idx, m, xmsg);			\
	sha1_shani_round(fn, xmsg, xe, xe_next);

/* Four rounds using newly computed message schedule words (left in xm0). */
#define sha1_shani_round_update(fn, xm0, xm1, xm2, xm3, xe, xe_next) \
	sha1_message_schedule_update(xm0, xm1, xm2, xm3);		\
	sha1_shani_round(fn, xm0, xe, xe_next);


.text

/*
 * void sha1_block_shani(SHA_CTX *ctx, const void *in, size_t num);
 *
 * Process num consecutive 64 byte blocks starting at in, updating the SHA-1
 * state stored in ctx: four 32 bit words (a, b, c, d) at offset 0, followed
 * by e at offset 16. If num is zero, neither ctx nor in is accessed.
 *
 * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num
 *
 * Clobbers rdx, rsi, xmm0 through xmm9 and the flags. rbx holds the end of
 * input pointer and is saved and restored. There is no alignment requirement
 * on ctx or in (unaligned loads and stores are used for both).
 */
.align 16
.globl	sha1_block_shani
.type	sha1_block_shani,@function
sha1_block_shani:
	_CET_ENDBR

	/*
	 * Nothing to do for zero blocks. The block loop below tests its
	 * condition at the bottom and would otherwise process one block.
	 */
	testq	num, num
	jz	.Lshani_done

	/* Save callee save registers. */
	pushq	%rbx

	/* Compute end of message (in + num * 64). */
	shlq	$6, num
	leaq	(in, num, 1), end

	/* Load endian shuffle mask. */
	movdqa	shufmask(%rip), xshufmask

	/*
	 * Load current hash state from context. e is placed in the top dword
	 * of xe0 with the remaining dwords zero; this holds at the top of
	 * every loop iteration.
	 */
	movdqu	(0*16)(ctx), xabcd
	pshufd	$0x1b, xabcd, xabcd	/* dcba -> abcd */
	pxor	xe0, xe0
	pinsrd	$3, (1*16)(ctx), xe0	/* e */

	/* Jump over any padding emitted by the loop alignment. */
	jmp	.Lshani_block_loop

.align 16
.Lshani_block_loop:
	/* Save state for accumulation. */
	movdqa	xabcd, xabcd_save
	movdqa	xe0, xe_save

	/*
	 * Rounds 0 through 15 (four rounds at a time).
	 *
	 * For rounds 0 through 3, xe0 already holds e itself, so it is added
	 * to the first message word with paddd rather than sha1nexte.
	 */
	sha1_message_schedule_load(0, in, xmsg0);
	paddd	xmsg0, xe0
	movdqa	xabcd, xe1
	sha1rnds4 $0, xe0, xabcd

	sha1_shani_round_load($0, 1, in, xmsg1, xe1, xe0);
	sha1_shani_round_load($0, 2, in, xmsg2, xe0, xe1);
	sha1_shani_round_load($0, 3, in, xmsg3, xe1, xe0);

	/*
	 * Rounds 16 through 79 (four rounds at a time).
	 *
	 * The selector changes every 20 rounds: $0 for rounds 0 through 19,
	 * $1 for 20 through 39, $2 for 40 through 59, $3 for 60 through 79.
	 * The message registers rotate by one position on each line and
	 * xe0/xe1 swap roles.
	 */
	sha1_shani_round_update($0, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($1, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($1, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	sha1_shani_round_update($1, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($2, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	sha1_shani_round_update($2, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($2, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	sha1_shani_round_update($3, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($3, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($3, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	/*
	 * Accumulate hash state. xe0 holds the copy of xabcd taken before
	 * the final four rounds; sha1nexte derives the final e from it and
	 * adds the saved e, leaving the lower dwords as in xe_save (zero).
	 */
	paddd	xabcd_save, xabcd
	sha1nexte xe_save, xe0

	/* Advance to the next block; pointers compare as unsigned. */
	addq	$64, in
	cmpq	end, in
	jb	.Lshani_block_loop

	/* Update stored hash context. */
	pshufd	$0x1b, xabcd, xabcd	/* abcd -> dcba */
	movdqu	xabcd, (0*16)(ctx)
	pextrd	$3, xe0, (1*16)(ctx)	/* e */

	/* Restore callee save registers. */
	popq	%rbx

.Lshani_done:
	ret
.size	sha1_block_shani,.-sha1_block_shani

.section .rodata

/*
 * Shuffle mask for pshufb - reverses all 16 bytes of a register. Applied to
 * message data this converts each 32 bit word from big endian to little
 * endian and reverses the order of the four words, so that the first word in
 * memory ends up in the most significant dword.
 *
 * Must remain 16 byte aligned as it is loaded with movdqa.
 */
.align	16
.type	shufmask,@object
shufmask:
.octa	0x000102030405060708090a0b0c0d0e0f
.size	shufmask,.-shufmask