ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/src/vendor-crypto/openssl/0.9.8ze/crypto/bn/asm/mips3.s
Revision: 6970
Committed: Sat Mar 7 00:08:20 2015 UTC (9 years, 1 month ago) by laffer1
File size: 37870 byte(s)
Log Message:
tag version

File Contents

# Content
1 .rdata
2 .asciiz "mips3.s, Version 1.1"
3 .asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
4
5 /*
6 * ====================================================================
7 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
8 * project.
9 *
10 * Rights for redistribution and usage in source and binary forms are
11 * granted according to the OpenSSL license. Warranty of any kind is
12 * disclaimed.
13 * ====================================================================
14 */
15
16 /*
17 * This is my modest contributon to the OpenSSL project (see
18 * http://www.openssl.org/ for more information about it) and is
19 * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
20 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
21 *
22 * The module is designed to work with either of the "new" MIPS ABI(5),
23 * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under
24 * IRIX 5.x not only because it doesn't support new ABIs but also
25 * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
26 * 64-bit instructions (daddu, dmultu, etc.) found below gonna only
27 * cause illegal instruction exception:-(
28 *
29 * In addition the code depends on preprocessor flags set up by MIPSpro
30 * compiler driver (either as or cc) and therefore (probably?) can't be
31 * compiled by the GNU assembler. GNU C driver manages fine though...
32 * I mean as long as -mmips-as is specified or is the default option,
33 * because then it simply invokes /usr/bin/as which in turn takes
34 * perfect care of the preprocessor definitions. Another neat feature
35 * offered by the MIPSpro assembler is an optimization pass. This gave
36 * me the opportunity to have the code looking more regular as all those
37 * architecture dependent instruction rescheduling details were left to
38 * the assembler. Cool, huh?
39 *
40 * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
41 * goes way over 3 times faster!
42 *
43 * <appro@fy.chalmers.se>
44 */
45 #include <asm.h>
46 #include <regdef.h>
47
48 #if _MIPS_ISA>=4
49 #define MOVNZ(cond,dst,src) \
50 movn dst,src,cond
51 #else
52 #define MOVNZ(cond,dst,src) \
53 .set noreorder; \
54 bnezl cond,.+8; \
55 move dst,src; \
56 .set reorder
57 #endif
58
59 .text
60
61 .set noat
62 .set reorder
63
64 #define MINUS4 v1
65
66 .align 5
67 LEAF(bn_mul_add_words)
68 .set noreorder
69 bgtzl a2,.L_bn_mul_add_words_proceed
70 ld t0,0(a1)
71 jr ra
72 move v0,zero
73 .set reorder
74
75 .L_bn_mul_add_words_proceed:
76 li MINUS4,-4
77 and ta0,a2,MINUS4
78 move v0,zero
79 beqz ta0,.L_bn_mul_add_words_tail
80
81 .L_bn_mul_add_words_loop:
82 dmultu t0,a3
83 ld t1,0(a0)
84 ld t2,8(a1)
85 ld t3,8(a0)
86 ld ta0,16(a1)
87 ld ta1,16(a0)
88 daddu t1,v0
89 sltu v0,t1,v0 /* All manuals say it "compares 32-bit
90 * values", but it seems to work fine
91 * even on 64-bit registers. */
92 mflo AT
93 mfhi t0
94 daddu t1,AT
95 daddu v0,t0
96 sltu AT,t1,AT
97 sd t1,0(a0)
98 daddu v0,AT
99
100 dmultu t2,a3
101 ld ta2,24(a1)
102 ld ta3,24(a0)
103 daddu t3,v0
104 sltu v0,t3,v0
105 mflo AT
106 mfhi t2
107 daddu t3,AT
108 daddu v0,t2
109 sltu AT,t3,AT
110 sd t3,8(a0)
111 daddu v0,AT
112
113 dmultu ta0,a3
114 subu a2,4
115 PTR_ADD a0,32
116 PTR_ADD a1,32
117 daddu ta1,v0
118 sltu v0,ta1,v0
119 mflo AT
120 mfhi ta0
121 daddu ta1,AT
122 daddu v0,ta0
123 sltu AT,ta1,AT
124 sd ta1,-16(a0)
125 daddu v0,AT
126
127
128 dmultu ta2,a3
129 and ta0,a2,MINUS4
130 daddu ta3,v0
131 sltu v0,ta3,v0
132 mflo AT
133 mfhi ta2
134 daddu ta3,AT
135 daddu v0,ta2
136 sltu AT,ta3,AT
137 sd ta3,-8(a0)
138 daddu v0,AT
139 .set noreorder
140 bgtzl ta0,.L_bn_mul_add_words_loop
141 ld t0,0(a1)
142
143 bnezl a2,.L_bn_mul_add_words_tail
144 ld t0,0(a1)
145 .set reorder
146
147 .L_bn_mul_add_words_return:
148 jr ra
149
150 .L_bn_mul_add_words_tail:
151 dmultu t0,a3
152 ld t1,0(a0)
153 subu a2,1
154 daddu t1,v0
155 sltu v0,t1,v0
156 mflo AT
157 mfhi t0
158 daddu t1,AT
159 daddu v0,t0
160 sltu AT,t1,AT
161 sd t1,0(a0)
162 daddu v0,AT
163 beqz a2,.L_bn_mul_add_words_return
164
165 ld t0,8(a1)
166 dmultu t0,a3
167 ld t1,8(a0)
168 subu a2,1
169 daddu t1,v0
170 sltu v0,t1,v0
171 mflo AT
172 mfhi t0
173 daddu t1,AT
174 daddu v0,t0
175 sltu AT,t1,AT
176 sd t1,8(a0)
177 daddu v0,AT
178 beqz a2,.L_bn_mul_add_words_return
179
180 ld t0,16(a1)
181 dmultu t0,a3
182 ld t1,16(a0)
183 daddu t1,v0
184 sltu v0,t1,v0
185 mflo AT
186 mfhi t0
187 daddu t1,AT
188 daddu v0,t0
189 sltu AT,t1,AT
190 sd t1,16(a0)
191 daddu v0,AT
192 jr ra
193 END(bn_mul_add_words)
194
195 .align 5
196 LEAF(bn_mul_words)
197 .set noreorder
198 bgtzl a2,.L_bn_mul_words_proceed
199 ld t0,0(a1)
200 jr ra
201 move v0,zero
202 .set reorder
203
204 .L_bn_mul_words_proceed:
205 li MINUS4,-4
206 and ta0,a2,MINUS4
207 move v0,zero
208 beqz ta0,.L_bn_mul_words_tail
209
210 .L_bn_mul_words_loop:
211 dmultu t0,a3
212 ld t2,8(a1)
213 ld ta0,16(a1)
214 ld ta2,24(a1)
215 mflo AT
216 mfhi t0
217 daddu v0,AT
218 sltu t1,v0,AT
219 sd v0,0(a0)
220 daddu v0,t1,t0
221
222 dmultu t2,a3
223 subu a2,4
224 PTR_ADD a0,32
225 PTR_ADD a1,32
226 mflo AT
227 mfhi t2
228 daddu v0,AT
229 sltu t3,v0,AT
230 sd v0,-24(a0)
231 daddu v0,t3,t2
232
233 dmultu ta0,a3
234 mflo AT
235 mfhi ta0
236 daddu v0,AT
237 sltu ta1,v0,AT
238 sd v0,-16(a0)
239 daddu v0,ta1,ta0
240
241
242 dmultu ta2,a3
243 and ta0,a2,MINUS4
244 mflo AT
245 mfhi ta2
246 daddu v0,AT
247 sltu ta3,v0,AT
248 sd v0,-8(a0)
249 daddu v0,ta3,ta2
250 .set noreorder
251 bgtzl ta0,.L_bn_mul_words_loop
252 ld t0,0(a1)
253
254 bnezl a2,.L_bn_mul_words_tail
255 ld t0,0(a1)
256 .set reorder
257
258 .L_bn_mul_words_return:
259 jr ra
260
261 .L_bn_mul_words_tail:
262 dmultu t0,a3
263 subu a2,1
264 mflo AT
265 mfhi t0
266 daddu v0,AT
267 sltu t1,v0,AT
268 sd v0,0(a0)
269 daddu v0,t1,t0
270 beqz a2,.L_bn_mul_words_return
271
272 ld t0,8(a1)
273 dmultu t0,a3
274 subu a2,1
275 mflo AT
276 mfhi t0
277 daddu v0,AT
278 sltu t1,v0,AT
279 sd v0,8(a0)
280 daddu v0,t1,t0
281 beqz a2,.L_bn_mul_words_return
282
283 ld t0,16(a1)
284 dmultu t0,a3
285 mflo AT
286 mfhi t0
287 daddu v0,AT
288 sltu t1,v0,AT
289 sd v0,16(a0)
290 daddu v0,t1,t0
291 jr ra
292 END(bn_mul_words)
293
294 .align 5
295 LEAF(bn_sqr_words)
296 .set noreorder
297 bgtzl a2,.L_bn_sqr_words_proceed
298 ld t0,0(a1)
299 jr ra
300 move v0,zero
301 .set reorder
302
303 .L_bn_sqr_words_proceed:
304 li MINUS4,-4
305 and ta0,a2,MINUS4
306 move v0,zero
307 beqz ta0,.L_bn_sqr_words_tail
308
309 .L_bn_sqr_words_loop:
310 dmultu t0,t0
311 ld t2,8(a1)
312 ld ta0,16(a1)
313 ld ta2,24(a1)
314 mflo t1
315 mfhi t0
316 sd t1,0(a0)
317 sd t0,8(a0)
318
319 dmultu t2,t2
320 subu a2,4
321 PTR_ADD a0,64
322 PTR_ADD a1,32
323 mflo t3
324 mfhi t2
325 sd t3,-48(a0)
326 sd t2,-40(a0)
327
328 dmultu ta0,ta0
329 mflo ta1
330 mfhi ta0
331 sd ta1,-32(a0)
332 sd ta0,-24(a0)
333
334
335 dmultu ta2,ta2
336 and ta0,a2,MINUS4
337 mflo ta3
338 mfhi ta2
339 sd ta3,-16(a0)
340 sd ta2,-8(a0)
341
342 .set noreorder
343 bgtzl ta0,.L_bn_sqr_words_loop
344 ld t0,0(a1)
345
346 bnezl a2,.L_bn_sqr_words_tail
347 ld t0,0(a1)
348 .set reorder
349
350 .L_bn_sqr_words_return:
351 move v0,zero
352 jr ra
353
354 .L_bn_sqr_words_tail:
355 dmultu t0,t0
356 subu a2,1
357 mflo t1
358 mfhi t0
359 sd t1,0(a0)
360 sd t0,8(a0)
361 beqz a2,.L_bn_sqr_words_return
362
363 ld t0,8(a1)
364 dmultu t0,t0
365 subu a2,1
366 mflo t1
367 mfhi t0
368 sd t1,16(a0)
369 sd t0,24(a0)
370 beqz a2,.L_bn_sqr_words_return
371
372 ld t0,16(a1)
373 dmultu t0,t0
374 mflo t1
375 mfhi t0
376 sd t1,32(a0)
377 sd t0,40(a0)
378 jr ra
379 END(bn_sqr_words)
380
381 .align 5
382 LEAF(bn_add_words)
383 .set noreorder
384 bgtzl a3,.L_bn_add_words_proceed
385 ld t0,0(a1)
386 jr ra
387 move v0,zero
388 .set reorder
389
390 .L_bn_add_words_proceed:
391 li MINUS4,-4
392 and AT,a3,MINUS4
393 move v0,zero
394 beqz AT,.L_bn_add_words_tail
395
396 .L_bn_add_words_loop:
397 ld ta0,0(a2)
398 subu a3,4
399 ld t1,8(a1)
400 and AT,a3,MINUS4
401 ld t2,16(a1)
402 PTR_ADD a2,32
403 ld t3,24(a1)
404 PTR_ADD a0,32
405 ld ta1,-24(a2)
406 PTR_ADD a1,32
407 ld ta2,-16(a2)
408 ld ta3,-8(a2)
409 daddu ta0,t0
410 sltu t8,ta0,t0
411 daddu t0,ta0,v0
412 sltu v0,t0,ta0
413 sd t0,-32(a0)
414 daddu v0,t8
415
416 daddu ta1,t1
417 sltu t9,ta1,t1
418 daddu t1,ta1,v0
419 sltu v0,t1,ta1
420 sd t1,-24(a0)
421 daddu v0,t9
422
423 daddu ta2,t2
424 sltu t8,ta2,t2
425 daddu t2,ta2,v0
426 sltu v0,t2,ta2
427 sd t2,-16(a0)
428 daddu v0,t8
429
430 daddu ta3,t3
431 sltu t9,ta3,t3
432 daddu t3,ta3,v0
433 sltu v0,t3,ta3
434 sd t3,-8(a0)
435 daddu v0,t9
436
437 .set noreorder
438 bgtzl AT,.L_bn_add_words_loop
439 ld t0,0(a1)
440
441 bnezl a3,.L_bn_add_words_tail
442 ld t0,0(a1)
443 .set reorder
444
445 .L_bn_add_words_return:
446 jr ra
447
448 .L_bn_add_words_tail:
449 ld ta0,0(a2)
450 daddu ta0,t0
451 subu a3,1
452 sltu t8,ta0,t0
453 daddu t0,ta0,v0
454 sltu v0,t0,ta0
455 sd t0,0(a0)
456 daddu v0,t8
457 beqz a3,.L_bn_add_words_return
458
459 ld t1,8(a1)
460 ld ta1,8(a2)
461 daddu ta1,t1
462 subu a3,1
463 sltu t9,ta1,t1
464 daddu t1,ta1,v0
465 sltu v0,t1,ta1
466 sd t1,8(a0)
467 daddu v0,t9
468 beqz a3,.L_bn_add_words_return
469
470 ld t2,16(a1)
471 ld ta2,16(a2)
472 daddu ta2,t2
473 sltu t8,ta2,t2
474 daddu t2,ta2,v0
475 sltu v0,t2,ta2
476 sd t2,16(a0)
477 daddu v0,t8
478 jr ra
479 END(bn_add_words)
480
481 .align 5
482 LEAF(bn_sub_words)
483 .set noreorder
484 bgtzl a3,.L_bn_sub_words_proceed
485 ld t0,0(a1)
486 jr ra
487 move v0,zero
488 .set reorder
489
490 .L_bn_sub_words_proceed:
491 li MINUS4,-4
492 and AT,a3,MINUS4
493 move v0,zero
494 beqz AT,.L_bn_sub_words_tail
495
496 .L_bn_sub_words_loop:
497 ld ta0,0(a2)
498 subu a3,4
499 ld t1,8(a1)
500 and AT,a3,MINUS4
501 ld t2,16(a1)
502 PTR_ADD a2,32
503 ld t3,24(a1)
504 PTR_ADD a0,32
505 ld ta1,-24(a2)
506 PTR_ADD a1,32
507 ld ta2,-16(a2)
508 ld ta3,-8(a2)
509 sltu t8,t0,ta0
510 dsubu t0,ta0
511 dsubu ta0,t0,v0
512 sd ta0,-32(a0)
513 MOVNZ (t0,v0,t8)
514
515 sltu t9,t1,ta1
516 dsubu t1,ta1
517 dsubu ta1,t1,v0
518 sd ta1,-24(a0)
519 MOVNZ (t1,v0,t9)
520
521
522 sltu t8,t2,ta2
523 dsubu t2,ta2
524 dsubu ta2,t2,v0
525 sd ta2,-16(a0)
526 MOVNZ (t2,v0,t8)
527
528 sltu t9,t3,ta3
529 dsubu t3,ta3
530 dsubu ta3,t3,v0
531 sd ta3,-8(a0)
532 MOVNZ (t3,v0,t9)
533
534 .set noreorder
535 bgtzl AT,.L_bn_sub_words_loop
536 ld t0,0(a1)
537
538 bnezl a3,.L_bn_sub_words_tail
539 ld t0,0(a1)
540 .set reorder
541
542 .L_bn_sub_words_return:
543 jr ra
544
545 .L_bn_sub_words_tail:
546 ld ta0,0(a2)
547 subu a3,1
548 sltu t8,t0,ta0
549 dsubu t0,ta0
550 dsubu ta0,t0,v0
551 MOVNZ (t0,v0,t8)
552 sd ta0,0(a0)
553 beqz a3,.L_bn_sub_words_return
554
555 ld t1,8(a1)
556 subu a3,1
557 ld ta1,8(a2)
558 sltu t9,t1,ta1
559 dsubu t1,ta1
560 dsubu ta1,t1,v0
561 MOVNZ (t1,v0,t9)
562 sd ta1,8(a0)
563 beqz a3,.L_bn_sub_words_return
564
565 ld t2,16(a1)
566 ld ta2,16(a2)
567 sltu t8,t2,ta2
568 dsubu t2,ta2
569 dsubu ta2,t2,v0
570 MOVNZ (t2,v0,t8)
571 sd ta2,16(a0)
572 jr ra
573 END(bn_sub_words)
574
575 #undef MINUS4
576
577 .align 5
578 LEAF(bn_div_3_words)
579 .set reorder
580 move a3,a0 /* we know that bn_div_words doesn't
581 * touch a3, ta2, ta3 and preserves a2
582 * so that we can save two arguments
583 * and return address in registers
584 * instead of stack:-)
585 */
586 ld a0,(a3)
587 move ta2,a1
588 ld a1,-8(a3)
589 bne a0,a2,.L_bn_div_3_words_proceed
590 li v0,-1
591 jr ra
592 .L_bn_div_3_words_proceed:
593 move ta3,ra
594 bal bn_div_words
595 move ra,ta3
596 dmultu ta2,v0
597 ld t2,-16(a3)
598 move ta0,zero
599 mfhi t1
600 mflo t0
601 sltu t8,t1,v1
602 .L_bn_div_3_words_inner_loop:
603 bnez t8,.L_bn_div_3_words_inner_loop_done
604 sgeu AT,t2,t0
605 seq t9,t1,v1
606 and AT,t9
607 sltu t3,t0,ta2
608 daddu v1,a2
609 dsubu t1,t3
610 dsubu t0,ta2
611 sltu t8,t1,v1
612 sltu ta0,v1,a2
613 or t8,ta0
614 .set noreorder
615 beqzl AT,.L_bn_div_3_words_inner_loop
616 dsubu v0,1
617 .set reorder
618 .L_bn_div_3_words_inner_loop_done:
619 jr ra
620 END(bn_div_3_words)
621
622 .align 5
623 LEAF(bn_div_words)
624 .set noreorder
625 bnezl a2,.L_bn_div_words_proceed
626 move v1,zero
627 jr ra
628 li v0,-1 /* I'd rather signal div-by-zero
629 * which can be done with 'break 7' */
630
631 .L_bn_div_words_proceed:
632 bltz a2,.L_bn_div_words_body
633 move t9,v1
634 dsll a2,1
635 bgtz a2,.-4
636 addu t9,1
637
638 .set reorder
639 negu t1,t9
640 li t2,-1
641 dsll t2,t1
642 and t2,a0
643 dsrl AT,a1,t1
644 .set noreorder
645 bnezl t2,.+8
646 break 6 /* signal overflow */
647 .set reorder
648 dsll a0,t9
649 dsll a1,t9
650 or a0,AT
651
652 #define QT ta0
653 #define HH ta1
654 #define DH v1
655 .L_bn_div_words_body:
656 dsrl DH,a2,32
657 sgeu AT,a0,a2
658 .set noreorder
659 bnezl AT,.+8
660 dsubu a0,a2
661 .set reorder
662
663 li QT,-1
664 dsrl HH,a0,32
665 dsrl QT,32 /* q=0xffffffff */
666 beq DH,HH,.L_bn_div_words_skip_div1
667 ddivu zero,a0,DH
668 mflo QT
669 .L_bn_div_words_skip_div1:
670 dmultu a2,QT
671 dsll t3,a0,32
672 dsrl AT,a1,32
673 or t3,AT
674 mflo t0
675 mfhi t1
676 .L_bn_div_words_inner_loop1:
677 sltu t2,t3,t0
678 seq t8,HH,t1
679 sltu AT,HH,t1
680 and t2,t8
681 sltu v0,t0,a2
682 or AT,t2
683 .set noreorder
684 beqz AT,.L_bn_div_words_inner_loop1_done
685 dsubu t1,v0
686 dsubu t0,a2
687 b .L_bn_div_words_inner_loop1
688 dsubu QT,1
689 .set reorder
690 .L_bn_div_words_inner_loop1_done:
691
692 dsll a1,32
693 dsubu a0,t3,t0
694 dsll v0,QT,32
695
696 li QT,-1
697 dsrl HH,a0,32
698 dsrl QT,32 /* q=0xffffffff */
699 beq DH,HH,.L_bn_div_words_skip_div2
700 ddivu zero,a0,DH
701 mflo QT
702 .L_bn_div_words_skip_div2:
703 #undef DH
704 dmultu a2,QT
705 dsll t3,a0,32
706 dsrl AT,a1,32
707 or t3,AT
708 mflo t0
709 mfhi t1
710 .L_bn_div_words_inner_loop2:
711 sltu t2,t3,t0
712 seq t8,HH,t1
713 sltu AT,HH,t1
714 and t2,t8
715 sltu v1,t0,a2
716 or AT,t2
717 .set noreorder
718 beqz AT,.L_bn_div_words_inner_loop2_done
719 dsubu t1,v1
720 dsubu t0,a2
721 b .L_bn_div_words_inner_loop2
722 dsubu QT,1
723 .set reorder
724 .L_bn_div_words_inner_loop2_done:
725 #undef HH
726
727 dsubu a0,t3,t0
728 or v0,QT
729 dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */
730 dsrl a2,t9 /* restore a2 */
731 jr ra
732 #undef QT
733 END(bn_div_words)
734
735 #define a_0 t0
736 #define a_1 t1
737 #define a_2 t2
738 #define a_3 t3
739 #define b_0 ta0
740 #define b_1 ta1
741 #define b_2 ta2
742 #define b_3 ta3
743
744 #define a_4 s0
745 #define a_5 s2
746 #define a_6 s4
747 #define a_7 a1 /* once we load a[7] we don't need a anymore */
748 #define b_4 s1
749 #define b_5 s3
750 #define b_6 s5
751 #define b_7 a2 /* once we load b[7] we don't need b anymore */
752
753 #define t_1 t8
754 #define t_2 t9
755
756 #define c_1 v0
757 #define c_2 v1
758 #define c_3 a3
759
760 #define FRAME_SIZE 48
761
762 .align 5
763 LEAF(bn_mul_comba8)
764 .set noreorder
765 PTR_SUB sp,FRAME_SIZE
766 .frame sp,64,ra
767 .set reorder
768 ld a_0,0(a1) /* If compiled with -mips3 option on
769 * R5000 box assembler barks on this
770 * line with "shouldn't have mult/div
771 * as last instruction in bb (R10K
772 * bug)" warning. If anybody out there
773 * has a clue about how to circumvent
774 * this do send me a note.
775 * <appro@fy.chalmers.se>
776 */
777 ld b_0,0(a2)
778 ld a_1,8(a1)
779 ld a_2,16(a1)
780 ld a_3,24(a1)
781 ld b_1,8(a2)
782 ld b_2,16(a2)
783 ld b_3,24(a2)
784 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
785 sd s0,0(sp)
786 sd s1,8(sp)
787 sd s2,16(sp)
788 sd s3,24(sp)
789 sd s4,32(sp)
790 sd s5,40(sp)
791 mflo c_1
792 mfhi c_2
793
794 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
795 ld a_4,32(a1)
796 ld a_5,40(a1)
797 ld a_6,48(a1)
798 ld a_7,56(a1)
799 ld b_4,32(a2)
800 ld b_5,40(a2)
801 mflo t_1
802 mfhi t_2
803 daddu c_2,t_1
804 sltu AT,c_2,t_1
805 daddu c_3,t_2,AT
806 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
807 ld b_6,48(a2)
808 ld b_7,56(a2)
809 sd c_1,0(a0) /* r[0]=c1; */
810 mflo t_1
811 mfhi t_2
812 daddu c_2,t_1
813 sltu AT,c_2,t_1
814 daddu t_2,AT
815 daddu c_3,t_2
816 sltu c_1,c_3,t_2
817 sd c_2,8(a0) /* r[1]=c2; */
818
819 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
820 mflo t_1
821 mfhi t_2
822 daddu c_3,t_1
823 sltu AT,c_3,t_1
824 daddu t_2,AT
825 daddu c_1,t_2
826 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
827 mflo t_1
828 mfhi t_2
829 daddu c_3,t_1
830 sltu AT,c_3,t_1
831 daddu t_2,AT
832 daddu c_1,t_2
833 sltu c_2,c_1,t_2
834 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
835 mflo t_1
836 mfhi t_2
837 daddu c_3,t_1
838 sltu AT,c_3,t_1
839 daddu t_2,AT
840 daddu c_1,t_2
841 sltu AT,c_1,t_2
842 daddu c_2,AT
843 sd c_3,16(a0) /* r[2]=c3; */
844
845 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
846 mflo t_1
847 mfhi t_2
848 daddu c_1,t_1
849 sltu AT,c_1,t_1
850 daddu t_2,AT
851 daddu c_2,t_2
852 sltu c_3,c_2,t_2
853 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
854 mflo t_1
855 mfhi t_2
856 daddu c_1,t_1
857 sltu AT,c_1,t_1
858 daddu t_2,AT
859 daddu c_2,t_2
860 sltu AT,c_2,t_2
861 daddu c_3,AT
862 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
863 mflo t_1
864 mfhi t_2
865 daddu c_1,t_1
866 sltu AT,c_1,t_1
867 daddu t_2,AT
868 daddu c_2,t_2
869 sltu AT,c_2,t_2
870 daddu c_3,AT
871 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
872 mflo t_1
873 mfhi t_2
874 daddu c_1,t_1
875 sltu AT,c_1,t_1
876 daddu t_2,AT
877 daddu c_2,t_2
878 sltu AT,c_2,t_2
879 daddu c_3,AT
880 sd c_1,24(a0) /* r[3]=c1; */
881
882 dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */
883 mflo t_1
884 mfhi t_2
885 daddu c_2,t_1
886 sltu AT,c_2,t_1
887 daddu t_2,AT
888 daddu c_3,t_2
889 sltu c_1,c_3,t_2
890 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
891 mflo t_1
892 mfhi t_2
893 daddu c_2,t_1
894 sltu AT,c_2,t_1
895 daddu t_2,AT
896 daddu c_3,t_2
897 sltu AT,c_3,t_2
898 daddu c_1,AT
899 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
900 mflo t_1
901 mfhi t_2
902 daddu c_2,t_1
903 sltu AT,c_2,t_1
904 daddu t_2,AT
905 daddu c_3,t_2
906 sltu AT,c_3,t_2
907 daddu c_1,AT
908 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
909 mflo t_1
910 mfhi t_2
911 daddu c_2,t_1
912 sltu AT,c_2,t_1
913 daddu t_2,AT
914 daddu c_3,t_2
915 sltu AT,c_3,t_2
916 daddu c_1,AT
917 dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */
918 mflo t_1
919 mfhi t_2
920 daddu c_2,t_1
921 sltu AT,c_2,t_1
922 daddu t_2,AT
923 daddu c_3,t_2
924 sltu AT,c_3,t_2
925 daddu c_1,AT
926 sd c_2,32(a0) /* r[4]=c2; */
927
928 dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */
929 mflo t_1
930 mfhi t_2
931 daddu c_3,t_1
932 sltu AT,c_3,t_1
933 daddu t_2,AT
934 daddu c_1,t_2
935 sltu c_2,c_1,t_2
936 dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */
937 mflo t_1
938 mfhi t_2
939 daddu c_3,t_1
940 sltu AT,c_3,t_1
941 daddu t_2,AT
942 daddu c_1,t_2
943 sltu AT,c_1,t_2
944 daddu c_2,AT
945 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
946 mflo t_1
947 mfhi t_2
948 daddu c_3,t_1
949 sltu AT,c_3,t_1
950 daddu t_2,AT
951 daddu c_1,t_2
952 sltu AT,c_1,t_2
953 daddu c_2,AT
954 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
955 mflo t_1
956 mfhi t_2
957 daddu c_3,t_1
958 sltu AT,c_3,t_1
959 daddu t_2,AT
960 daddu c_1,t_2
961 sltu AT,c_1,t_2
962 daddu c_2,AT
963 dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */
964 mflo t_1
965 mfhi t_2
966 daddu c_3,t_1
967 sltu AT,c_3,t_1
968 daddu t_2,AT
969 daddu c_1,t_2
970 sltu AT,c_1,t_2
971 daddu c_2,AT
972 dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */
973 mflo t_1
974 mfhi t_2
975 daddu c_3,t_1
976 sltu AT,c_3,t_1
977 daddu t_2,AT
978 daddu c_1,t_2
979 sltu AT,c_1,t_2
980 daddu c_2,AT
981 sd c_3,40(a0) /* r[5]=c3; */
982
983 dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */
984 mflo t_1
985 mfhi t_2
986 daddu c_1,t_1
987 sltu AT,c_1,t_1
988 daddu t_2,AT
989 daddu c_2,t_2
990 sltu c_3,c_2,t_2
991 dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */
992 mflo t_1
993 mfhi t_2
994 daddu c_1,t_1
995 sltu AT,c_1,t_1
996 daddu t_2,AT
997 daddu c_2,t_2
998 sltu AT,c_2,t_2
999 daddu c_3,AT
1000 dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */
1001 mflo t_1
1002 mfhi t_2
1003 daddu c_1,t_1
1004 sltu AT,c_1,t_1
1005 daddu t_2,AT
1006 daddu c_2,t_2
1007 sltu AT,c_2,t_2
1008 daddu c_3,AT
1009 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1010 mflo t_1
1011 mfhi t_2
1012 daddu c_1,t_1
1013 sltu AT,c_1,t_1
1014 daddu t_2,AT
1015 daddu c_2,t_2
1016 sltu AT,c_2,t_2
1017 daddu c_3,AT
1018 dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */
1019 mflo t_1
1020 mfhi t_2
1021 daddu c_1,t_1
1022 sltu AT,c_1,t_1
1023 daddu t_2,AT
1024 daddu c_2,t_2
1025 sltu AT,c_2,t_2
1026 daddu c_3,AT
1027 dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */
1028 mflo t_1
1029 mfhi t_2
1030 daddu c_1,t_1
1031 sltu AT,c_1,t_1
1032 daddu t_2,AT
1033 daddu c_2,t_2
1034 sltu AT,c_2,t_2
1035 daddu c_3,AT
1036 dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */
1037 mflo t_1
1038 mfhi t_2
1039 daddu c_1,t_1
1040 sltu AT,c_1,t_1
1041 daddu t_2,AT
1042 daddu c_2,t_2
1043 sltu AT,c_2,t_2
1044 daddu c_3,AT
1045 sd c_1,48(a0) /* r[6]=c1; */
1046
1047 dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */
1048 mflo t_1
1049 mfhi t_2
1050 daddu c_2,t_1
1051 sltu AT,c_2,t_1
1052 daddu t_2,AT
1053 daddu c_3,t_2
1054 sltu c_1,c_3,t_2
1055 dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */
1056 mflo t_1
1057 mfhi t_2
1058 daddu c_2,t_1
1059 sltu AT,c_2,t_1
1060 daddu t_2,AT
1061 daddu c_3,t_2
1062 sltu AT,c_3,t_2
1063 daddu c_1,AT
1064 dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */
1065 mflo t_1
1066 mfhi t_2
1067 daddu c_2,t_1
1068 sltu AT,c_2,t_1
1069 daddu t_2,AT
1070 daddu c_3,t_2
1071 sltu AT,c_3,t_2
1072 daddu c_1,AT
1073 dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */
1074 mflo t_1
1075 mfhi t_2
1076 daddu c_2,t_1
1077 sltu AT,c_2,t_1
1078 daddu t_2,AT
1079 daddu c_3,t_2
1080 sltu AT,c_3,t_2
1081 daddu c_1,AT
1082 dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */
1083 mflo t_1
1084 mfhi t_2
1085 daddu c_2,t_1
1086 sltu AT,c_2,t_1
1087 daddu t_2,AT
1088 daddu c_3,t_2
1089 sltu AT,c_3,t_2
1090 daddu c_1,AT
1091 dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */
1092 mflo t_1
1093 mfhi t_2
1094 daddu c_2,t_1
1095 sltu AT,c_2,t_1
1096 daddu t_2,AT
1097 daddu c_3,t_2
1098 sltu AT,c_3,t_2
1099 daddu c_1,AT
1100 dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */
1101 mflo t_1
1102 mfhi t_2
1103 daddu c_2,t_1
1104 sltu AT,c_2,t_1
1105 daddu t_2,AT
1106 daddu c_3,t_2
1107 sltu AT,c_3,t_2
1108 daddu c_1,AT
1109 dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */
1110 mflo t_1
1111 mfhi t_2
1112 daddu c_2,t_1
1113 sltu AT,c_2,t_1
1114 daddu t_2,AT
1115 daddu c_3,t_2
1116 sltu AT,c_3,t_2
1117 daddu c_1,AT
1118 sd c_2,56(a0) /* r[7]=c2; */
1119
1120 dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */
1121 mflo t_1
1122 mfhi t_2
1123 daddu c_3,t_1
1124 sltu AT,c_3,t_1
1125 daddu t_2,AT
1126 daddu c_1,t_2
1127 sltu c_2,c_1,t_2
1128 dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */
1129 mflo t_1
1130 mfhi t_2
1131 daddu c_3,t_1
1132 sltu AT,c_3,t_1
1133 daddu t_2,AT
1134 daddu c_1,t_2
1135 sltu AT,c_1,t_2
1136 daddu c_2,AT
1137 dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */
1138 mflo t_1
1139 mfhi t_2
1140 daddu c_3,t_1
1141 sltu AT,c_3,t_1
1142 daddu t_2,AT
1143 daddu c_1,t_2
1144 sltu AT,c_1,t_2
1145 daddu c_2,AT
1146 dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
1147 mflo t_1
1148 mfhi t_2
1149 daddu c_3,t_1
1150 sltu AT,c_3,t_1
1151 daddu t_2,AT
1152 daddu c_1,t_2
1153 sltu AT,c_1,t_2
1154 daddu c_2,AT
1155 dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */
1156 mflo t_1
1157 mfhi t_2
1158 daddu c_3,t_1
1159 sltu AT,c_3,t_1
1160 daddu t_2,AT
1161 daddu c_1,t_2
1162 sltu AT,c_1,t_2
1163 daddu c_2,AT
1164 dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */
1165 mflo t_1
1166 mfhi t_2
1167 daddu c_3,t_1
1168 sltu AT,c_3,t_1
1169 daddu t_2,AT
1170 daddu c_1,t_2
1171 sltu AT,c_1,t_2
1172 daddu c_2,AT
1173 dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */
1174 mflo t_1
1175 mfhi t_2
1176 daddu c_3,t_1
1177 sltu AT,c_3,t_1
1178 daddu t_2,AT
1179 daddu c_1,t_2
1180 sltu AT,c_1,t_2
1181 daddu c_2,AT
1182 sd c_3,64(a0) /* r[8]=c3; */
1183
1184 dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */
1185 mflo t_1
1186 mfhi t_2
1187 daddu c_1,t_1
1188 sltu AT,c_1,t_1
1189 daddu t_2,AT
1190 daddu c_2,t_2
1191 sltu c_3,c_2,t_2
1192 dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */
1193 mflo t_1
1194 mfhi t_2
1195 daddu c_1,t_1
1196 sltu AT,c_1,t_1
1197 daddu t_2,AT
1198 daddu c_2,t_2
1199 sltu AT,c_2,t_2
1200 daddu c_3,AT
1201 dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */
1202 mflo t_1
1203 mfhi t_2
1204 daddu c_1,t_1
1205 sltu AT,c_1,t_1
1206 daddu t_2,AT
1207 daddu c_2,t_2
1208 sltu AT,c_2,t_2
1209 daddu c_3,AT
1210 dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */
1211 mflo t_1
1212 mfhi t_2
1213 daddu c_1,t_1
1214 sltu AT,c_1,t_1
1215 daddu t_2,AT
1216 daddu c_2,t_2
1217 sltu AT,c_2,t_2
1218 daddu c_3,AT
1219 dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */
1220 mflo t_1
1221 mfhi t_2
1222 daddu c_1,t_1
1223 sltu AT,c_1,t_1
1224 daddu t_2,AT
1225 daddu c_2,t_2
1226 sltu AT,c_2,t_2
1227 daddu c_3,AT
1228 dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */
1229 mflo t_1
1230 mfhi t_2
1231 daddu c_1,t_1
1232 sltu AT,c_1,t_1
1233 daddu t_2,AT
1234 daddu c_2,t_2
1235 sltu AT,c_2,t_2
1236 daddu c_3,AT
1237 sd c_1,72(a0) /* r[9]=c1; */
1238
1239 dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */
1240 mflo t_1
1241 mfhi t_2
1242 daddu c_2,t_1
1243 sltu AT,c_2,t_1
1244 daddu t_2,AT
1245 daddu c_3,t_2
1246 sltu c_1,c_3,t_2
1247 dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */
1248 mflo t_1
1249 mfhi t_2
1250 daddu c_2,t_1
1251 sltu AT,c_2,t_1
1252 daddu t_2,AT
1253 daddu c_3,t_2
1254 sltu AT,c_3,t_2
1255 daddu c_1,AT
1256 dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1257 mflo t_1
1258 mfhi t_2
1259 daddu c_2,t_1
1260 sltu AT,c_2,t_1
1261 daddu t_2,AT
1262 daddu c_3,t_2
1263 sltu AT,c_3,t_2
1264 daddu c_1,AT
1265 dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */
1266 mflo t_1
1267 mfhi t_2
1268 daddu c_2,t_1
1269 sltu AT,c_2,t_1
1270 daddu t_2,AT
1271 daddu c_3,t_2
1272 sltu AT,c_3,t_2
1273 daddu c_1,AT
1274 dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */
1275 mflo t_1
1276 mfhi t_2
1277 daddu c_2,t_1
1278 sltu AT,c_2,t_1
1279 daddu t_2,AT
1280 daddu c_3,t_2
1281 sltu AT,c_3,t_2
1282 daddu c_1,AT
1283 sd c_2,80(a0) /* r[10]=c2; */
1284
1285 dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */
1286 mflo t_1
1287 mfhi t_2
1288 daddu c_3,t_1
1289 sltu AT,c_3,t_1
1290 daddu t_2,AT
1291 daddu c_1,t_2
1292 sltu c_2,c_1,t_2
1293 dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */
1294 mflo t_1
1295 mfhi t_2
1296 daddu c_3,t_1
1297 sltu AT,c_3,t_1
1298 daddu t_2,AT
1299 daddu c_1,t_2
1300 sltu AT,c_1,t_2
1301 daddu c_2,AT
1302 dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */
1303 mflo t_1
1304 mfhi t_2
1305 daddu c_3,t_1
1306 sltu AT,c_3,t_1
1307 daddu t_2,AT
1308 daddu c_1,t_2
1309 sltu AT,c_1,t_2
1310 daddu c_2,AT
1311 dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */
1312 mflo t_1
1313 mfhi t_2
1314 daddu c_3,t_1
1315 sltu AT,c_3,t_1
1316 daddu t_2,AT
1317 daddu c_1,t_2
1318 sltu AT,c_1,t_2
1319 daddu c_2,AT
1320 sd c_3,88(a0) /* r[11]=c3; */
1321
1322 dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */
1323 mflo t_1
1324 mfhi t_2
1325 daddu c_1,t_1
1326 sltu AT,c_1,t_1
1327 daddu t_2,AT
1328 daddu c_2,t_2
1329 sltu c_3,c_2,t_2
1330 dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
1331 mflo t_1
1332 mfhi t_2
1333 daddu c_1,t_1
1334 sltu AT,c_1,t_1
1335 daddu t_2,AT
1336 daddu c_2,t_2
1337 sltu AT,c_2,t_2
1338 daddu c_3,AT
1339 dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */
1340 mflo t_1
1341 mfhi t_2
1342 daddu c_1,t_1
1343 sltu AT,c_1,t_1
1344 daddu t_2,AT
1345 daddu c_2,t_2
1346 sltu AT,c_2,t_2
1347 daddu c_3,AT
1348 sd c_1,96(a0) /* r[12]=c1; */
1349
1350 dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */
1351 mflo t_1
1352 mfhi t_2
1353 daddu c_2,t_1
1354 sltu AT,c_2,t_1
1355 daddu t_2,AT
1356 daddu c_3,t_2
1357 sltu c_1,c_3,t_2
1358 dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */
1359 mflo t_1
1360 mfhi t_2
1361 daddu c_2,t_1
1362 sltu AT,c_2,t_1
1363 daddu t_2,AT
1364 daddu c_3,t_2
1365 sltu AT,c_3,t_2
1366 daddu c_1,AT
1367 sd c_2,104(a0) /* r[13]=c2; */
1368
1369 dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
1370 ld s0,0(sp)
1371 ld s1,8(sp)
1372 ld s2,16(sp)
1373 ld s3,24(sp)
1374 ld s4,32(sp)
1375 ld s5,40(sp)
1376 mflo t_1
1377 mfhi t_2
1378 daddu c_3,t_1
1379 sltu AT,c_3,t_1
1380 daddu t_2,AT
1381 daddu c_1,t_2
1382 sd c_3,112(a0) /* r[14]=c3; */
1383 sd c_1,120(a0) /* r[15]=c1; */
1384
1385 PTR_ADD sp,FRAME_SIZE
1386
1387 jr ra
1388 END(bn_mul_comba8)
1389
1390 .align 5
1391 LEAF(bn_mul_comba4)
1392 .set reorder
1393 ld a_0,0(a1)
1394 ld b_0,0(a2)
1395 ld a_1,8(a1)
1396 ld a_2,16(a1)
1397 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
1398 ld a_3,24(a1)
1399 ld b_1,8(a2)
1400 ld b_2,16(a2)
1401 ld b_3,24(a2)
1402 mflo c_1
1403 mfhi c_2
1404 sd c_1,0(a0)
1405
1406 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
1407 mflo t_1
1408 mfhi t_2
1409 daddu c_2,t_1
1410 sltu AT,c_2,t_1
1411 daddu c_3,t_2,AT
1412 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
1413 mflo t_1
1414 mfhi t_2
1415 daddu c_2,t_1
1416 sltu AT,c_2,t_1
1417 daddu t_2,AT
1418 daddu c_3,t_2
1419 sltu c_1,c_3,t_2
1420 sd c_2,8(a0)
1421
1422 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
1423 mflo t_1
1424 mfhi t_2
1425 daddu c_3,t_1
1426 sltu AT,c_3,t_1
1427 daddu t_2,AT
1428 daddu c_1,t_2
1429 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1430 mflo t_1
1431 mfhi t_2
1432 daddu c_3,t_1
1433 sltu AT,c_3,t_1
1434 daddu t_2,AT
1435 daddu c_1,t_2
1436 sltu c_2,c_1,t_2
1437 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
1438 mflo t_1
1439 mfhi t_2
1440 daddu c_3,t_1
1441 sltu AT,c_3,t_1
1442 daddu t_2,AT
1443 daddu c_1,t_2
1444 sltu AT,c_1,t_2
1445 daddu c_2,AT
1446 sd c_3,16(a0)
1447
1448 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
1449 mflo t_1
1450 mfhi t_2
1451 daddu c_1,t_1
1452 sltu AT,c_1,t_1
1453 daddu t_2,AT
1454 daddu c_2,t_2
1455 sltu c_3,c_2,t_2
1456 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
1457 mflo t_1
1458 mfhi t_2
1459 daddu c_1,t_1
1460 sltu AT,c_1,t_1
1461 daddu t_2,AT
1462 daddu c_2,t_2
1463 sltu AT,c_2,t_2
1464 daddu c_3,AT
1465 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
1466 mflo t_1
1467 mfhi t_2
1468 daddu c_1,t_1
1469 sltu AT,c_1,t_1
1470 daddu t_2,AT
1471 daddu c_2,t_2
1472 sltu AT,c_2,t_2
1473 daddu c_3,AT
1474 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
1475 mflo t_1
1476 mfhi t_2
1477 daddu c_1,t_1
1478 sltu AT,c_1,t_1
1479 daddu t_2,AT
1480 daddu c_2,t_2
1481 sltu AT,c_2,t_2
1482 daddu c_3,AT
1483 sd c_1,24(a0)
1484
1485 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
1486 mflo t_1
1487 mfhi t_2
1488 daddu c_2,t_1
1489 sltu AT,c_2,t_1
1490 daddu t_2,AT
1491 daddu c_3,t_2
1492 sltu c_1,c_3,t_2
1493 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1494 mflo t_1
1495 mfhi t_2
1496 daddu c_2,t_1
1497 sltu AT,c_2,t_1
1498 daddu t_2,AT
1499 daddu c_3,t_2
1500 sltu AT,c_3,t_2
1501 daddu c_1,AT
1502 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
1503 mflo t_1
1504 mfhi t_2
1505 daddu c_2,t_1
1506 sltu AT,c_2,t_1
1507 daddu t_2,AT
1508 daddu c_3,t_2
1509 sltu AT,c_3,t_2
1510 daddu c_1,AT
1511 sd c_2,32(a0)
1512
1513 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
1514 mflo t_1
1515 mfhi t_2
1516 daddu c_3,t_1
1517 sltu AT,c_3,t_1
1518 daddu t_2,AT
1519 daddu c_1,t_2
1520 sltu c_2,c_1,t_2
1521 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
1522 mflo t_1
1523 mfhi t_2
1524 daddu c_3,t_1
1525 sltu AT,c_3,t_1
1526 daddu t_2,AT
1527 daddu c_1,t_2
1528 sltu AT,c_1,t_2
1529 daddu c_2,AT
1530 sd c_3,40(a0)
1531
1532 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1533 mflo t_1
1534 mfhi t_2
1535 daddu c_1,t_1
1536 sltu AT,c_1,t_1
1537 daddu t_2,AT
1538 daddu c_2,t_2
1539 sd c_1,48(a0)
1540 sd c_2,56(a0)
1541
1542 jr ra
1543 END(bn_mul_comba4)
1544
1545 #undef a_4
1546 #undef a_5
1547 #undef a_6
1548 #undef a_7
1549 #define a_4 b_0
1550 #define a_5 b_1
1551 #define a_6 b_2
1552 #define a_7 b_3
1553
1554 .align 5
1555 LEAF(bn_sqr_comba8)
1556 .set reorder
1557 ld a_0,0(a1)
1558 ld a_1,8(a1)
1559 ld a_2,16(a1)
1560 ld a_3,24(a1)
1561
1562 dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
1563 ld a_4,32(a1)
1564 ld a_5,40(a1)
1565 ld a_6,48(a1)
1566 ld a_7,56(a1)
1567 mflo c_1
1568 mfhi c_2
1569 sd c_1,0(a0)
1570
1571 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
1572 mflo t_1
1573 mfhi t_2
1574 slt c_1,t_2,zero
1575 dsll t_2,1
1576 slt a2,t_1,zero
1577 daddu t_2,a2
1578 dsll t_1,1
1579 daddu c_2,t_1
1580 sltu AT,c_2,t_1
1581 daddu c_3,t_2,AT
1582 sd c_2,8(a0)
1583
1584 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
1585 mflo t_1
1586 mfhi t_2
1587 daddu c_3,t_1
1588 sltu AT,c_3,t_1
1589 daddu c_3,t_1
1590 daddu AT,t_2
1591 sltu t_1,c_3,t_1
1592 daddu c_1,AT
1593 daddu t_2,t_1
1594 sltu c_2,c_1,AT
1595 daddu c_1,t_2
1596 sltu t_2,c_1,t_2
1597 daddu c_2,t_2
1598 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1599 mflo t_1
1600 mfhi t_2
1601 daddu c_3,t_1
1602 sltu AT,c_3,t_1
1603 daddu t_2,AT
1604 daddu c_1,t_2
1605 sltu AT,c_1,t_2
1606 daddu c_2,AT
1607 sd c_3,16(a0)
1608
1609 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
1610 mflo t_1
1611 mfhi t_2
1612 daddu c_1,t_1
1613 sltu AT,c_1,t_1
1614 daddu c_1,t_1
1615 daddu AT,t_2
1616 sltu t_1,c_1,t_1
1617 daddu c_2,AT
1618 daddu t_2,t_1
1619 sltu c_3,c_2,AT
1620 daddu c_2,t_2
1621 sltu t_2,c_2,t_2
1622 daddu c_3,t_2
1623 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */
1624 mflo t_1
1625 mfhi t_2
1626 daddu c_1,t_1
1627 sltu AT,c_1,t_1
1628 daddu c_1,t_1
1629 daddu AT,t_2
1630 sltu t_1,c_1,t_1
1631 daddu c_2,AT
1632 daddu t_2,t_1
1633 sltu AT,c_2,AT
1634 daddu c_2,t_2
1635 daddu c_3,AT
1636 sltu t_2,c_2,t_2
1637 daddu c_3,t_2
1638 sd c_1,24(a0)
1639
1640 dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */
1641 mflo t_1
1642 mfhi t_2
1643 daddu c_2,t_1
1644 sltu AT,c_2,t_1
1645 daddu c_2,t_1
1646 daddu AT,t_2
1647 sltu t_1,c_2,t_1
1648 daddu c_3,AT
1649 daddu t_2,t_1
1650 sltu c_1,c_3,AT
1651 daddu c_3,t_2
1652 sltu t_2,c_3,t_2
1653 daddu c_1,t_2
1654 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
1655 mflo t_1
1656 mfhi t_2
1657 daddu c_2,t_1
1658 sltu AT,c_2,t_1
1659 daddu c_2,t_1
1660 daddu AT,t_2
1661 sltu t_1,c_2,t_1
1662 daddu c_3,AT
1663 daddu t_2,t_1
1664 sltu AT,c_3,AT
1665 daddu c_3,t_2
1666 daddu c_1,AT
1667 sltu t_2,c_3,t_2
1668 daddu c_1,t_2
1669 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1670 mflo t_1
1671 mfhi t_2
1672 daddu c_2,t_1
1673 sltu AT,c_2,t_1
1674 daddu t_2,AT
1675 daddu c_3,t_2
1676 sltu AT,c_3,t_2
1677 daddu c_1,AT
1678 sd c_2,32(a0)
1679
1680 dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */
1681 mflo t_1
1682 mfhi t_2
1683 daddu c_3,t_1
1684 sltu AT,c_3,t_1
1685 daddu c_3,t_1
1686 daddu AT,t_2
1687 sltu t_1,c_3,t_1
1688 daddu c_1,AT
1689 daddu t_2,t_1
1690 sltu c_2,c_1,AT
1691 daddu c_1,t_2
1692 sltu t_2,c_1,t_2
1693 daddu c_2,t_2
1694 dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */
1695 mflo t_1
1696 mfhi t_2
1697 daddu c_3,t_1
1698 sltu AT,c_3,t_1
1699 daddu c_3,t_1
1700 daddu AT,t_2
1701 sltu t_1,c_3,t_1
1702 daddu c_1,AT
1703 daddu t_2,t_1
1704 sltu AT,c_1,AT
1705 daddu c_1,t_2
1706 daddu c_2,AT
1707 sltu t_2,c_1,t_2
1708 daddu c_2,t_2
1709 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
1710 mflo t_1
1711 mfhi t_2
1712 daddu c_3,t_1
1713 sltu AT,c_3,t_1
1714 daddu c_3,t_1
1715 daddu AT,t_2
1716 sltu t_1,c_3,t_1
1717 daddu c_1,AT
1718 daddu t_2,t_1
1719 sltu AT,c_1,AT
1720 daddu c_1,t_2
1721 daddu c_2,AT
1722 sltu t_2,c_1,t_2
1723 daddu c_2,t_2
1724 sd c_3,40(a0)
1725
1726 dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */
1727 mflo t_1
1728 mfhi t_2
1729 daddu c_1,t_1
1730 sltu AT,c_1,t_1
1731 daddu c_1,t_1
1732 daddu AT,t_2
1733 sltu t_1,c_1,t_1
1734 daddu c_2,AT
1735 daddu t_2,t_1
1736 sltu c_3,c_2,AT
1737 daddu c_2,t_2
1738 sltu t_2,c_2,t_2
1739 daddu c_3,t_2
1740 dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */
1741 mflo t_1
1742 mfhi t_2
1743 daddu c_1,t_1
1744 sltu AT,c_1,t_1
1745 daddu c_1,t_1
1746 daddu AT,t_2
1747 sltu t_1,c_1,t_1
1748 daddu c_2,AT
1749 daddu t_2,t_1
1750 sltu AT,c_2,AT
1751 daddu c_2,t_2
1752 daddu c_3,AT
1753 sltu t_2,c_2,t_2
1754 daddu c_3,t_2
1755 dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */
1756 mflo t_1
1757 mfhi t_2
1758 daddu c_1,t_1
1759 sltu AT,c_1,t_1
1760 daddu c_1,t_1
1761 daddu AT,t_2
1762 sltu t_1,c_1,t_1
1763 daddu c_2,AT
1764 daddu t_2,t_1
1765 sltu AT,c_2,AT
1766 daddu c_2,t_2
1767 daddu c_3,AT
1768 sltu t_2,c_2,t_2
1769 daddu c_3,t_2
1770 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1771 mflo t_1
1772 mfhi t_2
1773 daddu c_1,t_1
1774 sltu AT,c_1,t_1
1775 daddu t_2,AT
1776 daddu c_2,t_2
1777 sltu AT,c_2,t_2
1778 daddu c_3,AT
1779 sd c_1,48(a0)
1780
1781 dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */
1782 mflo t_1
1783 mfhi t_2
1784 daddu c_2,t_1
1785 sltu AT,c_2,t_1
1786 daddu c_2,t_1
1787 daddu AT,t_2
1788 sltu t_1,c_2,t_1
1789 daddu c_3,AT
1790 daddu t_2,t_1
1791 sltu c_1,c_3,AT
1792 daddu c_3,t_2
1793 sltu t_2,c_3,t_2
1794 daddu c_1,t_2
1795 dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */
1796 mflo t_1
1797 mfhi t_2
1798 daddu c_2,t_1
1799 sltu AT,c_2,t_1
1800 daddu c_2,t_1
1801 daddu AT,t_2
1802 sltu t_1,c_2,t_1
1803 daddu c_3,AT
1804 daddu t_2,t_1
1805 sltu AT,c_3,AT
1806 daddu c_3,t_2
1807 daddu c_1,AT
1808 sltu t_2,c_3,t_2
1809 daddu c_1,t_2
1810 dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */
1811 mflo t_1
1812 mfhi t_2
1813 daddu c_2,t_1
1814 sltu AT,c_2,t_1
1815 daddu c_2,t_1
1816 daddu AT,t_2
1817 sltu t_1,c_2,t_1
1818 daddu c_3,AT
1819 daddu t_2,t_1
1820 sltu AT,c_3,AT
1821 daddu c_3,t_2
1822 daddu c_1,AT
1823 sltu t_2,c_3,t_2
1824 daddu c_1,t_2
1825 dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */
1826 mflo t_1
1827 mfhi t_2
1828 daddu c_2,t_1
1829 sltu AT,c_2,t_1
1830 daddu c_2,t_1
1831 daddu AT,t_2
1832 sltu t_1,c_2,t_1
1833 daddu c_3,AT
1834 daddu t_2,t_1
1835 sltu AT,c_3,AT
1836 daddu c_3,t_2
1837 daddu c_1,AT
1838 sltu t_2,c_3,t_2
1839 daddu c_1,t_2
1840 sd c_2,56(a0)
1841
1842 dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */
1843 mflo t_1
1844 mfhi t_2
1845 daddu c_3,t_1
1846 sltu AT,c_3,t_1
1847 daddu c_3,t_1
1848 daddu AT,t_2
1849 sltu t_1,c_3,t_1
1850 daddu c_1,AT
1851 daddu t_2,t_1
1852 sltu c_2,c_1,AT
1853 daddu c_1,t_2
1854 sltu t_2,c_1,t_2
1855 daddu c_2,t_2
1856 dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */
1857 mflo t_1
1858 mfhi t_2
1859 daddu c_3,t_1
1860 sltu AT,c_3,t_1
1861 daddu c_3,t_1
1862 daddu AT,t_2
1863 sltu t_1,c_3,t_1
1864 daddu c_1,AT
1865 daddu t_2,t_1
1866 sltu AT,c_1,AT
1867 daddu c_1,t_2
1868 daddu c_2,AT
1869 sltu t_2,c_1,t_2
1870 daddu c_2,t_2
1871 dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */
1872 mflo t_1
1873 mfhi t_2
1874 daddu c_3,t_1
1875 sltu AT,c_3,t_1
1876 daddu c_3,t_1
1877 daddu AT,t_2
1878 sltu t_1,c_3,t_1
1879 daddu c_1,AT
1880 daddu t_2,t_1
1881 sltu AT,c_1,AT
1882 daddu c_1,t_2
1883 daddu c_2,AT
1884 sltu t_2,c_1,t_2
1885 daddu c_2,t_2
1886 dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
1887 mflo t_1
1888 mfhi t_2
1889 daddu c_3,t_1
1890 sltu AT,c_3,t_1
1891 daddu t_2,AT
1892 daddu c_1,t_2
1893 sltu AT,c_1,t_2
1894 daddu c_2,AT
1895 sd c_3,64(a0)
1896
1897 dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */
1898 mflo t_1
1899 mfhi t_2
1900 daddu c_1,t_1
1901 sltu AT,c_1,t_1
1902 daddu c_1,t_1
1903 daddu AT,t_2
1904 sltu t_1,c_1,t_1
1905 daddu c_2,AT
1906 daddu t_2,t_1
1907 sltu c_3,c_2,AT
1908 daddu c_2,t_2
1909 sltu t_2,c_2,t_2
1910 daddu c_3,t_2
1911 dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */
1912 mflo t_1
1913 mfhi t_2
1914 daddu c_1,t_1
1915 sltu AT,c_1,t_1
1916 daddu c_1,t_1
1917 daddu AT,t_2
1918 sltu t_1,c_1,t_1
1919 daddu c_2,AT
1920 daddu t_2,t_1
1921 sltu AT,c_2,AT
1922 daddu c_2,t_2
1923 daddu c_3,AT
1924 sltu t_2,c_2,t_2
1925 daddu c_3,t_2
1926 dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */
1927 mflo t_1
1928 mfhi t_2
1929 daddu c_1,t_1
1930 sltu AT,c_1,t_1
1931 daddu c_1,t_1
1932 daddu AT,t_2
1933 sltu t_1,c_1,t_1
1934 daddu c_2,AT
1935 daddu t_2,t_1
1936 sltu AT,c_2,AT
1937 daddu c_2,t_2
1938 daddu c_3,AT
1939 sltu t_2,c_2,t_2
1940 daddu c_3,t_2
1941 sd c_1,72(a0)
1942
1943 dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */
1944 mflo t_1
1945 mfhi t_2
1946 daddu c_2,t_1
1947 sltu AT,c_2,t_1
1948 daddu c_2,t_1
1949 daddu AT,t_2
1950 sltu t_1,c_2,t_1
1951 daddu c_3,AT
1952 daddu t_2,t_1
1953 sltu c_1,c_3,AT
1954 daddu c_3,t_2
1955 sltu t_2,c_3,t_2
1956 daddu c_1,t_2
1957 dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */
1958 mflo t_1
1959 mfhi t_2
1960 daddu c_2,t_1
1961 sltu AT,c_2,t_1
1962 daddu c_2,t_1
1963 daddu AT,t_2
1964 sltu t_1,c_2,t_1
1965 daddu c_3,AT
1966 daddu t_2,t_1
1967 sltu AT,c_3,AT
1968 daddu c_3,t_2
1969 daddu c_1,AT
1970 sltu t_2,c_3,t_2
1971 daddu c_1,t_2
1972 dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1973 mflo t_1
1974 mfhi t_2
1975 daddu c_2,t_1
1976 sltu AT,c_2,t_1
1977 daddu t_2,AT
1978 daddu c_3,t_2
1979 sltu AT,c_3,t_2
1980 daddu c_1,AT
1981 sd c_2,80(a0)
1982
1983 dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */
1984 mflo t_1
1985 mfhi t_2
1986 daddu c_3,t_1
1987 sltu AT,c_3,t_1
1988 daddu c_3,t_1
1989 daddu AT,t_2
1990 sltu t_1,c_3,t_1
1991 daddu c_1,AT
1992 daddu t_2,t_1
1993 sltu c_2,c_1,AT
1994 daddu c_1,t_2
1995 sltu t_2,c_1,t_2
1996 daddu c_2,t_2
1997 dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */
1998 mflo t_1
1999 mfhi t_2
2000 daddu c_3,t_1
2001 sltu AT,c_3,t_1
2002 daddu c_3,t_1
2003 daddu AT,t_2
2004 sltu t_1,c_3,t_1
2005 daddu c_1,AT
2006 daddu t_2,t_1
2007 sltu AT,c_1,AT
2008 daddu c_1,t_2
2009 daddu c_2,AT
2010 sltu t_2,c_1,t_2
2011 daddu c_2,t_2
2012 sd c_3,88(a0)
2013
2014 dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */
2015 mflo t_1
2016 mfhi t_2
2017 daddu c_1,t_1
2018 sltu AT,c_1,t_1
2019 daddu c_1,t_1
2020 daddu AT,t_2
2021 sltu t_1,c_1,t_1
2022 daddu c_2,AT
2023 daddu t_2,t_1
2024 sltu c_3,c_2,AT
2025 daddu c_2,t_2
2026 sltu t_2,c_2,t_2
2027 daddu c_3,t_2
2028 dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
2029 mflo t_1
2030 mfhi t_2
2031 daddu c_1,t_1
2032 sltu AT,c_1,t_1
2033 daddu t_2,AT
2034 daddu c_2,t_2
2035 sltu AT,c_2,t_2
2036 daddu c_3,AT
2037 sd c_1,96(a0)
2038
2039 dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */
2040 mflo t_1
2041 mfhi t_2
2042 daddu c_2,t_1
2043 sltu AT,c_2,t_1
2044 daddu c_2,t_1
2045 daddu AT,t_2
2046 sltu t_1,c_2,t_1
2047 daddu c_3,AT
2048 daddu t_2,t_1
2049 sltu c_1,c_3,AT
2050 daddu c_3,t_2
2051 sltu t_2,c_3,t_2
2052 daddu c_1,t_2
2053 sd c_2,104(a0)
2054
2055 dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
2056 mflo t_1
2057 mfhi t_2
2058 daddu c_3,t_1
2059 sltu AT,c_3,t_1
2060 daddu t_2,AT
2061 daddu c_1,t_2
2062 sd c_3,112(a0)
2063 sd c_1,120(a0)
2064
2065 jr ra
2066 END(bn_sqr_comba8)
2067
2068 .align 5
2069 LEAF(bn_sqr_comba4)
2070 .set reorder
2071 ld a_0,0(a1)
2072 ld a_1,8(a1)
2073 dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
2074 ld a_2,16(a1)
2075 ld a_3,24(a1)
2076 mflo c_1
2077 mfhi c_2
2078 sd c_1,0(a0)
2079
2080 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
2081 mflo t_1
2082 mfhi t_2
2083 slt c_1,t_2,zero
2084 dsll t_2,1
2085 slt a2,t_1,zero
2086 daddu t_2,a2
2087 dsll t_1,1
2088 daddu c_2,t_1
2089 sltu AT,c_2,t_1
2090 daddu c_3,t_2,AT
2091 sd c_2,8(a0)
2092
2093 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
2094 mflo t_1
2095 mfhi t_2
2096 daddu c_3,t_1
2097 sltu AT,c_3,t_1
2098 daddu c_3,t_1
2099 daddu AT,t_2
2100 sltu t_1,c_3,t_1
2101 daddu c_1,AT
2102 daddu t_2,t_1
2103 sltu c_2,c_1,AT
2104 daddu c_1,t_2
2105 sltu t_2,c_1,t_2
2106 daddu c_2,t_2
2107 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
2108 mflo t_1
2109 mfhi t_2
2110 daddu c_3,t_1
2111 sltu AT,c_3,t_1
2112 daddu t_2,AT
2113 daddu c_1,t_2
2114 sltu AT,c_1,t_2
2115 daddu c_2,AT
2116 sd c_3,16(a0)
2117
2118 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
2119 mflo t_1
2120 mfhi t_2
2121 daddu c_1,t_1
2122 sltu AT,c_1,t_1
2123 daddu c_1,t_1
2124 daddu AT,t_2
2125 sltu t_1,c_1,t_1
2126 daddu c_2,AT
2127 daddu t_2,t_1
2128 sltu c_3,c_2,AT
2129 daddu c_2,t_2
2130 sltu t_2,c_2,t_2
2131 daddu c_3,t_2
2132 dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */
2133 mflo t_1
2134 mfhi t_2
2135 daddu c_1,t_1
2136 sltu AT,c_1,t_1
2137 daddu c_1,t_1
2138 daddu AT,t_2
2139 sltu t_1,c_1,t_1
2140 daddu c_2,AT
2141 daddu t_2,t_1
2142 sltu AT,c_2,AT
2143 daddu c_2,t_2
2144 daddu c_3,AT
2145 sltu t_2,c_2,t_2
2146 daddu c_3,t_2
2147 sd c_1,24(a0)
2148
2149 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
2150 mflo t_1
2151 mfhi t_2
2152 daddu c_2,t_1
2153 sltu AT,c_2,t_1
2154 daddu c_2,t_1
2155 daddu AT,t_2
2156 sltu t_1,c_2,t_1
2157 daddu c_3,AT
2158 daddu t_2,t_1
2159 sltu c_1,c_3,AT
2160 daddu c_3,t_2
2161 sltu t_2,c_3,t_2
2162 daddu c_1,t_2
2163 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
2164 mflo t_1
2165 mfhi t_2
2166 daddu c_2,t_1
2167 sltu AT,c_2,t_1
2168 daddu t_2,AT
2169 daddu c_3,t_2
2170 sltu AT,c_3,t_2
2171 daddu c_1,AT
2172 sd c_2,32(a0)
2173
2174 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
2175 mflo t_1
2176 mfhi t_2
2177 daddu c_3,t_1
2178 sltu AT,c_3,t_1
2179 daddu c_3,t_1
2180 daddu AT,t_2
2181 sltu t_1,c_3,t_1
2182 daddu c_1,AT
2183 daddu t_2,t_1
2184 sltu c_2,c_1,AT
2185 daddu c_1,t_2
2186 sltu t_2,c_1,t_2
2187 daddu c_2,t_2
2188 sd c_3,40(a0)
2189
2190 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
2191 mflo t_1
2192 mfhi t_2
2193 daddu c_1,t_1
2194 sltu AT,c_1,t_1
2195 daddu t_2,AT
2196 daddu c_2,t_2
2197 sd c_1,48(a0)
2198 sd c_2,56(a0)
2199
2200 jr ra
2201 END(bn_sqr_comba4)