1#include "mips_arch.h"
2
# Divide helper macros used by the division routine in this file.
# On the two R6 targets the divide macro expands to nothing and the
# mfqt/mfrm macros emit the three-operand divide/modulo instruction
# themselves.  On every other target the divide macro issues the
# instruction with $0 as destination and mfqt/mfrm read the result
# back with the mflo/mfhi instructions.
3#if defined(_MIPS_ARCH_MIPS64R6)
4# define ddivu(rs,rt)
5# define mfqt(rd,rs,rt)       ddivu     rd,rs,rt
6# define mfrm(rd,rs,rt)       dmodu     rd,rs,rt
7#elif defined(_MIPS_ARCH_MIPS32R6)
8# define divu(rs,rt)
9# define mfqt(rd,rs,rt)       divu      rd,rs,rt
10# define mfrm(rd,rs,rt)       modu      rd,rs,rt
11#else
12# define ddivu(rs,rt)         ddivu     $0,rs,rt
13# define mfqt(rd,rs,rt)       mflo      rd
14# define mfrm(rd,rs,rt)       mfhi      rd
15#endif
16
# Identification strings placed in the read-only data section.
17.rdata
18.asciiz   "mips3.s, Version 1.2"
19.asciiz   "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
20
# Everything below is code.  The routines name register $1 explicitly
# as a scratch register, which is why noat is selected here.
21.text
22.set      noat
23
# bn_mul_add_words: exported entry point.
#   $4 = pointer to the words that are read and updated
#   $5 = pointer to the source words
#   $6 = word count, tested as a signed value
#   $7 = multiplier word
# When $6 > 0 control continues in bn_mul_add_words_internal with the
# carry register $2 cleared by the delay slot.  Otherwise the routine
# returns straight away with 0 in both $2 and $4.
# NOTE(review): presumably called from C as
# bn_mul_add_words(rp, ap, num, w) - confirm against the C header.
24.align    5
25.globl    bn_mul_add_words
26.ent      bn_mul_add_words
27bn_mul_add_words:
28          .set      noreorder
29          bgtz      $6,bn_mul_add_words_internal
30          move      $2,$0               # delay slot, runs on both paths
31          jr        $31
32          move      $4,$2               # delay slot, result mirrored into $4
33.end      bn_mul_add_words
34
# bn_mul_add_words_internal: working part of bn_mul_add_words, entered
# with $6 > 0 and the carry register $2 already cleared.
#   $4 = pointer to the 8-byte words that are read, updated and stored
#   $5 = pointer to the 8-byte source words
#   $6 = remaining word count, $7 = multiplier
#   $2 = running carry, returned in $2 and also copied to $4
# Each step stores the low 64 bits of mem[$4] + mem[$5]*$7 + carry and
# keeps the high 64 bits as the next carry.
# Scratch registers written: $1, $3, $8-$15.
# NOTE(review): the dmultu/mflo/mfhi forms with parenthesised operands
# are macros that are not defined in this file - presumably they come
# from mips_arch.h.
35.align    5
36.ent      bn_mul_add_words_internal
37bn_mul_add_words_internal:
38          .set      reorder
39          li        $3,-4               # mask clearing the two low count bits
40          and       $8,$6,$3            # $8 = count rounded down to a multiple of 4
41          beqz      $8,.L_bn_mul_add_words_tail
42
          # Unrolled loop, four words per pass.  The loads and the next
          # multiply are interleaved with the carry arithmetic of the
          # current word, so the statement order is significant.
43.L_bn_mul_add_words_loop:
44          ld        $12,0($5)
45          dmultu    ($12,$7)
46          ld        $13,0($4)
47          ld        $14,8($5)
48          ld        $15,8($4)
49          ld        $8,2*8($5)
50          ld        $9,2*8($4)
          # word 0: add carry-in, then the low product half; $2 collects
          # the high product half plus both carry-outs
51          daddu     $13,$2
52          sltu      $2,$13,$2 # All manuals say it "compares 32-bit
53                                        # values", but it seems to work fine
54                                        # even on 64-bit registers.
55          mflo      ($1,$12,$7)
56          mfhi      ($12,$12,$7)
57          daddu     $13,$1
58          daddu     $2,$12
59           dmultu   ($14,$7)
60          sltu      $1,$13,$1
61          sd        $13,0($4)
62          daddu     $2,$1
63
          # word 1, same pattern with $15/$14
64          ld        $10,3*8($5)
65          ld        $11,3*8($4)
66          daddu     $15,$2
67          sltu      $2,$15,$2
68          mflo      ($1,$14,$7)
69          mfhi      ($14,$14,$7)
70          daddu     $15,$1
71          daddu     $2,$14
72           dmultu   ($8,$7)
73          sltu      $1,$15,$1
74          sd        $15,8($4)
75          daddu     $2,$1
76
          # word 2; count and both pointers are advanced here, so the
          # remaining stores use negative offsets
77          subu      $6,4
78          daddu $4,4*8
79          daddu $5,4*8
80          daddu     $9,$2
81          sltu      $2,$9,$2
82          mflo      ($1,$8,$7)
83          mfhi      ($8,$8,$7)
84          daddu     $9,$1
85          daddu     $2,$8
86           dmultu   ($10,$7)
87          sltu      $1,$9,$1
88          sd        $9,-2*8($4)
89          daddu     $2,$1
90
91
          # word 3; $8 is recomputed as the loop condition
92          and       $8,$6,$3
93          daddu     $11,$2
94          sltu      $2,$11,$2
95          mflo      ($1,$10,$7)
96          mfhi      ($10,$10,$7)
97          daddu     $11,$1
98          daddu     $2,$10
99          sltu      $1,$11,$1
100          sd        $11,-8($4)
101          .set      noreorder
102          bgtz      $8,.L_bn_mul_add_words_loop
103          daddu     $2,$1               # delay slot, last carry-out of word 3
104
105          beqz      $6,.L_bn_mul_add_words_return
106          nop
107
          # Tail: between one and three leftover words, handled one at a
          # time with the same arithmetic as the loop body.
108.L_bn_mul_add_words_tail:
109          .set      reorder
110          ld        $12,0($5)
111          dmultu    ($12,$7)
112          ld        $13,0($4)
113          subu      $6,1
114          daddu     $13,$2
115          sltu      $2,$13,$2
116          mflo      ($1,$12,$7)
117          mfhi      ($12,$12,$7)
118          daddu     $13,$1
119          daddu     $2,$12
120          sltu      $1,$13,$1
121          sd        $13,0($4)
122          daddu     $2,$1
123          beqz      $6,.L_bn_mul_add_words_return
124
125          ld        $12,8($5)
126          dmultu    ($12,$7)
127          ld        $13,8($4)
128          subu      $6,1
129          daddu     $13,$2
130          sltu      $2,$13,$2
131          mflo      ($1,$12,$7)
132          mfhi      ($12,$12,$7)
133          daddu     $13,$1
134          daddu     $2,$12
135          sltu      $1,$13,$1
136          sd        $13,8($4)
137          daddu     $2,$1
138          beqz      $6,.L_bn_mul_add_words_return
139
140          ld        $12,2*8($5)
141          dmultu    ($12,$7)
142          ld        $13,2*8($4)
143          daddu     $13,$2
144          sltu      $2,$13,$2
145          mflo      ($1,$12,$7)
146          mfhi      ($12,$12,$7)
147          daddu     $13,$1
148          daddu     $2,$12
149          sltu      $1,$13,$1
150          sd        $13,2*8($4)
151          daddu     $2,$1
152
          # Return the final carry in $2, mirrored into $4 by the delay slot.
153.L_bn_mul_add_words_return:
154          .set      noreorder
155          jr        $31
156          move      $4,$2
157.end      bn_mul_add_words_internal
158
# bn_mul_words: exported entry point.
#   $4 = pointer to the destination words
#   $5 = pointer to the source words
#   $6 = word count, tested as a signed value
#   $7 = multiplier word
# When $6 > 0 control continues in bn_mul_words_internal with the carry
# register $2 cleared by the delay slot.  Otherwise the routine returns
# straight away with 0 in both $2 and $4.
# NOTE(review): presumably called from C as
# bn_mul_words(rp, ap, num, w) - confirm against the C header.
159.align    5
160.globl    bn_mul_words
161.ent      bn_mul_words
162bn_mul_words:
163          .set      noreorder
164          bgtz      $6,bn_mul_words_internal
165          move      $2,$0               # delay slot, runs on both paths
166          jr        $31
167          move      $4,$2               # delay slot, result mirrored into $4
168.end      bn_mul_words
169
# bn_mul_words_internal: working part of bn_mul_words, entered with
# $6 > 0 and the carry register $2 already cleared.
#   $4 = pointer to the 8-byte destination words (only written)
#   $5 = pointer to the 8-byte source words
#   $6 = remaining word count, $7 = multiplier
#   $2 = running carry, returned in $2 and also copied to $4
# Each step stores the low 64 bits of mem[$5]*$7 + carry and keeps the
# high 64 bits as the next carry.
# Scratch registers written: $1, $3, $8-$15.
# NOTE(review): the dmultu/mflo/mfhi forms with parenthesised operands
# are macros that are not defined in this file - presumably they come
# from mips_arch.h.
170.align    5
171.ent      bn_mul_words_internal
172bn_mul_words_internal:
173          .set      reorder
174          li        $3,-4               # mask clearing the two low count bits
175          and       $8,$6,$3            # $8 = count rounded down to a multiple of 4
176          beqz      $8,.L_bn_mul_words_tail
177
          # Unrolled loop, four words per pass; each multiply is started
          # before the previous result has been stored.
178.L_bn_mul_words_loop:
179          ld        $12,0($5)
180          dmultu    ($12,$7)
181          ld        $14,8($5)
182          ld        $8,2*8($5)
183          ld        $10,3*8($5)
          # word 0: $2 = carry + low half, stored; new carry = high half
          # plus the carry-out of that addition
184          mflo      ($1,$12,$7)
185          mfhi      ($12,$12,$7)
186          daddu     $2,$1
187          sltu      $13,$2,$1
188           dmultu   ($14,$7)
189          sd        $2,0($4)
190          daddu     $2,$13,$12
191
          # word 1; count and pointers are advanced here, so the stores
          # from now on use negative offsets
192          subu      $6,4
193          daddu $4,4*8
194          daddu $5,4*8
195          mflo      ($1,$14,$7)
196          mfhi      ($14,$14,$7)
197          daddu     $2,$1
198          sltu      $15,$2,$1
199           dmultu   ($8,$7)
200          sd        $2,-3*8($4)
201          daddu     $2,$15,$14
202
          # word 2
203          mflo      ($1,$8,$7)
204          mfhi      ($8,$8,$7)
205          daddu     $2,$1
206          sltu      $9,$2,$1
207           dmultu   ($10,$7)
208          sd        $2,-2*8($4)
209          daddu     $2,$9,$8
210
          # word 3; $8 is recomputed as the loop condition
211          and       $8,$6,$3
212          mflo      ($1,$10,$7)
213          mfhi      ($10,$10,$7)
214          daddu     $2,$1
215          sltu      $11,$2,$1
216          sd        $2,-8($4)
217          .set      noreorder
218          bgtz      $8,.L_bn_mul_words_loop
219          daddu     $2,$11,$10          # delay slot, carry out of word 3
220
221          beqz      $6,.L_bn_mul_words_return
222          nop
223
          # Tail: between one and three leftover words.
224.L_bn_mul_words_tail:
225          .set      reorder
226          ld        $12,0($5)
227          dmultu    ($12,$7)
228          subu      $6,1
229          mflo      ($1,$12,$7)
230          mfhi      ($12,$12,$7)
231          daddu     $2,$1
232          sltu      $13,$2,$1
233          sd        $2,0($4)
234          daddu     $2,$13,$12
235          beqz      $6,.L_bn_mul_words_return
236
237          ld        $12,8($5)
238          dmultu    ($12,$7)
239          subu      $6,1
240          mflo      ($1,$12,$7)
241          mfhi      ($12,$12,$7)
242          daddu     $2,$1
243          sltu      $13,$2,$1
244          sd        $2,8($4)
245          daddu     $2,$13,$12
246          beqz      $6,.L_bn_mul_words_return
247
248          ld        $12,2*8($5)
249          dmultu    ($12,$7)
250          mflo      ($1,$12,$7)
251          mfhi      ($12,$12,$7)
252          daddu     $2,$1
253          sltu      $13,$2,$1
254          sd        $2,2*8($4)
255          daddu     $2,$13,$12
256
          # Return the final carry in $2, mirrored into $4 by the delay slot.
257.L_bn_mul_words_return:
258          .set      noreorder
259          jr        $31
260          move      $4,$2
261.end      bn_mul_words_internal
262
# bn_sqr_words: exported entry point.
#   $4 = pointer to the destination words (two per source word)
#   $5 = pointer to the source words
#   $6 = source word count, tested as a signed value
# When $6 > 0 control continues in bn_sqr_words_internal; the delay
# slot clears $2 on both paths.  Otherwise the routine returns straight
# away with 0 in both $2 and $4.
# NOTE(review): presumably called from C as bn_sqr_words(rp, ap, num)
# - confirm against the C header.
263.align    5
264.globl    bn_sqr_words
265.ent      bn_sqr_words
266bn_sqr_words:
267          .set      noreorder
268          bgtz      $6,bn_sqr_words_internal
269          move      $2,$0               # delay slot, runs on both paths
270          jr        $31
271          move      $4,$2               # delay slot, $4 = 0 as well
272.end      bn_sqr_words
273
# bn_sqr_words_internal: working part of bn_sqr_words, entered with
# $6 > 0.
#   $4 = pointer to the 8-byte destination words
#   $5 = pointer to the 8-byte source words
#   $6 = remaining source word count
# For every source word the 128-bit square is stored as two consecutive
# destination words, low half first.  There is no carry between words.
# Register $2 is never written here, so the value left in it by the
# entry stub is what gets copied to $4 on return.
# Scratch registers written: $3, $8-$15.
# NOTE(review): the dmultu/mflo/mfhi forms with parenthesised operands
# are macros that are not defined in this file - presumably they come
# from mips_arch.h.
274.align    5
275.ent      bn_sqr_words_internal
276bn_sqr_words_internal:
277          .set      reorder
278          li        $3,-4               # mask clearing the two low count bits
279          and       $8,$6,$3            # $8 = count rounded down to a multiple of 4
280          beqz      $8,.L_bn_sqr_words_tail
281
          # Unrolled loop: four source words in, eight result words out.
282.L_bn_sqr_words_loop:
283          ld        $12,0($5)
284          dmultu    ($12,$12)
285          ld        $14,8($5)
286          ld        $8,2*8($5)
287          ld        $10,3*8($5)
288          mflo      ($13,$12,$12)
289          mfhi      ($12,$12,$12)
290          sd        $13,0($4)
291          sd        $12,8($4)
292
          # the destination advances by eight words and the source by
          # four, so the stores from now on use negative offsets
293          dmultu    ($14,$14)
294          subu      $6,4
295          daddu $4,8*8
296          daddu $5,4*8
297          mflo      ($15,$14,$14)
298          mfhi      ($14,$14,$14)
299          sd        $15,-6*8($4)
300          sd        $14,-5*8($4)
301
302          dmultu    ($8,$8)
303          mflo      ($9,$8,$8)
304          mfhi      ($8,$8,$8)
305          sd        $9,-4*8($4)
306          sd        $8,-3*8($4)
307
308
          # last word of the group; $8 is recomputed as the loop condition
309          dmultu    ($10,$10)
310          and       $8,$6,$3
311          mflo      ($11,$10,$10)
312          mfhi      ($10,$10,$10)
313          sd        $11,-2*8($4)
314
315          .set      noreorder
316          bgtz      $8,.L_bn_sqr_words_loop
317          sd        $10,-8($4)          # delay slot, high half of the last square
318
319          beqz      $6,.L_bn_sqr_words_return
320          nop
321
          # Tail: between one and three leftover source words.
322.L_bn_sqr_words_tail:
323          .set      reorder
324          ld        $12,0($5)
325          dmultu    ($12,$12)
326          subu      $6,1
327          mflo      ($13,$12,$12)
328          mfhi      ($12,$12,$12)
329          sd        $13,0($4)
330          sd        $12,8($4)
331          beqz      $6,.L_bn_sqr_words_return
332
333          ld        $12,8($5)
334          dmultu    ($12,$12)
335          subu      $6,1
336          mflo      ($13,$12,$12)
337          mfhi      ($12,$12,$12)
338          sd        $13,2*8($4)
339          sd        $12,3*8($4)
340          beqz      $6,.L_bn_sqr_words_return
341
342          ld        $12,2*8($5)
343          dmultu    ($12,$12)
344          mflo      ($13,$12,$12)
345          mfhi      ($12,$12,$12)
346          sd        $13,4*8($4)
347          sd        $12,5*8($4)
348
          # Return; the delay slot copies $2 into $4.
349.L_bn_sqr_words_return:
350          .set      noreorder
351          jr        $31
352          move      $4,$2
353
354.end      bn_sqr_words_internal
355
# bn_add_words: exported entry point.
#   $4 = pointer to the destination words
#   $5 = pointer to the first source words
#   $6 = pointer to the second source words
#   $7 = word count, tested as a signed value
# When $7 > 0 control continues in bn_add_words_internal with the carry
# register $2 cleared by the delay slot.  Otherwise the routine returns
# straight away with 0 in both $2 and $4.
# NOTE(review): presumably called from C as
# bn_add_words(rp, ap, bp, num) - confirm against the C header.
356.align    5
357.globl    bn_add_words
358.ent      bn_add_words
359bn_add_words:
360          .set      noreorder
361          bgtz      $7,bn_add_words_internal
362          move      $2,$0               # delay slot, runs on both paths
363          jr        $31
364          move      $4,$2               # delay slot, result mirrored into $4
365.end      bn_add_words
366
# bn_add_words_internal: working part of bn_add_words, entered with
# $7 > 0 and the carry register $2 already cleared.
#   $4 = pointer to the 8-byte destination words
#   $5, $6 = pointers to the two 8-byte source arrays
#   $7 = remaining word count
#   $2 = running carry, returned in $2 and also copied to $4
# Each step stores mem[$5] + mem[$6] + carry.  The two additions are
# checked for wrap-around separately with sltu and the two carry bits
# are summed into the next carry.
# Scratch registers written: $1, $3, $8-$15, $24, $25.
367.align    5
368.ent      bn_add_words_internal
369bn_add_words_internal:
370          .set      reorder
371          li        $3,-4               # mask clearing the two low count bits
372          and       $1,$7,$3            # $1 = count rounded down to a multiple of 4
373          beqz      $1,.L_bn_add_words_tail
374
          # Unrolled loop, four words per pass.  The count, the loop
          # condition in $1 and all three pointers are updated between
          # the loads, so later loads and all stores use negative offsets.
375.L_bn_add_words_loop:
376          ld        $12,0($5)
377          ld        $8,0($6)
378          subu      $7,4
379          ld        $13,8($5)
380          and       $1,$7,$3
381          ld        $14,2*8($5)
382          daddu $6,4*8
383          ld        $15,3*8($5)
384          daddu $4,4*8
385          ld        $9,-3*8($6)
386          daddu $5,4*8
387          ld        $10,-2*8($6)
388          ld        $11,-8($6)
          # word 0: $24 = carry of a+b, $2 = carry of adding the carry-in
389          daddu     $8,$12
390          sltu      $24,$8,$12
391          daddu     $12,$8,$2
392          sltu      $2,$12,$8
393          sd        $12,-4*8($4)
394          daddu     $2,$24
395
          # word 1
396          daddu     $9,$13
397          sltu      $25,$9,$13
398          daddu     $13,$9,$2
399          sltu      $2,$13,$9
400          sd        $13,-3*8($4)
401          daddu     $2,$25
402
          # word 2
403          daddu     $10,$14
404          sltu      $24,$10,$14
405          daddu     $14,$10,$2
406          sltu      $2,$14,$10
407          sd        $14,-2*8($4)
408          daddu     $2,$24
409
          # word 3
410          daddu     $11,$15
411          sltu      $25,$11,$15
412          daddu     $15,$11,$2
413          sltu      $2,$15,$11
414          sd        $15,-8($4)
415
416          .set      noreorder
417          bgtz      $1,.L_bn_add_words_loop
418          daddu     $2,$25              # delay slot, completes the carry of word 3
419
420          beqz      $7,.L_bn_add_words_return
421          nop
422
          # Tail: between one and three leftover words.
423.L_bn_add_words_tail:
424          .set      reorder
425          ld        $12,0($5)
426          ld        $8,0($6)
427          daddu     $8,$12
428          subu      $7,1
429          sltu      $24,$8,$12
430          daddu     $12,$8,$2
431          sltu      $2,$12,$8
432          sd        $12,0($4)
433          daddu     $2,$24
434          beqz      $7,.L_bn_add_words_return
435
436          ld        $13,8($5)
437          ld        $9,8($6)
438          daddu     $9,$13
439          subu      $7,1
440          sltu      $25,$9,$13
441          daddu     $13,$9,$2
442          sltu      $2,$13,$9
443          sd        $13,8($4)
444          daddu     $2,$25
445          beqz      $7,.L_bn_add_words_return
446
447          ld        $14,2*8($5)
448          ld        $10,2*8($6)
449          daddu     $10,$14
450          sltu      $24,$10,$14
451          daddu     $14,$10,$2
452          sltu      $2,$14,$10
453          sd        $14,2*8($4)
454          daddu     $2,$24
455
          # Return the final carry in $2, mirrored into $4 by the delay slot.
456.L_bn_add_words_return:
457          .set      noreorder
458          jr        $31
459          move      $4,$2
460
461.end      bn_add_words_internal
462
# bn_sub_words: exported entry point.
#   $4 = pointer to the destination words
#   $5 = pointer to the minuend words
#   $6 = pointer to the subtrahend words
#   $7 = word count, tested as a signed value
# When $7 > 0 control continues in bn_sub_words_internal with the
# borrow register $2 cleared by the delay slot.  Otherwise the routine
# returns straight away with 0 in both $2 and $4.
# NOTE(review): presumably called from C as
# bn_sub_words(rp, ap, bp, num) - confirm against the C header.
463.align    5
464.globl    bn_sub_words
465.ent      bn_sub_words
466bn_sub_words:
467          .set      noreorder
468          bgtz      $7,bn_sub_words_internal
469          move      $2,$0               # delay slot, runs on both paths
470          jr        $31
471          move      $4,$0               # delay slot; same value as $2, which is 0 here
472.end      bn_sub_words
473
# bn_sub_words_internal: working part of bn_sub_words, entered with
# $7 > 0 and the borrow register $2 already cleared.
#   $4 = pointer to the 8-byte destination words
#   $5 = pointer to the 8-byte minuend words
#   $6 = pointer to the 8-byte subtrahend words
#   $7 = remaining word count
#   $2 = running borrow, returned in $2 and also copied to $4
# Each step stores mem[$5] - mem[$6] - borrow.  The borrow of the first
# subtraction is taken with sltu before subtracting, the borrow of the
# second one with sgtu afterwards, and both bits are summed.
# Scratch registers written: $1, $3, $8-$15, $24, $25.
474.align    5
475.ent      bn_sub_words_internal
476bn_sub_words_internal:
477          .set      reorder
478          li        $3,-4               # mask clearing the two low count bits
479          and       $1,$7,$3            # $1 = count rounded down to a multiple of 4
480          beqz      $1,.L_bn_sub_words_tail
481
          # Unrolled loop, four words per pass.  The count, the loop
          # condition in $1 and all three pointers are updated between
          # the loads, so later loads and all stores use negative offsets.
482.L_bn_sub_words_loop:
483          ld        $12,0($5)
484          ld        $8,0($6)
485          subu      $7,4
486          ld        $13,8($5)
487          and       $1,$7,$3
488          ld        $14,2*8($5)
489          daddu $6,4*8
490          ld        $15,3*8($5)
491          daddu $4,4*8
492          ld        $9,-3*8($6)
493          daddu $5,4*8
494          ld        $10,-2*8($6)
495          ld        $11,-8($6)
          # word 0: $24 = borrow of a-b, $2 = borrow of removing the
          # incoming borrow
496          sltu      $24,$12,$8
497          dsubu     $8,$12,$8
498          dsubu     $12,$8,$2
499          sgtu      $2,$12,$8
500          sd        $12,-4*8($4)
501          daddu     $2,$24
502
          # word 1
503          sltu      $25,$13,$9
504          dsubu     $9,$13,$9
505          dsubu     $13,$9,$2
506          sgtu      $2,$13,$9
507          sd        $13,-3*8($4)
508          daddu     $2,$25
509
510
          # word 2
511          sltu      $24,$14,$10
512          dsubu     $10,$14,$10
513          dsubu     $14,$10,$2
514          sgtu      $2,$14,$10
515          sd        $14,-2*8($4)
516          daddu     $2,$24
517
          # word 3
518          sltu      $25,$15,$11
519          dsubu     $11,$15,$11
520          dsubu     $15,$11,$2
521          sgtu      $2,$15,$11
522          sd        $15,-8($4)
523
524          .set      noreorder
525          bgtz      $1,.L_bn_sub_words_loop
526          daddu     $2,$25              # delay slot, completes the borrow of word 3
527
528          beqz      $7,.L_bn_sub_words_return
529          nop
530
          # Tail: between one and three leftover words.
531.L_bn_sub_words_tail:
532          .set      reorder
533          ld        $12,0($5)
534          ld        $8,0($6)
535          subu      $7,1
536          sltu      $24,$12,$8
537          dsubu     $8,$12,$8
538          dsubu     $12,$8,$2
539          sgtu      $2,$12,$8
540          sd        $12,0($4)
541          daddu     $2,$24
542          beqz      $7,.L_bn_sub_words_return
543
544          ld        $13,8($5)
545          subu      $7,1
546          ld        $9,8($6)
547          sltu      $25,$13,$9
548          dsubu     $9,$13,$9
549          dsubu     $13,$9,$2
550          sgtu      $2,$13,$9
551          sd        $13,8($4)
552          daddu     $2,$25
553          beqz      $7,.L_bn_sub_words_return
554
555          ld        $14,2*8($5)
556          ld        $10,2*8($6)
557          sltu      $24,$14,$10
558          dsubu     $10,$14,$10
559          dsubu     $14,$10,$2
560          sgtu      $2,$14,$10
561          sd        $14,2*8($4)
562          daddu     $2,$24
563
          # Return the final borrow in $2, mirrored into $4 by the delay slot.
564.L_bn_sub_words_return:
565          .set      noreorder
566          jr        $31
567          move      $4,$2
568.end      bn_sub_words_internal
569
# Disabled code: everything up to the matching endif directive is
# removed by the preprocessor and never assembled.
570#if 0
571/*
572 * The bn_div_3_words entry point is re-used for constant-time interface.
573 * Implementation is retained as historical reference.
574 */
# bn_div_3_words entry stub.
#   $4 = pointer into a word array; the words at offsets 0, -8 and -16
#        from it are read
#   $5 = word kept in $10 and later multiplied by the quotient
#   $6 = divisor word
# When the word at 0($4) equals $6 the stub returns all ones in $2 and
# $4.  Otherwise it continues in bn_div_3_words_internal with
# $4 = word at offset 0 and $5 = word at offset -8.
575.align 5
576.globl    bn_div_3_words
577.ent      bn_div_3_words
578bn_div_3_words:
579          .set      noreorder
580          move      $7,$4               # we know that bn_div_words does not
581                                        # touch $7, $10, $11 and preserves $6
582                                        # so that we can save two arguments
583                                        # and return address in registers
584                                        # instead of stack:-)
585
586          ld        $4,($7)
587          move      $10,$5
588          bne       $4,$6,bn_div_3_words_internal
589          ld        $5,-8($7)           # delay slot, runs on both paths
590          li        $2,-1
591          jr        $31
592          move      $4,$2
593.end      bn_div_3_words
594
# bn_div_3_words_internal: obtains a quotient estimate in $2 by calling
# bn_div_words_internal, with the return address parked in $11 around
# the call.  It then multiplies $10 by that estimate and runs a
# correction loop that decrements the estimate.
595.align    5
596.ent      bn_div_3_words_internal
597bn_div_3_words_internal:
598          .set      reorder
599          move      $11,$31
600          bal       bn_div_words_internal
601          move      $31,$11
602          dmultu    ($10,$2)
603          ld        $14,-2*8($7)
604          move      $8,$0
605          mfhi      ($13,$10,$2)
606          mflo      ($12,$10,$2)
607          sltu      $24,$13,$5
608.L_bn_div_3_words_inner_loop:
609          bnez      $24,.L_bn_div_3_words_inner_loop_done
610          sgeu      $1,$14,$12
611          seq       $25,$13,$5
612          and       $1,$25
613          sltu      $15,$12,$10
614          daddu     $5,$6
615          dsubu     $13,$15
616          dsubu     $12,$10
617          sltu      $24,$13,$5
618          sltu      $8,$5,$6
619          or        $24,$8
620          .set      noreorder
621          beqz      $1,.L_bn_div_3_words_inner_loop
622          dsubu     $2,1                # delay slot, decrement runs on both paths
623          daddu     $2,1                # fall-through path undoes that decrement
624          .set      reorder
625.L_bn_div_3_words_inner_loop_done:
626          .set      noreorder
627          jr        $31
628          move      $4,$2
629.end      bn_div_3_words_internal
630#endif
631
# bn_div_words: exported entry point.
#   $4 = high dividend word, $5 = low dividend word, $6 = divisor
# A non-zero divisor continues in bn_div_words_internal.  A zero
# divisor returns all ones in both $2 and $4 without trapping.
# NOTE(review): presumably called from C as bn_div_words(h, l, d)
# - confirm against the C header.
632.align    5
633.globl    bn_div_words
634.ent      bn_div_words
635bn_div_words:
636          .set      noreorder
637          bnez      $6,bn_div_words_internal
638          li        $2,-1               # I would rather signal div-by-zero
639                                        # which can be done with 'break 7'
640          jr        $31
641          move      $4,$2
642.end      bn_div_words
643
# bn_div_words_internal: divides the two-word value $4:$5 by $6.
#   entry : $4 = high dividend word, $5 = low dividend word,
#           $6 = non-zero divisor
#   result: $2 and $4 = quotient, $3 and $5 = remainder, and $6 is
#           shifted back to its entry value
# Method: the divisor is shifted left until its top bit is set, the
# dividend is shifted by the same amount, and the quotient is then
# produced as two 32-bit halves.  Each half starts from an estimate
# that is decremented until estimate * divisor no longer exceeds the
# current partial dividend.
# Executes break 6 when the normalising shift would push set bits out
# of the top of $4.
# Scratch registers written: $1, $8, $9, $12-$15, $24, $25.
# The first instructions are still assembled under the noreorder
# setting selected by the bn_div_words stub above.
644.align    5
645.ent      bn_div_words_internal
646bn_div_words_internal:
647          move      $3,$0
648          bltz      $6,.L_bn_div_words_body   # top bit already set, nothing to shift
649          move      $25,$3              # delay slot, shift count = 0
650          dsll      $6,1
651          bgtz      $6,.-4              # back to the dsll until the top bit is set
652          addu      $25,1               # delay slot, counts every shift
653
          # $14 = the top $25 bits of $4, which must all be clear;
          # $1 = the bits of $5 that move up into $4
654          .set      reorder
655          negu      $13,$25
656          li        $14,-1
657          dsll      $14,$13
658          and       $14,$4
659          dsrl      $1,$5,$13
660          .set      noreorder
661          beqz      $14,.+12            # skip the nop and the break
662          nop
663          break     6                   # signal overflow
664          .set      reorder
665          dsll      $4,$25
666          dsll      $5,$25
667          or        $4,$1
668.L_bn_div_words_body:
          # $3 = upper half of the divisor; when $4 >= $6 the divisor is
          # subtracted from $4 once before dividing
669          dsrl      $3,$6,4*8 # bits
670          sgeu      $1,$4,$6
671          .set      noreorder
672          beqz      $1,.+12             # skip the nop and the dsubu
673          nop
674          dsubu     $4,$6
675          .set      reorder
676
          # Upper quotient half.  The estimate in $8 is 0xffffffff when
          # the upper halves of $4 and $6 are equal, otherwise $4 divided
          # by the upper divisor half.
677          li        $8,-1
678          dsrl      $9,$4,4*8 # bits
679          dsrl      $8,4*8    # q=0xffffffff
680          beq       $3,$9,.L_bn_div_words_skip_div1
681          ddivu     ($4,$3)
682          mfqt      ($8,$4,$3)
683.L_bn_div_words_skip_div1:
          # $13:$12 = divisor * estimate; $15 = lower half of $4 joined
          # with the upper half of $5
684          dmultu    ($6,$8)
685          dsll      $15,$4,4*8          # bits
686          dsrl      $1,$5,4*8 # bits
687          or        $15,$1
688          mflo      ($12,$6,$8)
689          mfhi      ($13,$6,$8)
          # While $13:$12 is greater than $9:$15, take one divisor off
          # the product and one off the estimate.
690.L_bn_div_words_inner_loop1:
691          sltu      $14,$15,$12
692          seq       $24,$9,$13
693          sltu      $1,$9,$13
694          and       $14,$24
695          sltu      $2,$12,$6
696          or        $1,$14
697          .set      noreorder
698          beqz      $1,.L_bn_div_words_inner_loop1_done
699          dsubu     $13,$2              # delay slot, runs on both paths
700          dsubu     $12,$6
701          b         .L_bn_div_words_inner_loop1
702          dsubu     $8,1
703          .set      reorder
704.L_bn_div_words_inner_loop1_done:
705
          # $4 = partial remainder, $2 = upper quotient half moved into
          # place, $5 = remaining low dividend bits moved up
706          dsll      $5,4*8    # bits
707          dsubu     $4,$15,$12
708          dsll      $2,$8,4*8 # bits
709
          # Lower quotient half, same procedure as for the upper half.
710          li        $8,-1
711          dsrl      $9,$4,4*8 # bits
712          dsrl      $8,4*8    # q=0xffffffff
713          beq       $3,$9,.L_bn_div_words_skip_div2
714          ddivu     ($4,$3)
715          mfqt      ($8,$4,$3)
716.L_bn_div_words_skip_div2:
717          dmultu    ($6,$8)
718          dsll      $15,$4,4*8          # bits
719          dsrl      $1,$5,4*8 # bits
720          or        $15,$1
721          mflo      ($12,$6,$8)
722          mfhi      ($13,$6,$8)
          # Same correction loop; $3 is reused as borrow scratch here
          # because the upper divisor half is no longer needed.
723.L_bn_div_words_inner_loop2:
724          sltu      $14,$15,$12
725          seq       $24,$9,$13
726          sltu      $1,$9,$13
727          and       $14,$24
728          sltu      $3,$12,$6
729          or        $1,$14
730          .set      noreorder
731          beqz      $1,.L_bn_div_words_inner_loop2_done
732          dsubu     $13,$3              # delay slot, runs on both paths
733          dsubu     $12,$6
734          b         .L_bn_div_words_inner_loop2
735          dsubu     $8,1
736          .set      reorder
737.L_bn_div_words_inner_loop2_done:
738
          # Combine both quotient halves and undo the normalising shift
          # on the remainder and on the divisor.
739          dsubu     $4,$15,$12
740          or        $2,$8
741          dsrl      $3,$4,$25 # $3 contains remainder if anybody wants it
742          dsrl      $6,$25              # restore $6
743
744          .set      noreorder
745          move      $5,$3
746          jr        $31
747          move      $4,$2               # delay slot, quotient mirrored into $4
748.end      bn_div_words_internal
749
750.align    5
751.globl    bn_mul_comba8
752.ent      bn_mul_comba8
753bn_mul_comba8:
754          .set      noreorder
755          .frame    $29,6*8,$31
756          .mask     0x003f0000,-8
757          dsubu $29,6*8
758          sd        $21,5*8($29)
759          sd        $20,4*8($29)
760          sd        $19,3*8($29)
761          sd        $18,2*8($29)
762          sd        $17,1*8($29)
763          sd        $16,0*8($29)
764
765          .set      reorder
766          ld        $12,0($5) # If compiled with -mips3 option on
767                                        # R5000 box assembler barks on this
768                                        # 1ine with "should not have mult/div
769                                        # as last instruction in bb (R10K
770                                        # bug)" warning. If anybody out there
771                                        # has a clue about how to circumvent
772                                        # this do send me a note.
773                                        #                   <appro@fy.chalmers.se>
774
775          ld        $8,0($6)
776          ld        $13,8($5)
777          ld        $14,2*8($5)
778          dmultu    ($12,$8)            # mul_add_c(a[0],b[0],c1,c2,c3);
779          ld        $15,3*8($5)
780          ld        $9,8($6)
781          ld        $10,2*8($6)
782          ld        $11,3*8($6)
783          mflo      ($2,$12,$8)
784          mfhi      ($3,$12,$8)
785
786          ld        $16,4*8($5)
787          ld        $18,5*8($5)
788          dmultu    ($12,$9)            # mul_add_c(a[0],b[1],c2,c3,c1);
789          ld        $20,6*8($5)
790          ld        $5,7*8($5)
791          ld        $17,4*8($6)
792          ld        $19,5*8($6)
793          mflo      ($24,$12,$9)
794          mfhi      ($25,$12,$9)
795          daddu     $3,$24
796          sltu      $1,$3,$24
797          dmultu    ($13,$8)            # mul_add_c(a[1],b[0],c2,c3,c1);
798          daddu     $7,$25,$1
799          ld        $21,6*8($6)
800          ld        $6,7*8($6)
801          sd        $2,0($4)  # r[0]=c1;
802          mflo      ($24,$13,$8)
803          mfhi      ($25,$13,$8)
804          daddu     $3,$24
805          sltu      $1,$3,$24
806           dmultu   ($14,$8)            # mul_add_c(a[2],b[0],c3,c1,c2);
807          daddu     $25,$1
808          daddu     $7,$25
809          sltu      $2,$7,$25
810          sd        $3,8($4)  # r[1]=c2;
811
812          mflo      ($24,$14,$8)
813          mfhi      ($25,$14,$8)
814          daddu     $7,$24
815          sltu      $1,$7,$24
816          dmultu    ($13,$9)            # mul_add_c(a[1],b[1],c3,c1,c2);
817          daddu     $25,$1
818          daddu     $2,$25
819          mflo      ($24,$13,$9)
820          mfhi      ($25,$13,$9)
821          daddu     $7,$24
822          sltu      $1,$7,$24
823          dmultu    ($12,$10)           # mul_add_c(a[0],b[2],c3,c1,c2);
824          daddu     $25,$1
825          daddu     $2,$25
826          sltu      $3,$2,$25
827          mflo      ($24,$12,$10)
828          mfhi      ($25,$12,$10)
829          daddu     $7,$24
830          sltu      $1,$7,$24
831           dmultu   ($12,$11)           # mul_add_c(a[0],b[3],c1,c2,c3);
832          daddu     $25,$1
833          daddu     $2,$25
834          sltu      $1,$2,$25
835          daddu     $3,$1
836          sd        $7,2*8($4)          # r[2]=c3;
837
838          mflo      ($24,$12,$11)
839          mfhi      ($25,$12,$11)
840          daddu     $2,$24
841          sltu      $1,$2,$24
842          dmultu    ($13,$10)           # mul_add_c(a[1],b[2],c1,c2,c3);
843          daddu     $25,$1
844          daddu     $3,$25
845          sltu      $7,$3,$25
846          mflo      ($24,$13,$10)
847          mfhi      ($25,$13,$10)
848          daddu     $2,$24
849          sltu      $1,$2,$24
850          dmultu    ($14,$9)            # mul_add_c(a[2],b[1],c1,c2,c3);
851          daddu     $25,$1
852          daddu     $3,$25
853          sltu      $1,$3,$25
854          daddu     $7,$1
855          mflo      ($24,$14,$9)
856          mfhi      ($25,$14,$9)
857          daddu     $2,$24
858          sltu      $1,$2,$24
859          dmultu    ($15,$8)            # mul_add_c(a[3],b[0],c1,c2,c3);
860          daddu     $25,$1
861          daddu     $3,$25
862          sltu      $1,$3,$25
863          daddu     $7,$1
864          mflo      ($24,$15,$8)
865          mfhi      ($25,$15,$8)
866          daddu     $2,$24
867          sltu      $1,$2,$24
868           dmultu   ($16,$8)            # mul_add_c(a[4],b[0],c2,c3,c1);
869          daddu     $25,$1
870          daddu     $3,$25
871          sltu      $1,$3,$25
872          daddu     $7,$1
873          sd        $2,3*8($4)          # r[3]=c1;
874
875          mflo      ($24,$16,$8)
876          mfhi      ($25,$16,$8)
877          daddu     $3,$24
878          sltu      $1,$3,$24
879          dmultu    ($15,$9)            # mul_add_c(a[3],b[1],c2,c3,c1);
880          daddu     $25,$1
881          daddu     $7,$25
882          sltu      $2,$7,$25
883          mflo      ($24,$15,$9)
884          mfhi      ($25,$15,$9)
885          daddu     $3,$24
886          sltu      $1,$3,$24
887          dmultu    ($14,$10)           # mul_add_c(a[2],b[2],c2,c3,c1);
888          daddu     $25,$1
889          daddu     $7,$25
890          sltu      $1,$7,$25
891          daddu     $2,$1
892          mflo      ($24,$14,$10)
893          mfhi      ($25,$14,$10)
894          daddu     $3,$24
895          sltu      $1,$3,$24
896          dmultu    ($13,$11)           # mul_add_c(a[1],b[3],c2,c3,c1);
897          daddu     $25,$1
898          daddu     $7,$25
899          sltu      $1,$7,$25
900          daddu     $2,$1
901          mflo      ($24,$13,$11)
902          mfhi      ($25,$13,$11)
903          daddu     $3,$24
904          sltu      $1,$3,$24
905          dmultu    ($12,$17)           # mul_add_c(a[0],b[4],c2,c3,c1);
906          daddu     $25,$1
907          daddu     $7,$25
908          sltu      $1,$7,$25
909          daddu     $2,$1
910          mflo      ($24,$12,$17)
911          mfhi      ($25,$12,$17)
912          daddu     $3,$24
913          sltu      $1,$3,$24
914           dmultu   ($12,$19)           # mul_add_c(a[0],b[5],c3,c1,c2);
915          daddu     $25,$1
916          daddu     $7,$25
917          sltu      $1,$7,$25
918          daddu     $2,$1
919          sd        $3,4*8($4)          # r[4]=c2;
920
921          mflo      ($24,$12,$19)
922          mfhi      ($25,$12,$19)
923          daddu     $7,$24
924          sltu      $1,$7,$24
925          dmultu    ($13,$17)           # mul_add_c(a[1],b[4],c3,c1,c2);
926          daddu     $25,$1
927          daddu     $2,$25
928          sltu      $3,$2,$25
929          mflo      ($24,$13,$17)
930          mfhi      ($25,$13,$17)
931          daddu     $7,$24
932          sltu      $1,$7,$24
933          dmultu    ($14,$11)           # mul_add_c(a[2],b[3],c3,c1,c2);
934          daddu     $25,$1
935          daddu     $2,$25
936          sltu      $1,$2,$25
937          daddu     $3,$1
938          mflo      ($24,$14,$11)
939          mfhi      ($25,$14,$11)
940          daddu     $7,$24
941          sltu      $1,$7,$24
942          dmultu    ($15,$10)           # mul_add_c(a[3],b[2],c3,c1,c2);
943          daddu     $25,$1
944          daddu     $2,$25
945          sltu      $1,$2,$25
946          daddu     $3,$1
947          mflo      ($24,$15,$10)
948          mfhi      ($25,$15,$10)
949          daddu     $7,$24
950          sltu      $1,$7,$24
951          dmultu    ($16,$9)            # mul_add_c(a[4],b[1],c3,c1,c2);
952          daddu     $25,$1
953          daddu     $2,$25
954          sltu      $1,$2,$25
955          daddu     $3,$1
956          mflo      ($24,$16,$9)
957          mfhi      ($25,$16,$9)
958          daddu     $7,$24
959          sltu      $1,$7,$24
960          dmultu    ($18,$8)            # mul_add_c(a[5],b[0],c3,c1,c2);
961          daddu     $25,$1
962          daddu     $2,$25
963          sltu      $1,$2,$25
964          daddu     $3,$1
965          mflo      ($24,$18,$8)
966          mfhi      ($25,$18,$8)
967          daddu     $7,$24
968          sltu      $1,$7,$24
969           dmultu   ($20,$8)            # mul_add_c(a[6],b[0],c1,c2,c3);
970          daddu     $25,$1
971          daddu     $2,$25
972          sltu      $1,$2,$25
973          daddu     $3,$1
974          sd        $7,5*8($4)          # r[5]=c3;
975
976          mflo      ($24,$20,$8)
977          mfhi      ($25,$20,$8)
978          daddu     $2,$24
979          sltu      $1,$2,$24
980          dmultu    ($18,$9)            # mul_add_c(a[5],b[1],c1,c2,c3);
981          daddu     $25,$1
982          daddu     $3,$25
983          sltu      $7,$3,$25
984          mflo      ($24,$18,$9)
985          mfhi      ($25,$18,$9)
986          daddu     $2,$24
987          sltu      $1,$2,$24
988          dmultu    ($16,$10)           # mul_add_c(a[4],b[2],c1,c2,c3);
989          daddu     $25,$1
990          daddu     $3,$25
991          sltu      $1,$3,$25
992          daddu     $7,$1
993          mflo      ($24,$16,$10)
994          mfhi      ($25,$16,$10)
995          daddu     $2,$24
996          sltu      $1,$2,$24
997          dmultu    ($15,$11)           # mul_add_c(a[3],b[3],c1,c2,c3);
998          daddu     $25,$1
999          daddu     $3,$25
1000          sltu      $1,$3,$25
1001          daddu     $7,$1
1002          mflo      ($24,$15,$11)
1003          mfhi      ($25,$15,$11)
1004          daddu     $2,$24
1005          sltu      $1,$2,$24
1006          dmultu    ($14,$17)           # mul_add_c(a[2],b[4],c1,c2,c3);
1007          daddu     $25,$1
1008          daddu     $3,$25
1009          sltu      $1,$3,$25
1010          daddu     $7,$1
1011          mflo      ($24,$14,$17)
1012          mfhi      ($25,$14,$17)
1013          daddu     $2,$24
1014          sltu      $1,$2,$24
1015          dmultu    ($13,$19)           # mul_add_c(a[1],b[5],c1,c2,c3);
1016          daddu     $25,$1
1017          daddu     $3,$25
1018          sltu      $1,$3,$25
1019          daddu     $7,$1
1020          mflo      ($24,$13,$19)
1021          mfhi      ($25,$13,$19)
1022          daddu     $2,$24
1023          sltu      $1,$2,$24
1024          dmultu    ($12,$21)           # mul_add_c(a[0],b[6],c1,c2,c3);
1025          daddu     $25,$1
1026          daddu     $3,$25
1027          sltu      $1,$3,$25
1028          daddu     $7,$1
1029          mflo      ($24,$12,$21)
1030          mfhi      ($25,$12,$21)
1031          daddu     $2,$24
1032          sltu      $1,$2,$24
1033           dmultu   ($12,$6)            # mul_add_c(a[0],b[7],c2,c3,c1);
1034          daddu     $25,$1
1035          daddu     $3,$25
1036          sltu      $1,$3,$25
1037          daddu     $7,$1
1038          sd        $2,6*8($4)          # r[6]=c1;
1039
1040          mflo      ($24,$12,$6)
1041          mfhi      ($25,$12,$6)
1042          daddu     $3,$24
1043          sltu      $1,$3,$24
1044          dmultu    ($13,$21)           # mul_add_c(a[1],b[6],c2,c3,c1);
1045          daddu     $25,$1
1046          daddu     $7,$25
1047          sltu      $2,$7,$25
1048          mflo      ($24,$13,$21)
1049          mfhi      ($25,$13,$21)
1050          daddu     $3,$24
1051          sltu      $1,$3,$24
1052          dmultu    ($14,$19)           # mul_add_c(a[2],b[5],c2,c3,c1);
1053          daddu     $25,$1
1054          daddu     $7,$25
1055          sltu      $1,$7,$25
1056          daddu     $2,$1
1057          mflo      ($24,$14,$19)
1058          mfhi      ($25,$14,$19)
1059          daddu     $3,$24
1060          sltu      $1,$3,$24
1061          dmultu    ($15,$17)           # mul_add_c(a[3],b[4],c2,c3,c1);
1062          daddu     $25,$1
1063          daddu     $7,$25
1064          sltu      $1,$7,$25
1065          daddu     $2,$1
1066          mflo      ($24,$15,$17)
1067          mfhi      ($25,$15,$17)
1068          daddu     $3,$24
1069          sltu      $1,$3,$24
1070          dmultu    ($16,$11)           # mul_add_c(a[4],b[3],c2,c3,c1);
1071          daddu     $25,$1
1072          daddu     $7,$25
1073          sltu      $1,$7,$25
1074          daddu     $2,$1
1075          mflo      ($24,$16,$11)
1076          mfhi      ($25,$16,$11)
1077          daddu     $3,$24
1078          sltu      $1,$3,$24
1079          dmultu    ($18,$10)           # mul_add_c(a[5],b[2],c2,c3,c1);
1080          daddu     $25,$1
1081          daddu     $7,$25
1082          sltu      $1,$7,$25
1083          daddu     $2,$1
1084          mflo      ($24,$18,$10)
1085          mfhi      ($25,$18,$10)
1086          daddu     $3,$24
1087          sltu      $1,$3,$24
1088          dmultu    ($20,$9)            # mul_add_c(a[6],b[1],c2,c3,c1);
1089          daddu     $25,$1
1090          daddu     $7,$25
1091          sltu      $1,$7,$25
1092          daddu     $2,$1
1093          mflo      ($24,$20,$9)
1094          mfhi      ($25,$20,$9)
1095          daddu     $3,$24
1096          sltu      $1,$3,$24
1097          dmultu    ($5,$8)             # mul_add_c(a[7],b[0],c2,c3,c1);
1098          daddu     $25,$1
1099          daddu     $7,$25
1100          sltu      $1,$7,$25
1101          daddu     $2,$1
1102          mflo      ($24,$5,$8)
1103          mfhi      ($25,$5,$8)
1104          daddu     $3,$24
1105          sltu      $1,$3,$24
1106           dmultu   ($5,$9)             # mul_add_c(a[7],b[1],c3,c1,c2);
1107          daddu     $25,$1
1108          daddu     $7,$25
1109          sltu      $1,$7,$25
1110          daddu     $2,$1
1111          sd        $3,7*8($4)          # r[7]=c2;
1112
1113          mflo      ($24,$5,$9)
1114          mfhi      ($25,$5,$9)
1115          daddu     $7,$24
1116          sltu      $1,$7,$24
1117          dmultu    ($20,$10)           # mul_add_c(a[6],b[2],c3,c1,c2);
1118          daddu     $25,$1
1119          daddu     $2,$25
1120          sltu      $3,$2,$25
1121          mflo      ($24,$20,$10)
1122          mfhi      ($25,$20,$10)
1123          daddu     $7,$24
1124          sltu      $1,$7,$24
1125          dmultu    ($18,$11)           # mul_add_c(a[5],b[3],c3,c1,c2);
1126          daddu     $25,$1
1127          daddu     $2,$25
1128          sltu      $1,$2,$25
1129          daddu     $3,$1
1130          mflo      ($24,$18,$11)
1131          mfhi      ($25,$18,$11)
1132          daddu     $7,$24
1133          sltu      $1,$7,$24
1134          dmultu    ($16,$17)           # mul_add_c(a[4],b[4],c3,c1,c2);
1135          daddu     $25,$1
1136          daddu     $2,$25
1137          sltu      $1,$2,$25
1138          daddu     $3,$1
1139          mflo      ($24,$16,$17)
1140          mfhi      ($25,$16,$17)
1141          daddu     $7,$24
1142          sltu      $1,$7,$24
1143          dmultu    ($15,$19)           # mul_add_c(a[3],b[5],c3,c1,c2);
1144          daddu     $25,$1
1145          daddu     $2,$25
1146          sltu      $1,$2,$25
1147          daddu     $3,$1
1148          mflo      ($24,$15,$19)
1149          mfhi      ($25,$15,$19)
1150          daddu     $7,$24
1151          sltu      $1,$7,$24
1152          dmultu    ($14,$21)           # mul_add_c(a[2],b[6],c3,c1,c2);
1153          daddu     $25,$1
1154          daddu     $2,$25
1155          sltu      $1,$2,$25
1156          daddu     $3,$1
1157          mflo      ($24,$14,$21)
1158          mfhi      ($25,$14,$21)
1159          daddu     $7,$24
1160          sltu      $1,$7,$24
1161          dmultu    ($13,$6)            # mul_add_c(a[1],b[7],c3,c1,c2);
1162          daddu     $25,$1
1163          daddu     $2,$25
1164          sltu      $1,$2,$25
1165          daddu     $3,$1
1166          mflo      ($24,$13,$6)
1167          mfhi      ($25,$13,$6)
1168          daddu     $7,$24
1169          sltu      $1,$7,$24
1170           dmultu   ($14,$6)            # mul_add_c(a[2],b[7],c1,c2,c3);
1171          daddu     $25,$1
1172          daddu     $2,$25
1173          sltu      $1,$2,$25
1174          daddu     $3,$1
1175          sd        $7,8*8($4)          # r[8]=c3;
1176
1177          mflo      ($24,$14,$6)
1178          mfhi      ($25,$14,$6)
1179          daddu     $2,$24
1180          sltu      $1,$2,$24
1181          dmultu    ($15,$21)           # mul_add_c(a[3],b[6],c1,c2,c3);
1182          daddu     $25,$1
1183          daddu     $3,$25
1184          sltu      $7,$3,$25
1185          mflo      ($24,$15,$21)
1186          mfhi      ($25,$15,$21)
1187          daddu     $2,$24
1188          sltu      $1,$2,$24
1189          dmultu    ($16,$19)           # mul_add_c(a[4],b[5],c1,c2,c3);
1190          daddu     $25,$1
1191          daddu     $3,$25
1192          sltu      $1,$3,$25
1193          daddu     $7,$1
1194          mflo      ($24,$16,$19)
1195          mfhi      ($25,$16,$19)
1196          daddu     $2,$24
1197          sltu      $1,$2,$24
1198          dmultu    ($18,$17)           # mul_add_c(a[5],b[4],c1,c2,c3);
1199          daddu     $25,$1
1200          daddu     $3,$25
1201          sltu      $1,$3,$25
1202          daddu     $7,$1
1203          mflo      ($24,$18,$17)
1204          mfhi      ($25,$18,$17)
1205          daddu     $2,$24
1206          sltu      $1,$2,$24
1207          dmultu    ($20,$11)           # mul_add_c(a[6],b[3],c1,c2,c3);
1208          daddu     $25,$1
1209          daddu     $3,$25
1210          sltu      $1,$3,$25
1211          daddu     $7,$1
1212          mflo      ($24,$20,$11)
1213          mfhi      ($25,$20,$11)
1214          daddu     $2,$24
1215          sltu      $1,$2,$24
1216          dmultu    ($5,$10)            # mul_add_c(a[7],b[2],c1,c2,c3);
1217          daddu     $25,$1
1218          daddu     $3,$25
1219          sltu      $1,$3,$25
1220          daddu     $7,$1
1221          mflo      ($24,$5,$10)
1222          mfhi      ($25,$5,$10)
1223          daddu     $2,$24
1224          sltu      $1,$2,$24
1225           dmultu   ($5,$11)            # mul_add_c(a[7],b[3],c2,c3,c1);
1226          daddu     $25,$1
1227          daddu     $3,$25
1228          sltu      $1,$3,$25
1229          daddu     $7,$1
1230          sd        $2,9*8($4)          # r[9]=c1;
1231
1232          mflo      ($24,$5,$11)
1233          mfhi      ($25,$5,$11)
1234          daddu     $3,$24
1235          sltu      $1,$3,$24
1236          dmultu    ($20,$17)           # mul_add_c(a[6],b[4],c2,c3,c1);
1237          daddu     $25,$1
1238          daddu     $7,$25
1239          sltu      $2,$7,$25
1240          mflo      ($24,$20,$17)
1241          mfhi      ($25,$20,$17)
1242          daddu     $3,$24
1243          sltu      $1,$3,$24
1244          dmultu    ($18,$19)           # mul_add_c(a[5],b[5],c2,c3,c1);
1245          daddu     $25,$1
1246          daddu     $7,$25
1247          sltu      $1,$7,$25
1248          daddu     $2,$1
1249          mflo      ($24,$18,$19)
1250          mfhi      ($25,$18,$19)
1251          daddu     $3,$24
1252          sltu      $1,$3,$24
1253          dmultu    ($16,$21)           # mul_add_c(a[4],b[6],c2,c3,c1);
1254          daddu     $25,$1
1255          daddu     $7,$25
1256          sltu      $1,$7,$25
1257          daddu     $2,$1
1258          mflo      ($24,$16,$21)
1259          mfhi      ($25,$16,$21)
1260          daddu     $3,$24
1261          sltu      $1,$3,$24
1262          dmultu    ($15,$6)            # mul_add_c(a[3],b[7],c2,c3,c1);
1263          daddu     $25,$1
1264          daddu     $7,$25
1265          sltu      $1,$7,$25
1266          daddu     $2,$1
1267          mflo      ($24,$15,$6)
1268          mfhi      ($25,$15,$6)
1269          daddu     $3,$24
1270          sltu      $1,$3,$24
1271          dmultu    ($16,$6)            # mul_add_c(a[4],b[7],c3,c1,c2);
1272          daddu     $25,$1
1273          daddu     $7,$25
1274          sltu      $1,$7,$25
1275          daddu     $2,$1
1276          sd        $3,10*8($4)         # r[10]=c2;
1277
1278          mflo      ($24,$16,$6)
1279          mfhi      ($25,$16,$6)
1280          daddu     $7,$24
1281          sltu      $1,$7,$24
1282          dmultu    ($18,$21)           # mul_add_c(a[5],b[6],c3,c1,c2);
1283          daddu     $25,$1
1284          daddu     $2,$25
1285          sltu      $3,$2,$25
1286          mflo      ($24,$18,$21)
1287          mfhi      ($25,$18,$21)
1288          daddu     $7,$24
1289          sltu      $1,$7,$24
1290          dmultu    ($20,$19)           # mul_add_c(a[6],b[5],c3,c1,c2);
1291          daddu     $25,$1
1292          daddu     $2,$25
1293          sltu      $1,$2,$25
1294          daddu     $3,$1
1295          mflo      ($24,$20,$19)
1296          mfhi      ($25,$20,$19)
1297          daddu     $7,$24
1298          sltu      $1,$7,$24
1299          dmultu    ($5,$17)            # mul_add_c(a[7],b[4],c3,c1,c2);
1300          daddu     $25,$1
1301          daddu     $2,$25
1302          sltu      $1,$2,$25
1303          daddu     $3,$1
1304          mflo      ($24,$5,$17)
1305          mfhi      ($25,$5,$17)
1306          daddu     $7,$24
1307          sltu      $1,$7,$24
1308           dmultu   ($5,$19)            # mul_add_c(a[7],b[5],c1,c2,c3);
1309          daddu     $25,$1
1310          daddu     $2,$25
1311          sltu      $1,$2,$25
1312          daddu     $3,$1
1313          sd        $7,11*8($4)         # r[11]=c3;
1314
1315          mflo      ($24,$5,$19)
1316          mfhi      ($25,$5,$19)
1317          daddu     $2,$24
1318          sltu      $1,$2,$24
1319          dmultu    ($20,$21)           # mul_add_c(a[6],b[6],c1,c2,c3);
1320          daddu     $25,$1
1321          daddu     $3,$25
1322          sltu      $7,$3,$25
1323          mflo      ($24,$20,$21)
1324          mfhi      ($25,$20,$21)
1325          daddu     $2,$24
1326          sltu      $1,$2,$24
1327          dmultu    ($18,$6)            # mul_add_c(a[5],b[7],c1,c2,c3);
1328          daddu     $25,$1
1329          daddu     $3,$25
1330          sltu      $1,$3,$25
1331          daddu     $7,$1
1332          mflo      ($24,$18,$6)
1333          mfhi      ($25,$18,$6)
1334          daddu     $2,$24
1335          sltu      $1,$2,$24
1336           dmultu   ($20,$6)            # mul_add_c(a[6],b[7],c2,c3,c1);
1337          daddu     $25,$1
1338          daddu     $3,$25
1339          sltu      $1,$3,$25
1340          daddu     $7,$1
1341          sd        $2,12*8($4)         # r[12]=c1;
1342
1343          mflo      ($24,$20,$6)
1344          mfhi      ($25,$20,$6)
1345          daddu     $3,$24
1346          sltu      $1,$3,$24
1347          dmultu    ($5,$21)            # mul_add_c(a[7],b[6],c2,c3,c1);
1348          daddu     $25,$1
1349          daddu     $7,$25
1350          sltu      $2,$7,$25
1351          mflo      ($24,$5,$21)
1352          mfhi      ($25,$5,$21)
1353          daddu     $3,$24
1354          sltu      $1,$3,$24
1355          dmultu    ($5,$6)             # mul_add_c(a[7],b[7],c3,c1,c2);
1356          daddu     $25,$1
1357          daddu     $7,$25
1358          sltu      $1,$7,$25
1359          daddu     $2,$1
1360          sd        $3,13*8($4)         # r[13]=c2;
1361
1362          mflo      ($24,$5,$6)
1363          mfhi      ($25,$5,$6)
1364          daddu     $7,$24
1365          sltu      $1,$7,$24
1366          daddu     $25,$1
1367          daddu     $2,$25
1368          sd        $7,14*8($4)         # r[14]=c3;
1369          sd        $2,15*8($4)         # r[15]=c1;
1370
1371          .set      noreorder
1372          ld        $21,5*8($29)
1373          ld        $20,4*8($29)
1374          ld        $19,3*8($29)
1375          ld        $18,2*8($29)
1376          ld        $17,1*8($29)
1377          ld        $16,0*8($29)
1378          jr        $31
1379          daddu $29,6*8
1380.end      bn_mul_comba8
1381
1382.align    5
1383.globl    bn_mul_comba4
1384.ent      bn_mul_comba4
1385bn_mul_comba4:
          # bn_mul_comba4 computes r[0..7] = a[0..3] * b[0..3], where every
          # element is one 64-bit doubleword, by summing the partial
          # products one result column at a time.
          #
          # Register use, as visible in the code below:
          #   $4        r: destination, doublewords 0..7 are stored
          #   $5        a: doublewords 0..3 are loaded into $12..$15
          #   $6        b: doublewords 0..3 are loaded into $8..$11
          #   $2,$3,$7  c1,c2,c3: three-word accumulator; the roles rotate
          #             by one after each stored result word
          #   $24,$25   t1,t2: low and high doubleword of the current product
          #   $1        carry scratch
          # No other general-purpose register is written and the stack
          # pointer is not used.
          #
          # Every mul_add_c comment marks where the multiply for that
          # partial product is issued. The result is picked up by the
          # following mflo/mfhi pair, and the carry handling that finishes
          # the previous product sits between the two. A dmultu indented by
          # one extra space starts the next result column.
          #
          # NOTE(review): dmultu, mflo and mfhi take parenthesised operand
          # lists here, so they are presumably preprocessor macros coming
          # from mips_arch.h; confirm before touching the operand syntax.
          # NOTE(review): presumably called from C with three pointer
          # arguments in r, a, b order; confirm against the C prototype.
1386          .set      reorder
1387          ld        $12,0($5)           # a[0]
1388          ld        $8,0($6)            # b[0]
1389          ld        $13,8($5)           # a[1]
1390          ld        $14,2*8($5)         # a[2]
1391          dmultu    ($12,$8)            # mul_add_c(a[0],b[0],c1,c2,c3);
1392          ld        $15,3*8($5)         # a[3]
1393          ld        $9,8($6)            # b[1]
1394          ld        $10,2*8($6)         # b[2]
1395          ld        $11,3*8($6)         # b[3]
1396          mflo      ($2,$12,$8)         # c1 = low doubleword of a[0]*b[0]
1397          mfhi      ($3,$12,$8)         # c2 = high doubleword of a[0]*b[0]
1398          sd        $2,0($4)            # r[0]=c1;
1399
1400          dmultu    ($12,$9)            # mul_add_c(a[0],b[1],c2,c3,c1);
1401          mflo      ($24,$12,$9)        # t1 = low doubleword of the product
1402          mfhi      ($25,$12,$9)        # t2 = high doubleword of the product
1403          daddu     $3,$24              # c2 += t1
1404          sltu      $1,$3,$24           # $1 = carry out of c2
1405          dmultu    ($13,$8)            # mul_add_c(a[1],b[0],c2,c3,c1);
1406          daddu     $7,$25,$1           # c3 = t2 + carry (first write of c3)
1407          mflo      ($24,$13,$8)
1408          mfhi      ($25,$13,$8)
1409          daddu     $3,$24              # c2 += t1
1410          sltu      $1,$3,$24           # $1 = carry out of c2
1411           dmultu   ($14,$8)            # mul_add_c(a[2],b[0],c3,c1,c2);
1412          daddu     $25,$1              # t2 += carry
1413          daddu     $7,$25              # c3 += t2
1414          sltu      $2,$7,$25           # c1 = carry out of c3 (old c1 already stored)
1415          sd        $3,8($4)            # r[1]=c2;
1416
1417          mflo      ($24,$14,$8)
1418          mfhi      ($25,$14,$8)
1419          daddu     $7,$24              # c3 += t1
1420          sltu      $1,$7,$24           # $1 = carry out of c3
1421          dmultu    ($13,$9)            # mul_add_c(a[1],b[1],c3,c1,c2);
1422          daddu     $25,$1              # t2 += carry
1423          daddu     $2,$25              # c1 += t2; no carry-out is recorded for this add
1424          mflo      ($24,$13,$9)
1425          mfhi      ($25,$13,$9)
1426          daddu     $7,$24
1427          sltu      $1,$7,$24
1428          dmultu    ($12,$10)           # mul_add_c(a[0],b[2],c3,c1,c2);
1429          daddu     $25,$1
1430          daddu     $2,$25
1431          sltu      $3,$2,$25           # c2 = carry out of c1 (old c2 already stored)
1432          mflo      ($24,$12,$10)
1433          mfhi      ($25,$12,$10)
1434          daddu     $7,$24
1435          sltu      $1,$7,$24
1436           dmultu   ($12,$11)           # mul_add_c(a[0],b[3],c1,c2,c3);
1437          daddu     $25,$1
1438          daddu     $2,$25
1439          sltu      $1,$2,$25           # $1 = carry out of c1
1440          daddu     $3,$1               # c2 += carry
1441          sd        $7,2*8($4)          # r[2]=c3;
1442
1443          mflo      ($24,$12,$11)
1444          mfhi      ($25,$12,$11)
1445          daddu     $2,$24
1446          sltu      $1,$2,$24
1447          dmultu    ($13,$10)           # mul_add_c(a[1],b[2],c1,c2,c3);
1448          daddu     $25,$1
1449          daddu     $3,$25
1450          sltu      $7,$3,$25           # c3 = carry out of c2 (old c3 already stored)
1451          mflo      ($24,$13,$10)
1452          mfhi      ($25,$13,$10)
1453          daddu     $2,$24
1454          sltu      $1,$2,$24
1455          dmultu    ($14,$9)            # mul_add_c(a[2],b[1],c1,c2,c3);
1456          daddu     $25,$1
1457          daddu     $3,$25
1458          sltu      $1,$3,$25
1459          daddu     $7,$1
1460          mflo      ($24,$14,$9)
1461          mfhi      ($25,$14,$9)
1462          daddu     $2,$24
1463          sltu      $1,$2,$24
1464          dmultu    ($15,$8)            # mul_add_c(a[3],b[0],c1,c2,c3);
1465          daddu     $25,$1
1466          daddu     $3,$25
1467          sltu      $1,$3,$25
1468          daddu     $7,$1
1469          mflo      ($24,$15,$8)
1470          mfhi      ($25,$15,$8)
1471          daddu     $2,$24
1472          sltu      $1,$2,$24
1473           dmultu   ($15,$9)            # mul_add_c(a[3],b[1],c2,c3,c1);
1474          daddu     $25,$1
1475          daddu     $3,$25
1476          sltu      $1,$3,$25
1477          daddu     $7,$1
1478          sd        $2,3*8($4)          # r[3]=c1;
1479
1480          mflo      ($24,$15,$9)
1481          mfhi      ($25,$15,$9)
1482          daddu     $3,$24
1483          sltu      $1,$3,$24
1484          dmultu    ($14,$10)           # mul_add_c(a[2],b[2],c2,c3,c1);
1485          daddu     $25,$1
1486          daddu     $7,$25
1487          sltu      $2,$7,$25           # c1 = carry out of c3 (old c1 already stored)
1488          mflo      ($24,$14,$10)
1489          mfhi      ($25,$14,$10)
1490          daddu     $3,$24
1491          sltu      $1,$3,$24
1492          dmultu    ($13,$11)           # mul_add_c(a[1],b[3],c2,c3,c1);
1493          daddu     $25,$1
1494          daddu     $7,$25
1495          sltu      $1,$7,$25
1496          daddu     $2,$1
1497          mflo      ($24,$13,$11)
1498          mfhi      ($25,$13,$11)
1499          daddu     $3,$24
1500          sltu      $1,$3,$24
1501           dmultu   ($14,$11)           # mul_add_c(a[2],b[3],c3,c1,c2);
1502          daddu     $25,$1
1503          daddu     $7,$25
1504          sltu      $1,$7,$25
1505          daddu     $2,$1
1506          sd        $3,4*8($4)          # r[4]=c2;
1507
1508          mflo      ($24,$14,$11)
1509          mfhi      ($25,$14,$11)
1510          daddu     $7,$24
1511          sltu      $1,$7,$24
1512          dmultu    ($15,$10)           # mul_add_c(a[3],b[2],c3,c1,c2);
1513          daddu     $25,$1
1514          daddu     $2,$25
1515          sltu      $3,$2,$25           # c2 = carry out of c1 (old c2 already stored)
1516          mflo      ($24,$15,$10)
1517          mfhi      ($25,$15,$10)
1518          daddu     $7,$24
1519          sltu      $1,$7,$24
1520           dmultu   ($15,$11)           # mul_add_c(a[3],b[3],c1,c2,c3);
1521          daddu     $25,$1
1522          daddu     $2,$25
1523          sltu      $1,$2,$25
1524          daddu     $3,$1
1525          sd        $7,5*8($4)          # r[5]=c3;
1526
1527          mflo      ($24,$15,$11)       # last product: nothing further to issue
1528          mfhi      ($25,$15,$11)
1529          daddu     $2,$24              # c1 += t1
1530          sltu      $1,$2,$24           # $1 = carry out of c1
1531          daddu     $25,$1              # t2 += carry
1532          daddu     $3,$25              # c2 += t2; top word, no carry-out is recorded
1533          sd        $2,6*8($4)          # r[6]=c1;
1534          sd        $3,7*8($4)          # r[7]=c2;
1535
1536          .set      noreorder           # the delay slot below is filled by hand
1537          jr        $31                 # return to caller
1538          nop                           # branch delay slot
1539.end      bn_mul_comba4
1540
1541.align    5
1542.globl    bn_sqr_comba8
1543.ent      bn_sqr_comba8
1544bn_sqr_comba8:
1545          .set      reorder
1546          ld        $12,0($5)
1547          ld        $13,8($5)
1548          ld        $14,2*8($5)
1549          ld        $15,3*8($5)
1550
1551          dmultu    ($12,$12)           # mul_add_c(a[0],b[0],c1,c2,c3);
1552          ld        $8,4*8($5)
1553          ld        $9,5*8($5)
1554          ld        $10,6*8($5)
1555          ld        $11,7*8($5)
1556          mflo      ($2,$12,$12)
1557          mfhi      ($3,$12,$12)
1558          sd        $2,0($4)
1559
1560          dmultu    ($12,$13)           # mul_add_c2(a[0],b[1],c2,c3,c1);
1561          mflo      ($24,$12,$13)
1562          mfhi      ($25,$12,$13)
1563          slt       $2,$25,$0
1564          dsll      $25,1
1565           dmultu   ($14,$12)           # mul_add_c2(a[2],b[0],c3,c1,c2);
1566          slt       $6,$24,$0
1567          daddu     $25,$6
1568          dsll      $24,1
1569          daddu     $3,$24
1570          sltu      $1,$3,$24
1571          daddu     $7,$25,$1
1572          sd        $3,8($4)
1573          sltu      $1,$7,$25
1574          daddu     $2,$1
1575          mflo      ($24,$14,$12)
1576          mfhi      ($25,$14,$12)
1577          daddu     $7,$24
1578          sltu      $1,$7,$24
1579           dmultu   ($13,$13)           # forward multiplication
1580          daddu     $7,$24
1581          daddu     $1,$25
1582          sltu      $24,$7,$24
1583          daddu     $2,$1
1584          daddu     $25,$24
1585          sltu      $3,$2,$1
1586          daddu     $2,$25
1587          sltu      $25,$2,$25
1588          daddu     $3,$25
1589          mflo      ($24,$13,$13)
1590          mfhi      ($25,$13,$13)
1591          daddu     $7,$24
1592          sltu      $1,$7,$24
1593           dmultu   ($12,$15)           # mul_add_c2(a[0],b[3],c1,c2,c3);
1594          daddu     $25,$1
1595          daddu     $2,$25
1596          sltu      $1,$2,$25
1597          daddu     $3,$1
1598          sd        $7,2*8($4)
1599          mflo      ($24,$12,$15)
1600          mfhi      ($25,$12,$15)
1601          daddu     $2,$24
1602          sltu      $1,$2,$24
1603           dmultu   ($13,$14)           # forward multiplication
1604          daddu     $2,$24
1605          daddu     $1,$25
1606          sltu      $24,$2,$24
1607          daddu     $3,$1
1608          daddu     $25,$24
1609          sltu      $7,$3,$1
1610          daddu     $3,$25
1611          sltu      $25,$3,$25
1612          daddu     $7,$25
1613          mflo      ($24,$13,$14)
1614          mfhi      ($25,$13,$14)
1615          daddu     $2,$24
1616          sltu      $1,$2,$24
1617           dmultu   ($8,$12)            # forward multiplication
1618          daddu     $2,$24
1619          daddu     $1,$25
1620          sltu      $24,$2,$24
1621          daddu     $3,$1
1622          daddu     $25,$24
1623          sltu      $1,$3,$1
1624          daddu     $3,$25
1625          daddu     $7,$1
1626          sltu      $25,$3,$25
1627          daddu     $7,$25
1628          mflo      ($24,$8,$12)
1629          mfhi      ($25,$8,$12)
1630          sd        $2,3*8($4)
1631          daddu     $3,$24
1632          sltu      $1,$3,$24
1633           dmultu   ($15,$13)           # forward multiplication
1634          daddu     $3,$24
1635          daddu     $1,$25
1636          sltu      $24,$3,$24
1637          daddu     $7,$1
1638          daddu     $25,$24
1639          sltu      $2,$7,$1
1640          daddu     $7,$25
1641          sltu      $25,$7,$25
1642          daddu     $2,$25
1643          mflo      ($24,$15,$13)
1644          mfhi      ($25,$15,$13)
1645          daddu     $3,$24
1646          sltu      $1,$3,$24
1647           dmultu   ($14,$14)           # forward multiplication
1648          daddu     $3,$24
1649          daddu     $1,$25
1650          sltu      $24,$3,$24
1651          daddu     $7,$1
1652          daddu     $25,$24
1653          sltu      $1,$7,$1
1654          daddu     $7,$25
1655          daddu     $2,$1
1656          sltu      $25,$7,$25
1657          daddu     $2,$25
1658          mflo      ($24,$14,$14)
1659          mfhi      ($25,$14,$14)
1660          daddu     $3,$24
1661          sltu      $1,$3,$24
1662           dmultu   ($12,$9)            # mul_add_c2(a[0],b[5],c3,c1,c2);
1663          daddu     $25,$1
1664          daddu     $7,$25
1665          sltu      $1,$7,$25
1666          daddu     $2,$1
1667          sd        $3,4*8($4)
1668          mflo      ($24,$12,$9)
1669          mfhi      ($25,$12,$9)
1670          daddu     $7,$24
1671          sltu      $1,$7,$24
1672           dmultu   ($13,$8)            # forward multiplication
1673          daddu     $7,$24
1674          daddu     $1,$25
1675          sltu      $24,$7,$24
1676          daddu     $2,$1
1677          daddu     $25,$24
1678          sltu      $3,$2,$1
1679          daddu     $2,$25
1680          sltu      $25,$2,$25
1681          daddu     $3,$25
1682          mflo      ($24,$13,$8)
1683          mfhi      ($25,$13,$8)
1684          daddu     $7,$24
1685          sltu      $1,$7,$24
1686           dmultu   ($14,$15)           # forward multiplication
1687          daddu     $7,$24
1688          daddu     $1,$25
1689          sltu      $24,$7,$24
1690          daddu     $2,$1
1691          daddu     $25,$24
1692          sltu      $1,$2,$1
1693          daddu     $2,$25
1694          daddu     $3,$1
1695          sltu      $25,$2,$25
1696          daddu     $3,$25
1697          mflo      ($24,$14,$15)
1698          mfhi      ($25,$14,$15)
1699          daddu     $7,$24
1700          sltu      $1,$7,$24
1701           dmultu   ($10,$12)           # forward multiplication
1702          daddu     $7,$24
1703          daddu     $1,$25
1704          sltu      $24,$7,$24
1705          daddu     $2,$1
1706          daddu     $25,$24
1707          sltu      $1,$2,$1
1708          daddu     $2,$25
1709          daddu     $3,$1
1710          sltu      $25,$2,$25
1711          daddu     $3,$25
1712          mflo      ($24,$10,$12)
1713          mfhi      ($25,$10,$12)
1714          sd        $7,5*8($4)
1715          daddu     $2,$24
1716          sltu      $1,$2,$24
1717           dmultu   ($9,$13)            # forward multiplication
1718          daddu     $2,$24
1719          daddu     $1,$25
1720          sltu      $24,$2,$24
1721          daddu     $3,$1
1722          daddu     $25,$24
1723          sltu      $7,$3,$1
1724          daddu     $3,$25
1725          sltu      $25,$3,$25
1726          daddu     $7,$25
1727          mflo      ($24,$9,$13)
1728          mfhi      ($25,$9,$13)
1729          daddu     $2,$24
1730          sltu      $1,$2,$24
1731           dmultu   ($8,$14)            # forward multiplication
1732          daddu     $2,$24
1733          daddu     $1,$25
1734          sltu      $24,$2,$24
1735          daddu     $3,$1
1736          daddu     $25,$24
1737          sltu      $1,$3,$1
1738          daddu     $3,$25
1739          daddu     $7,$1
1740          sltu      $25,$3,$25
1741          daddu     $7,$25
1742          mflo      ($24,$8,$14)
1743          mfhi      ($25,$8,$14)
1744          daddu     $2,$24
1745          sltu      $1,$2,$24
1746           dmultu   ($15,$15)           # forward multiplication
1747          daddu     $2,$24
1748          daddu     $1,$25
1749          sltu      $24,$2,$24
1750          daddu     $3,$1
1751          daddu     $25,$24
1752          sltu      $1,$3,$1
1753          daddu     $3,$25
1754          daddu     $7,$1
1755          sltu      $25,$3,$25
1756          daddu     $7,$25
1757          mflo      ($24,$15,$15)
1758          mfhi      ($25,$15,$15)
1759          daddu     $2,$24
1760          sltu      $1,$2,$24
1761           dmultu   ($12,$11)           # mul_add_c2(a[0],b[7],c2,c3,c1);
1762          daddu     $25,$1
1763          daddu     $3,$25
1764          sltu      $1,$3,$25
1765          daddu     $7,$1
1766          sd        $2,6*8($4)
1767          mflo      ($24,$12,$11)
1768          mfhi      ($25,$12,$11)
1769          daddu     $3,$24
1770          sltu      $1,$3,$24
1771           dmultu   ($13,$10)           # forward multiplication
1772          daddu     $3,$24
1773          daddu     $1,$25
1774          sltu      $24,$3,$24
1775          daddu     $7,$1
1776          daddu     $25,$24
1777          sltu      $2,$7,$1
1778          daddu     $7,$25
1779          sltu      $25,$7,$25
1780          daddu     $2,$25
1781          mflo      ($24,$13,$10)
1782          mfhi      ($25,$13,$10)
1783          daddu     $3,$24
1784          sltu      $1,$3,$24
1785           dmultu   ($14,$9)            # forward multiplication
1786          daddu     $3,$24
1787          daddu     $1,$25
1788          sltu      $24,$3,$24
1789          daddu     $7,$1
1790          daddu     $25,$24
1791          sltu      $1,$7,$1
1792          daddu     $7,$25
1793          daddu     $2,$1
1794          sltu      $25,$7,$25
1795          daddu     $2,$25
1796          mflo      ($24,$14,$9)
1797          mfhi      ($25,$14,$9)
1798          daddu     $3,$24
1799          sltu      $1,$3,$24
1800           dmultu   ($15,$8)            # forward multiplication
1801          daddu     $3,$24
1802          daddu     $1,$25
1803          sltu      $24,$3,$24
1804          daddu     $7,$1
1805          daddu     $25,$24
1806          sltu      $1,$7,$1
1807          daddu     $7,$25
1808          daddu     $2,$1
1809          sltu      $25,$7,$25
1810          daddu     $2,$25
1811          mflo      ($24,$15,$8)
1812          mfhi      ($25,$15,$8)
1813          daddu     $3,$24
1814          sltu      $1,$3,$24
1815           dmultu   ($11,$13)           # forward multiplication
1816          daddu     $3,$24
1817          daddu     $1,$25
1818          sltu      $24,$3,$24
1819          daddu     $7,$1
1820          daddu     $25,$24
1821          sltu      $1,$7,$1
1822          daddu     $7,$25
1823          daddu     $2,$1
1824          sltu      $25,$7,$25
1825          daddu     $2,$25
1826          mflo      ($24,$11,$13)
1827          mfhi      ($25,$11,$13)
1828          sd        $3,7*8($4)
1829          daddu     $7,$24
1830          sltu      $1,$7,$24
1831           dmultu   ($10,$14)           # forward multiplication
1832          daddu     $7,$24
1833          daddu     $1,$25
1834          sltu      $24,$7,$24
1835          daddu     $2,$1
1836          daddu     $25,$24
1837          sltu      $3,$2,$1
1838          daddu     $2,$25
1839          sltu      $25,$2,$25
1840          daddu     $3,$25
1841          mflo      ($24,$10,$14)
1842          mfhi      ($25,$10,$14)
1843          daddu     $7,$24
1844          sltu      $1,$7,$24
1845           dmultu   ($9,$15)            # forward multiplication
1846          daddu     $7,$24
1847          daddu     $1,$25
1848          sltu      $24,$7,$24
1849          daddu     $2,$1
1850          daddu     $25,$24
1851          sltu      $1,$2,$1
1852          daddu     $2,$25
1853          daddu     $3,$1
1854          sltu      $25,$2,$25
1855          daddu     $3,$25
1856          mflo      ($24,$9,$15)
1857          mfhi      ($25,$9,$15)
1858          daddu     $7,$24
1859          sltu      $1,$7,$24
1860           dmultu   ($8,$8)             # forward multiplication
1861          daddu     $7,$24
1862          daddu     $1,$25
1863          sltu      $24,$7,$24
1864          daddu     $2,$1
1865          daddu     $25,$24
1866          sltu      $1,$2,$1
1867          daddu     $2,$25
1868          daddu     $3,$1
1869          sltu      $25,$2,$25
1870          daddu     $3,$25
1871          mflo      ($24,$8,$8)
1872          mfhi      ($25,$8,$8)
1873          daddu     $7,$24
1874          sltu      $1,$7,$24
1875           dmultu   ($14,$11)           # mul_add_c2(a[2],b[7],c1,c2,c3);
1876          daddu     $25,$1
1877          daddu     $2,$25
1878          sltu      $1,$2,$25
1879          daddu     $3,$1
1880          sd        $7,8*8($4)
1881          mflo      ($24,$14,$11)
1882          mfhi      ($25,$14,$11)
1883          daddu     $2,$24
1884          sltu      $1,$2,$24
1885           dmultu   ($15,$10)           # forward multiplication
1886          daddu     $2,$24
1887          daddu     $1,$25
1888          sltu      $24,$2,$24
1889          daddu     $3,$1
1890          daddu     $25,$24
1891          sltu      $7,$3,$1
1892          daddu     $3,$25
1893          sltu      $25,$3,$25
1894          daddu     $7,$25
1895          mflo      ($24,$15,$10)
1896          mfhi      ($25,$15,$10)
1897          daddu     $2,$24
1898          sltu      $1,$2,$24
1899           dmultu   ($8,$9)             # forward multiplication
1900          daddu     $2,$24
1901          daddu     $1,$25
1902          sltu      $24,$2,$24
1903          daddu     $3,$1
1904          daddu     $25,$24
1905          sltu      $1,$3,$1
1906          daddu     $3,$25
1907          daddu     $7,$1
1908          sltu      $25,$3,$25
1909          daddu     $7,$25
1910          mflo      ($24,$8,$9)
1911          mfhi      ($25,$8,$9)
1912          daddu     $2,$24
1913          sltu      $1,$2,$24
1914           dmultu   ($11,$15)           # forward multiplication
1915          daddu     $2,$24
1916          daddu     $1,$25
1917          sltu      $24,$2,$24
1918          daddu     $3,$1
1919          daddu     $25,$24
1920          sltu      $1,$3,$1
1921          daddu     $3,$25
1922          daddu     $7,$1
1923          sltu      $25,$3,$25
1924          daddu     $7,$25
1925          mflo      ($24,$11,$15)
1926          mfhi      ($25,$11,$15)
1927          sd        $2,9*8($4)
1928          daddu     $3,$24
1929          sltu      $1,$3,$24
1930           dmultu   ($10,$8)            # forward multiplication
1931          daddu     $3,$24
1932          daddu     $1,$25
1933          sltu      $24,$3,$24
1934          daddu     $7,$1
1935          daddu     $25,$24
1936          sltu      $2,$7,$1
1937          daddu     $7,$25
1938          sltu      $25,$7,$25
1939          daddu     $2,$25
1940          mflo      ($24,$10,$8)
1941          mfhi      ($25,$10,$8)
1942          daddu     $3,$24
1943          sltu      $1,$3,$24
1944           dmultu   ($9,$9)             # forward multiplication
1945          daddu     $3,$24
1946          daddu     $1,$25
1947          sltu      $24,$3,$24
1948          daddu     $7,$1
1949          daddu     $25,$24
1950          sltu      $1,$7,$1
1951          daddu     $7,$25
1952          daddu     $2,$1
1953          sltu      $25,$7,$25
1954          daddu     $2,$25
1955          mflo      ($24,$9,$9)
1956          mfhi      ($25,$9,$9)
1957          daddu     $3,$24
1958          sltu      $1,$3,$24
1959           dmultu   ($8,$11)            # mul_add_c2(a[4],b[7],c3,c1,c2);
1960          daddu     $25,$1
1961          daddu     $7,$25
1962          sltu      $1,$7,$25
1963          daddu     $2,$1
1964          sd        $3,10*8($4)
1965          mflo      ($24,$8,$11)
1966          mfhi      ($25,$8,$11)
1967          daddu     $7,$24
1968          sltu      $1,$7,$24
1969           dmultu   ($9,$10)            # forward multiplication
1970          daddu     $7,$24
1971          daddu     $1,$25
1972          sltu      $24,$7,$24
1973          daddu     $2,$1
1974          daddu     $25,$24
1975          sltu      $3,$2,$1
1976          daddu     $2,$25
1977          sltu      $25,$2,$25
1978          daddu     $3,$25
1979          mflo      ($24,$9,$10)
1980          mfhi      ($25,$9,$10)
1981          daddu     $7,$24
1982          sltu      $1,$7,$24
1983           dmultu   ($11,$9)            # forward multiplication
1984          daddu     $7,$24
1985          daddu     $1,$25
1986          sltu      $24,$7,$24
1987          daddu     $2,$1
1988          daddu     $25,$24
1989          sltu      $1,$2,$1
1990          daddu     $2,$25
1991          daddu     $3,$1
1992          sltu      $25,$2,$25
1993          daddu     $3,$25
1994          mflo      ($24,$11,$9)
1995          mfhi      ($25,$11,$9)
1996          sd        $7,11*8($4)
1997          daddu     $2,$24
1998          sltu      $1,$2,$24
1999           dmultu   ($10,$10)           # forward multiplication
2000          daddu     $2,$24
2001          daddu     $1,$25
2002          sltu      $24,$2,$24
2003          daddu     $3,$1
2004          daddu     $25,$24
2005          sltu      $7,$3,$1
2006          daddu     $3,$25
2007          sltu      $25,$3,$25
2008          daddu     $7,$25
2009          mflo      ($24,$10,$10)
2010          mfhi      ($25,$10,$10)
2011          daddu     $2,$24
2012          sltu      $1,$2,$24
2013           dmultu   ($10,$11)           # mul_add_c2(a[6],b[7],c2,c3,c1);
2014          daddu     $25,$1
2015          daddu     $3,$25
2016          sltu      $1,$3,$25
2017          daddu     $7,$1
2018          sd        $2,12*8($4)
2019          mflo      ($24,$10,$11)
2020          mfhi      ($25,$10,$11)
2021          daddu     $3,$24
2022          sltu      $1,$3,$24
2023           dmultu   ($11,$11)           # forward multiplication
2024          daddu     $3,$24
2025          daddu     $1,$25
2026          sltu      $24,$3,$24
2027          daddu     $7,$1
2028          daddu     $25,$24
2029          sltu      $2,$7,$1
2030          daddu     $7,$25
2031          sltu      $25,$7,$25
2032          daddu     $2,$25
2033          mflo      ($24,$11,$11)
2034          mfhi      ($25,$11,$11)
2035          sd        $3,13*8($4)
2036
2037          daddu     $7,$24
2038          sltu      $1,$7,$24
2039          daddu     $25,$1
2040          daddu     $2,$25
2041          sd        $7,14*8($4)
2042          sd        $2,15*8($4)
2043
2044          .set      noreorder
2045          jr        $31
2046          nop
2047.end      bn_sqr_comba8
2048
# bn_sqr_comba4 - square a four-word number into an eight-word result.
#
# Operands, as used by the code below:
#   $5  base of the input:  four 64-bit words are loaded from 0..3*8($5)
#   $4  base of the output: eight 64-bit words are stored to 0..7*8($4)
# NOTE(review): presumably this backs the C routine bn_sqr_comba4(r, a)
#   with r in $4 and a in $5 - confirm against the C-side declaration,
#   which is not visible here.
#
# Method: the result is produced one 64-bit column at a time, least
# significant first.  Column k sums every product a[i]*a[j] with i+j == k;
# a product with i != j occurs twice in the square, so it is added twice
# (or, for the very first one, shifted left by one bit), while a[i]*a[i]
# is added once.
#
# Register use:
#   $12..$15   a[0]..a[3]
#   $24, $25   low and high word of the product currently being added
#   $2,$3,$7   three-word running sum (c1,c2,c3 in the comments below);
#              once a column word has been stored, its register is
#              recycled as the top word of the sum
#   $1         carry scratch (the file selects noat near its top)
#   $6         scratch, used once while doubling the first cross product
# The routine makes no calls and does not reference the stack pointer.
#
# dmultu, mflo and mfhi are written in macro-call form; the macros are
# not defined in this part of the file (presumably they come from
# mips_arch.h), so the instruction actually emitted may vary by target.
# A multiply marked "forward multiplication" is issued in the middle of
# one accumulation step; its result is fetched by the mflo/mfhi pair that
# closes that step and is consumed by the following step.
2049.align    5
2050.globl    bn_sqr_comba4
2051.ent      bn_sqr_comba4
2052bn_sqr_comba4:
2053          .set      reorder
2054          ld        $12,0($5)           # $12 = a[0]
2055          ld        $13,8($5)           # $13 = a[1]
2056          dmultu    ($12,$12)           # mul_add_c(a[0],b[0],c1,c2,c3);
2057          ld        $14,2*8($5)         # $14 = a[2]
2058          ld        $15,3*8($5)         # $15 = a[3]
2059          mflo      ($2,$12,$12)        # low word of a[0]*a[0]
2060          mfhi      ($3,$12,$12)        # high word, starting value of column 1
2061          sd        $2,0($4)            # r[0]
2062
# Column 1: 2*a[0]*a[1].  The 128-bit product $25:$24 is doubled with a
# one-bit left shift; slt against $0 reads the top bit of each word
# before it is shifted out.
2063          dmultu    ($12,$13)           # mul_add_c2(a[0],b[1],c2,c3,c1);
2064          mflo      ($24,$12,$13)
2065          mfhi      ($25,$12,$13)
2066          slt       $2,$25,$0           # $2 = top bit of high word, i.e. carry out of the doubling
2067          dsll      $25,1
2068           dmultu   ($14,$12)           # mul_add_c2(a[2],b[0],c3,c1,c2);
2069          slt       $6,$24,$0           # $6 = top bit of low word
2070          daddu     $25,$6              # ... which becomes bit 0 of the doubled high word
2071          dsll      $24,1
2072          daddu     $3,$24              # column 1 += doubled low word
2073          sltu      $1,$3,$24           # $1 = carry out of column 1
2074          daddu     $7,$25,$1           # $7 = doubled high word + carry (start of column 2)
2075          sd        $3,8($4)            # r[1]
2076          sltu      $1,$7,$25           # the add into $7 may itself wrap ...
2077          daddu     $2,$1               # ... so propagate that carry into the top word
# Column 2: 2*a[2]*a[0] + a[1]*a[1].  From here on a cross product is
# doubled by adding its low word and its high word twice each.
2078          mflo      ($24,$14,$12)
2079          mfhi      ($25,$14,$12)
2080          daddu     $7,$24              # first add of the low word
2081          sltu      $1,$7,$24           # $1 = carry of the first add
2082           dmultu   ($13,$13)           # forward multiplication: a[1]*a[1]
2083          daddu     $7,$24              # second add of the low word
2084          daddu     $1,$25              # high word + first carry; cannot wrap, a product high word is at most 2^64-2
2085          sltu      $24,$7,$24          # $24 = carry of the second add
2086          daddu     $2,$1               # middle word += high word + first carry
2087          daddu     $25,$24             # high word + second carry
2088          sltu      $3,$2,$1            # $3 is free after the r[1] store: initialise it with the carry
2089          daddu     $2,$25              # middle word += high word + second carry
2090          sltu      $25,$2,$25
2091          daddu     $3,$25              # top word += carry
2092          mflo      ($24,$13,$13)
2093          mfhi      ($25,$13,$13)
2094          daddu     $7,$24              # square term a[1]*a[1] is added once only
2095          sltu      $1,$7,$24
2096           dmultu   ($12,$15)           # mul_add_c2(a[0],b[3],c1,c2,c3);
2097          daddu     $25,$1              # high word + carry
2098          daddu     $2,$25
2099          sltu      $1,$2,$25
2100          daddu     $3,$1
2101          sd        $7,2*8($4)          # r[2]
# Column 3: 2*a[0]*a[3] + 2*a[1]*a[2], summed into $2 (low), $3, $7 (top).
2102          mflo      ($24,$12,$15)
2103          mfhi      ($25,$12,$15)
2104          daddu     $2,$24              # a[0]*a[3], first add of the low word
2105          sltu      $1,$2,$24
2106           dmultu   ($13,$14)           # forward multiplication: a[1]*a[2]
2107          daddu     $2,$24              # second add of the low word
2108          daddu     $1,$25
2109          sltu      $24,$2,$24
2110          daddu     $3,$1
2111          daddu     $25,$24
2112          sltu      $7,$3,$1            # $7 is free after the r[2] store: initialise it with the carry
2113          daddu     $3,$25
2114          sltu      $25,$3,$25
2115          daddu     $7,$25
2116          mflo      ($24,$13,$14)
2117          mfhi      ($25,$13,$14)
2118          daddu     $2,$24              # a[1]*a[2], first add of the low word
2119          sltu      $1,$2,$24
2120           dmultu   ($15,$13)           # forward multiplication: a[3]*a[1]
2121          daddu     $2,$24              # second add of the low word
2122          daddu     $1,$25
2123          sltu      $24,$2,$24
2124          daddu     $3,$1
2125          daddu     $25,$24
2126          sltu      $1,$3,$1            # top word $7 is already live here, so keep the carry in $1 ...
2127          daddu     $3,$25
2128          daddu     $7,$1               # ... and accumulate it instead of overwriting $7
2129          sltu      $25,$3,$25
2130          daddu     $7,$25
2131          mflo      ($24,$15,$13)
2132          mfhi      ($25,$15,$13)
2133          sd        $2,3*8($4)          # r[3]
# Column 4: 2*a[3]*a[1] + a[2]*a[2], summed into $3 (low), $7, $2 (top).
2134          daddu     $3,$24              # a[3]*a[1], first add of the low word
2135          sltu      $1,$3,$24
2136           dmultu   ($14,$14)           # forward multiplication: a[2]*a[2]
2137          daddu     $3,$24              # second add of the low word
2138          daddu     $1,$25
2139          sltu      $24,$3,$24
2140          daddu     $7,$1
2141          daddu     $25,$24
2142          sltu      $2,$7,$1            # $2 is free after the r[3] store: initialise it with the carry
2143          daddu     $7,$25
2144          sltu      $25,$7,$25
2145          daddu     $2,$25
2146          mflo      ($24,$14,$14)
2147          mfhi      ($25,$14,$14)
2148          daddu     $3,$24              # square term a[2]*a[2] is added once only
2149          sltu      $1,$3,$24
2150           dmultu   ($14,$15)           # mul_add_c2(a[2],b[3],c3,c1,c2);
2151          daddu     $25,$1              # high word + carry
2152          daddu     $7,$25
2153          sltu      $1,$7,$25
2154          daddu     $2,$1
2155          sd        $3,4*8($4)          # r[4]
# Column 5: 2*a[2]*a[3], summed into $7 (low), $2, $3 (top).
2156          mflo      ($24,$14,$15)
2157          mfhi      ($25,$14,$15)
2158          daddu     $7,$24              # first add of the low word
2159          sltu      $1,$7,$24
2160           dmultu   ($15,$15)           # forward multiplication: a[3]*a[3]
2161          daddu     $7,$24              # second add of the low word
2162          daddu     $1,$25
2163          sltu      $24,$7,$24
2164          daddu     $2,$1
2165          daddu     $25,$24
2166          sltu      $3,$2,$1            # $3 is free after the r[4] store: initialise it with the carry
2167          daddu     $2,$25
2168          sltu      $25,$2,$25
2169          daddu     $3,$25
2170          mflo      ($24,$15,$15)
2171          mfhi      ($25,$15,$15)
2172          sd        $7,5*8($4)          # r[5]
2173
# Columns 6 and 7: a[3]*a[3], added once.  No carry out of the top word
# is computed after the final add into $3.
2174          daddu     $2,$24              # column 6 += low word
2175          sltu      $1,$2,$24
2176          daddu     $25,$1              # high word + carry
2177          daddu     $3,$25              # column 7 += high word + carry
2178          sd        $2,6*8($4)          # r[6]
2179          sd        $3,7*8($4)          # r[7]
2180
2181          .set      noreorder
2182          jr        $31                 # return to caller
2183          nop                           # fills the delay slot of the jr above
2184.end      bn_sqr_comba4
2185