91 |
|
|
92 |
|
mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp |
93 |
|
.Lmul_body: |
94 |
+ |
# Some OSes, *cough*-dows, insist on stack being "wired" to |
95 |
+ |
# physical memory in strictly sequential manner, i.e. if stack |
96 |
+ |
# allocation spans two pages, then reference to farthest one can |
97 |
+ |
# be punishable by SEGV. But page walking can do good even on |
98 |
+ |
# other OSes, because it guarantees that villain thread hits |
99 |
+ |
# the guard page before it can do damage to an innocent one... |
100 |
+ |
sub %rsp,%r11 |
101 |
+ |
and \$-4096,%r11 |
102 |
+ |
.Lmul_page_walk: |
103 |
+ |
mov (%rsp,%r11),%r10 |
104 |
+ |
sub \$4096,%r11 |
105 |
+ |
.byte 0x66,0x2e # predict non-taken |
106 |
+ |
jnc .Lmul_page_walk |
107 |
+ |
|
108 |
|
mov $bp,%r12 # reassign $bp |
109 |
|
___ |
110 |
|
$bp="%r12"; |
310 |
|
|
311 |
|
mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp |
312 |
|
.Lmul4x_body: |
313 |
+ |
sub %rsp,%r11 |
314 |
+ |
and \$-4096,%r11 |
315 |
+ |
.Lmul4x_page_walk: |
316 |
+ |
mov (%rsp,%r11),%r10 |
317 |
+ |
sub \$4096,%r11 |
318 |
+ |
.byte 0x2e # predict non-taken |
319 |
+ |
jnc .Lmul4x_page_walk |
320 |
+ |
|
321 |
|
mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp |
322 |
|
mov %rdx,%r12 # reassign $bp |
323 |
|
___ |
729 |
|
.align 16 |
730 |
|
bn_sqr4x_mont: |
731 |
|
.Lsqr4x_enter: |
732 |
+ |
mov %rsp,%rax |
733 |
|
push %rbx |
734 |
|
push %rbp |
735 |
|
push %r12 |
738 |
|
push %r15 |
739 |
|
|
740 |
|
shl \$3,${num}d # convert $num to bytes |
718 |
– |
xor %r10,%r10 |
741 |
|
mov %rsp,%r11 # put aside %rsp |
742 |
< |
sub $num,%r10 # -$num |
742 |
> |
neg $num # -$num |
743 |
|
mov ($n0),$n0 # *n0 |
744 |
< |
lea -72(%rsp,%r10,2),%rsp # alloca(frame+2*$num) |
744 |
> |
lea -72(%rsp,$num,2),%rsp # alloca(frame+2*$num) |
745 |
|
and \$-1024,%rsp # minimize TLB usage |
746 |
+ |
|
747 |
+ |
sub %rsp,%r11 |
748 |
+ |
and \$-4096,%r11 |
749 |
+ |
.Lsqr4x_page_walk: |
750 |
+ |
mov (%rsp,%r11),%r10 |
751 |
+ |
sub \$4096,%r11 |
752 |
+ |
.byte 0x2e # predict non-taken |
753 |
+ |
jnc .Lsqr4x_page_walk |
754 |
+ |
|
755 |
+ |
mov $num,%r10 |
756 |
+ |
neg $num # restore $num |
757 |
+ |
lea -48(%rax),%r11 # restore saved %rsp |
758 |
|
############################################################## |
759 |
|
# Stack layout |
760 |
|
# |