1/*        $NetBSD: n_argred.S,v 1.10 2024/05/07 15:15:09 riastradh Exp $        */
2/*
3 * Copyright (c) 1985, 1993
4 *        The Regents of the University of California.  All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the University nor the names of its contributors
15 *    may be used to endorse or promote products derived from this software
16 *    without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 *        @(#)argred.s        8.1 (Berkeley) 6/4/93
31 */
32
33#include <machine/asm.h>
34
35/*
36 *  libm$argred implements Bob Corbett's argument reduction and
37 *  libm$sincos implements Peter Tang's double precision sin/cos.
38 *
39 *  Note: The two entry points libm$argred and libm$sincos are meant
40 *        to be used only by _sin, _cos and _tan.
41 *
42 * method: true range reduction to [-pi/4,pi/4], P. Tang  &  B. Corbett
43 * S. McDonald, April 4,  1985
44 */
45
46          .hidden   __libm_argred
47ENTRY(__libm_argred, 0)
48/*
49 *  Compare the argument with the largest possible that can
50 *  be reduced by table lookup.  %r3 := |x|  will be used in  table_lookup .
51 */
52          movd      %r0,%r3
53          bgeq      abs1
54          mnegd     %r3,%r3
55abs1:
56          cmpd      %r3,$0d+4.55530934770520019583e+01
57          blss      small_arg
58          jsb       trigred
59          rsb
60small_arg:
61          jsb       table_lookup
62          rsb
63/*
64 *  At this point,
65 *           %r0  contains the quadrant number, 0, 1, 2, or 3;
66 *        %r2/%r1  contains the reduced argument as a D-format number;
67 *           %r3  contains a F-format extension to the reduced argument;
68 *          %r4  contains a  0 or 1  corresponding to a  sin or cos  entry.
69 */
70END(__libm_argred)
71
72          .hidden   __libm_sincos
73ENTRY(__libm_sincos, 0)
74/*
75 *  Compensate for a cosine entry by adding one to the quadrant number.
76 */
77          addl2     %r4,%r0
78/*
79 *  Polyd clobbers  %r5-%r0 ;  save  X  in  %r7/%r6 .
80 *  This can be avoided by rewriting  trigred .
81 */
82          movd      %r1,%r6
83/*
84 *  Likewise, save  alpha  in  %r8 .
85 *  This can be avoided by rewriting  trigred .
86 */
87          movf      %r3,%r8
88/*
89 *  Odd or even quadrant?  cosine if odd, sine otherwise.
90 *  Save  floor(quadrant/2) in  %r9  ; it determines the final sign.
91 */
92          rotl      $-1,%r0,%r9
93          blss      cosine
94sine:
95          muld2     %r1,%r1             # Xsq = X * X
96          cmpw      $0x2480,%r1         # [zl] Xsq > 2^-56?
97          blss      1f                  # [zl] yes, go ahead and do polyd
98          clrq      %r1                 # [zl] work around 11/780 FPA polyd bug
991:
100          polyd     %r1,$7,sin_coef     # Q = P(Xsq) , of deg 7
101          mulf3     $0f3.0,%r8,%r4      # beta = 3 * alpha
102          mulf2     %r0,%r4             # beta = Q * beta
103          addf2     %r8,%r4             # beta = alpha + beta
104          muld2     %r6,%r0             # S(X) = X * Q
105/*        cvtfd     %r4,%r4             ... %r5 = 0 after a polyd. */
106          addd2     %r4,%r0             # S(X) = beta + S(X)
107          addd2     %r6,%r0             # S(X) = X + S(X)
108          jbr       done
109cosine:
110          muld2     %r6,%r6             # Xsq = X * X
111          beql      zero_arg
112          mulf2     %r1,%r8             # beta = X * alpha
113          polyd     %r6,$7,cos_coef     /* Q = P'(Xsq) , of deg 7 */
114          subd3     %r0,%r8,%r0         # beta = beta - Q
115          subw2     $0x80,%r6 # Xsq = Xsq / 2
116          addd2     %r0,%r6             # Xsq = Xsq + beta
117zero_arg:
118          subd3     %r6,$0d1.0,%r0      # C(X) = 1 - Xsq
119done:
120          blbc      %r9,even
121          mnegd     %r0,%r0
122even:
123          rsb
124END(__libm_sincos)
125
126#ifdef __ELF__
127          .section .rodata
128#else
129          .text
130#endif
131          _ALIGN_TEXT
132
133sin_coef:
134          .double   0d-7.53080332264191085773e-13 # s7 = 2^-29 -1.a7f2504ffc49f8..
135          .double   0d+1.60573519267703489121e-10 # s6 = 2^-21  1.611adaede473c8..
136          .double   0d-2.50520965150706067211e-08 # s5 = 2^-1a -1.ae644921ed8382..
137          .double   0d+2.75573191800593885716e-06 # s4 = 2^-13  1.71de3a4b884278..
138          .double   0d-1.98412698411850507950e-04 # s3 = 2^-0d -1.a01a01a0125e7d..
139          .double   0d+8.33333333333325688985e-03 # s2 = 2^-07  1.11111111110e50
140          .double   0d-1.66666666666666664354e-01 # s1 = 2^-03 -1.55555555555554
141          .double   0d+0.00000000000000000000e+00 # s0 = 0
142
143cos_coef:
144          .double   0d-1.13006966202629430300e-11 # s7 = 2^-25 -1.8D9BA04D1374BE..
145          .double   0d+2.08746646574796004700e-09 # s6 = 2^-1D  1.1EE632650350BA..
146          .double   0d-2.75573073031284417300e-07 # s5 = 2^-16 -1.27E4F31411719E..
147          .double   0d+2.48015872682668025200e-05 # s4 = 2^-10  1.A01A0196B902E8..
148          .double   0d-1.38888888888464709200e-03 # s3 = 2^-0A -1.6C16C16C11FACE..
149          .double   0d+4.16666666666664761400e-02 # s2 = 2^-05  1.5555555555539E
150          .double   0d+0.00000000000000000000e+00 # s1 = 0
151          .double   0d+0.00000000000000000000e+00 # s0 = 0
152
153/*
154 *  Multiples of  pi/2  expressed as the sum of three doubles,
155 *
156 *  trailing:       n * pi/2 ,  n = 0, 1, 2, ..., 29
157 *                            trailing[n] ,
158 *
159 *  middle:         n * pi/2 ,  n = 0, 1, 2, ..., 29
160 *                            middle[n]   ,
161 *
162 *  leading:        n * pi/2 ,  n = 0, 1, 2, ..., 29
163 *                            leading[n]  ,
164 *
165 *        where
166 *                  leading[n]  := (n * pi/2)  rounded,
167 *                  middle[n]   := (n * pi/2  -  leading[n])  rounded,
168 *                  trailing[n] := (( n * pi/2 - leading[n]) - middle[n])  rounded .
169 */
170trailing:
171          .double   0d+0.00000000000000000000e+00 #  0 * pi/2  trailing
172          .double   0d+4.33590506506189049611e-35 #  1 * pi/2  trailing
173          .double   0d+8.67181013012378099223e-35 #  2 * pi/2  trailing
174          .double   0d+1.30077151951856714215e-34 #  3 * pi/2  trailing
175          .double   0d+1.73436202602475619845e-34 #  4 * pi/2  trailing
176          .double   0d-1.68390735624352669192e-34 #  5 * pi/2  trailing
177          .double   0d+2.60154303903713428430e-34 #  6 * pi/2  trailing
178          .double   0d-8.16726343231148352150e-35 #  7 * pi/2  trailing
179          .double   0d+3.46872405204951239689e-34 #  8 * pi/2  trailing
180          .double   0d+3.90231455855570147991e-34 #  9 * pi/2  trailing
181          .double   0d-3.36781471248705338384e-34 # 10 * pi/2  trailing
182          .double   0d-1.06379439835298071785e-33 # 11 * pi/2  trailing
183          .double   0d+5.20308607807426856861e-34 # 12 * pi/2  trailing
184          .double   0d+5.63667658458045770509e-34 # 13 * pi/2  trailing
185          .double   0d-1.63345268646229670430e-34 # 14 * pi/2  trailing
186          .double   0d-1.19986217995610764801e-34 # 15 * pi/2  trailing
187          .double   0d+6.93744810409902479378e-34 # 16 * pi/2  trailing
188          .double   0d-8.03640094449267300110e-34 # 17 * pi/2  trailing
189          .double   0d+7.80462911711140295982e-34 # 18 * pi/2  trailing
190          .double   0d-7.16921993148029483506e-34 # 19 * pi/2  trailing
191          .double   0d-6.73562942497410676769e-34 # 20 * pi/2  trailing
192          .double   0d-6.30203891846791677593e-34 # 21 * pi/2  trailing
193          .double   0d-2.12758879670596143570e-33 # 22 * pi/2  trailing
194          .double   0d+2.53800212047402350390e-33 # 23 * pi/2  trailing
195          .double   0d+1.04061721561485371372e-33 # 24 * pi/2  trailing
196          .double   0d+6.11729905311472319056e-32 # 25 * pi/2  trailing
197          .double   0d+1.12733531691609154102e-33 # 26 * pi/2  trailing
198          .double   0d-3.70049587943078297272e-34 # 27 * pi/2  trailing
199          .double   0d-3.26690537292459340860e-34 # 28 * pi/2  trailing
200          .double   0d-1.14812616507957271361e-34 # 29 * pi/2  trailing
201
202middle:
203          .double   0d+0.00000000000000000000e+00 #  0 * pi/2  middle
204          .double   0d+5.72118872610983179676e-18 #  1 * pi/2  middle
205          .double   0d+1.14423774522196635935e-17 #  2 * pi/2  middle
206          .double   0d-3.83475850529283316309e-17 #  3 * pi/2  middle
207          .double   0d+2.28847549044393271871e-17 #  4 * pi/2  middle
208          .double   0d-2.69052076007086676522e-17 #  5 * pi/2  middle
209          .double   0d-7.66951701058566632618e-17 #  6 * pi/2  middle
210          .double   0d-1.54628301484890040587e-17 #  7 * pi/2  middle
211          .double   0d+4.57695098088786543741e-17 #  8 * pi/2  middle
212          .double   0d+1.07001849766246313192e-16 #  9 * pi/2  middle
213          .double   0d-5.38104152014173353044e-17 # 10 * pi/2  middle
214          .double   0d-2.14622680169080983801e-16 # 11 * pi/2  middle
215          .double   0d-1.53390340211713326524e-16 # 12 * pi/2  middle
216          .double   0d-9.21580002543456677056e-17 # 13 * pi/2  middle
217          .double   0d-3.09256602969780081173e-17 # 14 * pi/2  middle
218          .double   0d+3.03066796603896507006e-17 # 15 * pi/2  middle
219          .double   0d+9.15390196177573087482e-17 # 16 * pi/2  middle
220          .double   0d+1.52771359575124969107e-16 # 17 * pi/2  middle
221          .double   0d+2.14003699532492626384e-16 # 18 * pi/2  middle
222          .double   0d-1.68853170360202329427e-16 # 19 * pi/2  middle
223          .double   0d-1.07620830402834670609e-16 # 20 * pi/2  middle
224          .double   0d+3.97700719404595604379e-16 # 21 * pi/2  middle
225          .double   0d-4.29245360338161967602e-16 # 22 * pi/2  middle
226          .double   0d-3.68013020380794313406e-16 # 23 * pi/2  middle
227          .double   0d-3.06780680423426653047e-16 # 24 * pi/2  middle
228          .double   0d-2.45548340466059054318e-16 # 25 * pi/2  middle
229          .double   0d-1.84316000508691335411e-16 # 26 * pi/2  middle
230          .double   0d-1.23083660551323675053e-16 # 27 * pi/2  middle
231          .double   0d-6.18513205939560162346e-17 # 28 * pi/2  middle
232          .double   0d-6.18980636588357585202e-19 # 29 * pi/2  middle
233
234leading:
235          .double   0d+0.00000000000000000000e+00 #  0 * pi/2  leading
236          .double   0d+1.57079632679489661351e+00 #  1 * pi/2  leading
237          .double   0d+3.14159265358979322702e+00 #  2 * pi/2  leading
238          .double   0d+4.71238898038468989604e+00 #  3 * pi/2  leading
239          .double   0d+6.28318530717958645404e+00 #  4 * pi/2  leading
240          .double   0d+7.85398163397448312306e+00 #  5 * pi/2  leading
241          .double   0d+9.42477796076937979208e+00 #  6 * pi/2  leading
242          .double   0d+1.09955742875642763501e+01 #  7 * pi/2  leading
243          .double   0d+1.25663706143591729081e+01 #  8 * pi/2  leading
244          .double   0d+1.41371669411540694661e+01 #  9 * pi/2  leading
245          .double   0d+1.57079632679489662461e+01 # 10 * pi/2  leading
246          .double   0d+1.72787595947438630262e+01 # 11 * pi/2  leading
247          .double   0d+1.88495559215387595842e+01 # 12 * pi/2  leading
248          .double   0d+2.04203522483336561422e+01 # 13 * pi/2  leading
249          .double   0d+2.19911485751285527002e+01 # 14 * pi/2  leading
250          .double   0d+2.35619449019234492582e+01 # 15 * pi/2  leading
251          .double   0d+2.51327412287183458162e+01 # 16 * pi/2  leading
252          .double   0d+2.67035375555132423742e+01 # 17 * pi/2  leading
253          .double   0d+2.82743338823081389322e+01 # 18 * pi/2  leading
254          .double   0d+2.98451302091030359342e+01 # 19 * pi/2  leading
255          .double   0d+3.14159265358979324922e+01 # 20 * pi/2  leading
256          .double   0d+3.29867228626928286062e+01 # 21 * pi/2  leading
257          .double   0d+3.45575191894877260523e+01 # 22 * pi/2  leading
258          .double   0d+3.61283155162826226103e+01 # 23 * pi/2  leading
259          .double   0d+3.76991118430775191683e+01 # 24 * pi/2  leading
260          .double   0d+3.92699081698724157263e+01 # 25 * pi/2  leading
261          .double   0d+4.08407044966673122843e+01 # 26 * pi/2  leading
262          .double   0d+4.24115008234622088423e+01 # 27 * pi/2  leading
263          .double   0d+4.39822971502571054003e+01 # 28 * pi/2  leading
264          .double   0d+4.55530934770520019583e+01 # 29 * pi/2  leading
265
266twoOverPi:
267          .double   0d+6.36619772367581343076e-01
268
269          .text
270          _ALIGN_TEXT
271
272table_lookup:
273          muld3     %r3,twoOverPi,%r0
274          cvtrdl    %r0,%r0                       # n = nearest int to ((2/pi)*|x|) rnded
275          subd2     leading[%r0],%r3              # p = (|x| - leading n*pi/2) exactly
276          subd3     middle[%r0],%r3,%r1 # q = (p - middle  n*pi/2) rounded
277          subd2     %r1,%r3                       # r = (p - q)
278          subd2     middle[%r0],%r3               # r =  r - middle  n*pi/2
279          subd2     trailing[%r0],%r3             # r =  r - trailing n*pi/2  rounded
280/*
281 *  If the original argument was negative,
282 *  negate the reduce argument and
283 *  adjust the octant/quadrant number.
284 */
285          tstw      4(%ap)
286          bgeq      abs2
287          mnegf     %r1,%r1
288          mnegf     %r3,%r3
289/*        subb3     %r0,$8,%r0          ...used for  pi/4  reduction -S.McD */
290          subb3     %r0,$4,%r0
291abs2:
292/*
293 *  Clear all unneeded octant/quadrant bits.
294 */
295/*        bicb2     $0xf8,%r0 ...used for  pi/4  reduction -S.McD */
296          bicb2     $0xfc,%r0
297          rsb
298/*
299 *                                                          p.0
300 */
301#ifdef __ELF__
302          .section .rodata
303#else
304          .text
305#endif
306          _ALIGN_TEXT
307/*
308 * Only 256 (actually 225) bits of 2/pi are needed for VAX double
309 * precision; this was determined by enumerating all the nearest
310 * machine integer multiples of pi/2 using continued fractions.
311 * (8a8d3673775b7ff7 required the most bits.)               -S.McD
312 */
313          .long     0
314          .long     0
315          .long     0xaef1586d
316          .long     0x9458eaf7
317          .long     0x10e4107f
318          .long     0xd8a5664f
319          .long     0x4d377036
320          .long     0x09d5f47d
321          .long     0x91054a7f
322          .long     0xbe60db93
323bits2opi:
324          .long     0x00000028
325          .long     0
326/*
327 *  Note: wherever you see the word `octant', read `quadrant'.
328 *  Currently this code is set up for  pi/2  argument reduction.
329 *  By uncommenting/commenting the appropriate lines, it will
330 *  also serve as a  pi/4  argument reduction code.
331 */
332          .text
333
334/*                                                          p.1
335 *  Trigred  preforms argument reduction
336 *  for the trigonometric functions.  It
337 *  takes one input argument, a D-format
338 *  number in  %r1/%r0 .  The magnitude of
339 *  the input argument must be greater
340 *  than or equal to  1/2 .  Trigred produces
341 *  three results:  the number of the octant
342 *  occupied by the argument, the reduced
343 *  argument, and an extension of the
344 *  reduced argument.  The octant number is
345 *  returned in  %r0 .  The reduced argument
346 *  is returned as a D-format number in
347 *  %r2/%r1 .  An 8 bit extension of the
348 *  reduced argument is returned as an
349 *  F-format number in %r3.
350 *                                                          p.2
351 */
352trigred:
353/*
354 *  Save the sign of the input argument.
355 */
356          movw      %r0,-(%sp)
357/*
358 *  Extract the exponent field.
359 */
360          extzv     $7,$7,%r0,%r2
361/*
362 *  Convert the fraction part of the input
363 *  argument into a quadword integer.
364 */
365          bicw2     $0xff80,%r0
366          bisb2     $0x80,%r0 # -S.McD
367          rotl      $16,%r0,%r0
368          rotl      $16,%r1,%r1
369/*
370 *  If  %r1  is negative, add  1  to  %r0 .  This
371 *  adjustment is made so that the two's
372 *  complement multiplications done later
373 *  will produce unsigned results.
374 */
375          bgeq      posmid
376          incl      %r0
377posmid:
378/*                                                          p.3
379 *
380 *  Set  %r3  to the address of the first quadword
381 *  used to obtain the needed portion of  2/pi .
382 *  The address is longword aligned to ensure
383 *  efficient access.
384 */
385          ashl      $-3,%r2,%r3
386          bicb2     $3,%r3
387          mnegl     %r3,%r3
388          movab     bits2opi[%r3],%r3
389/*
390 *  Set  %r2  to the size of the shift needed to
391 *  obtain the correct portion of  2/pi .
392 */
393          bicb2     $0xe0,%r2
394/*                                                          p.4
395 *
396 *  Move the needed  128  bits of  2/pi  into
397 *  %r11 - %r8 .  Adjust the numbers to allow
398 *  for unsigned multiplication.
399 */
400          ashq      %r2,(%r3),%r10
401
402          subl2     $4,%r3
403          ashq      %r2,(%r3),%r9
404          bgeq      signoff1
405          incl      %r11
406signoff1:
407          subl2     $4,%r3
408          ashq      %r2,(%r3),%r8
409          bgeq      signoff2
410          incl      %r10
411signoff2:
412          subl2     $4,%r3
413          ashq      %r2,(%r3),%r7
414          bgeq      signoff3
415          incl      %r9
416signoff3:
417/*                                                          p.5
418 *
419 *  Multiply the contents of  %r0/%r1  by the
420 *  slice of  2/pi  in  %r11 - %r8 .
421 */
422          emul      %r0,%r8,$0,%r4
423          emul      %r0,%r9,%r5,%r5
424          emul      %r0,%r10,%r6,%r6
425
426          emul      %r1,%r8,$0,%r7
427          emul      %r1,%r9,%r8,%r8
428          emul      %r1,%r10,%r9,%r9
429          emul      %r1,%r11,%r10,%r10
430
431          addl2     %r4,%r8
432          adwc      %r5,%r9
433          adwc      %r6,%r10
434/*                                                          p.6
435 *
436 *  If there are more than five leading zeros
437 *  after the first two quotient bits or if there
438 *  are more than five leading ones after the first
439 *  two quotient bits, generate more fraction bits.
440 *  Otherwise, branch to code to produce the result.
441 */
442          bicl3     $0xc1ffffff,%r10,%r4
443          beql      more1
444          cmpl      $0x3e000000,%r4
445          bneq      result
446more1:
447/*                                                          p.7
448 *
449 *  generate another  32  result bits.
450 */
451          subl2     $4,%r3
452          ashq      %r2,(%r3),%r5
453          bgeq      signoff4
454
455          emul      %r1,%r6,$0,%r4
456          addl2     %r1,%r5
457          emul      %r0,%r6,%r5,%r5
458          addl2     %r0,%r6
459          jbr       addbits1
460
461signoff4:
462          emul      %r1,%r6,$0,%r4
463          emul      %r0,%r6,%r5,%r5
464
465addbits1:
466          addl2     %r5,%r7
467          adwc      %r6,%r8
468          adwc      $0,%r9
469          adwc      $0,%r10
470/*                                                          p.8
471 *
472 *  Check for massive cancellation.
473 */
474          bicl3     $0xc0000000,%r10,%r6
475/*        bneq      more2                         -S.McD  Test was backwards */
476          beql      more2
477          cmpl      $0x3fffffff,%r6
478          bneq      result
479more2:
480/*                                                          p.9
481 *
482 *  If massive cancellation has occurred,
483 *  generate another  24  result bits.
484 *  Testing has shown there will always be
485 *  enough bits after this point.
486 */
487          subl2     $4,%r3
488          ashq      %r2,(%r3),%r5
489          bgeq      signoff5
490
491          emul      %r0,%r6,%r4,%r5
492          addl2     %r0,%r6
493          jbr       addbits2
494
495signoff5:
496          emul      %r0,%r6,%r4,%r5
497
498addbits2:
499          addl2     %r6,%r7
500          adwc      $0,%r8
501          adwc      $0,%r9
502          adwc      $0,%r10
503/*                                                          p.10
504 *
505 *  The following code produces the reduced
506 *  argument from the product bits contained
507 *  in  %r10 - %r7 .
508 */
509result:
510/*
511 *  Extract the octant number from  %r10 .
512 */
513/*        extzv     $29,$3,%r10,%r0     ...used for  pi/4  reduction -S.McD */
514          extzv     $30,$2,%r10,%r0
515/*
516 *  Clear the octant bits in  %r10 .
517 */
518/*        bicl2     $0xe0000000,%r10    ...used for  pi/4  reduction -S.McD */
519          bicl2     $0xc0000000,%r10
520/*
521 *  Zero the sign flag.
522 */
523          clrl      %r5
524/*                                                          p.11
525 *
526 *  Check to see if the fraction is greater than
527 *  or equal to one-half.  If it is, add one
528 *  to the octant number, set the sign flag
529 *  on, and replace the fraction with  1 minus
530 *  the fraction.
531 */
532/*        bitl      $0x10000000,%r10              ...used for  pi/4  reduction -S.McD */
533          bitl      $0x20000000,%r10
534          beql      small
535          incl      %r0
536          incl      %r5
537/*        subl3     %r10,$0x1fffffff,%r10         ...used for  pi/4  reduction -S.McD */
538          subl3     %r10,$0x3fffffff,%r10
539          mcoml     %r9,%r9
540          mcoml     %r8,%r8
541          mcoml     %r7,%r7
542small:
543/*                                                          p.12
544 *
545 *  Test whether the first  29  bits of the ...used for  pi/4  reduction -S.McD
546 *  Test whether the first  30  bits of the
547 *  fraction are zero.
548 */
549          tstl      %r10
550          beql      tiny
551/*
552 *  Find the position of the first one bit in  %r10 .
553 */
554          cvtld     %r10,%r1
555          extzv     $7,$7,%r1,%r1
556/*
557 *  Compute the size of the shift needed.
558 */
559          subl3     %r1,$32,%r6
560/*
561 *  Shift up the high order  64  bits of the
562 *  product.
563 */
564          ashq      %r6,%r9,%r10
565          ashq      %r6,%r8,%r9
566          jbr       mult
567/*                                                          p.13
568 *
569 *  Test to see if the sign bit of  %r9  is on.
570 */
571tiny:
572          tstl      %r9
573          bgeq      tinier
574/*
575 *  If it is, shift the product bits up  32  bits.
576 */
577          movl      $32,%r6
578          movq      %r8,%r10
579          tstl      %r10
580          jbr       mult
581/*                                                          p.14
582 *
583 *  Test whether  %r9  is zero.  It is probably
584 *  impossible for both  %r10  and  %r9  to be
585 *  zero, but until proven to be so, the test
586 *  must be made.
587 */
588tinier:
589          beql      zero
590/*
591 *  Find the position of the first one bit in  %r9 .
592 */
593          cvtld     %r9,%r1
594          extzv     $7,$7,%r1,%r1
595/*
596 *  Compute the size of the shift needed.
597 */
598          subl3     %r1,$32,%r1
599          addl3     $32,%r1,%r6
600/*
601 *  Shift up the high order  64  bits of the
602 *  product.
603 */
604          ashq      %r1,%r8,%r10
605          ashq      %r1,%r7,%r9
606          jbr       mult
607/*                                                          p.15
608 *
609 *  The following code sets the reduced
610 *  argument to zero.
611 */
612zero:
613          clrl      %r1
614          clrl      %r2
615          clrl      %r3
616          jbr       return
617/*                                                          p.16
618 *
619 *  At this point,  %r0  contains the octant number,
620 *  %r6  indicates the number of bits the fraction
621 *  has been shifted,  %r5  indicates the sign of
622 *  the fraction,  %r11/%r10  contain the high order
623 *  64  bits of the fraction, and the condition
624 *  codes indicate where the sign bit of  %r10
625 *  is on.  The following code multiplies the
626 *  fraction by  pi/2 .
627 */
628mult:
629/*
630 *  Save  %r11/%r10  in  %r4/%r1 .                -S.McD
631 */
632          movl      %r11,%r4
633          movl      %r10,%r1
634/*
635 *  If the sign bit of  %r10  is on, add  1  to  %r11 .
636 */
637          bgeq      signoff6
638          incl      %r11
639signoff6:
640/*                                                          p.17
641 *
642 *  Move  pi/2  into  %r3/%r2 .
643 */
644          movq      $0xc90fdaa22168c235,%r2
645/*
646 *  Multiply the fraction by the portion of  pi/2
647 *  in  %r2 .
648 */
649          emul      %r2,%r10,$0,%r7
650          emul      %r2,%r11,%r8,%r7
651/*
652 *  Multiply the fraction by the portion of  pi/2
653 *  in  %r3 .
654 */
655          emul      %r3,%r10,$0,%r9
656          emul      %r3,%r11,%r10,%r10
657/*
658 *  Add the product bits together.
659 */
660          addl2     %r7,%r9
661          adwc      %r8,%r10
662          adwc      $0,%r11
663/*
664 *  Compensate for not sign extending  %r8  above.-S.McD
665 */
666          tstl      %r8
667          bgeq      signoff6a
668          decl      %r11
669signoff6a:
670/*
671 *  Compensate for  %r11/%r10  being unsigned.    -S.McD
672 */
673          addl2     %r2,%r10
674          adwc      %r3,%r11
675/*
676 *  Compensate for  %r3/%r2  being unsigned.      -S.McD
677 */
678          addl2     %r1,%r10
679          adwc      %r4,%r11
680/*                                                          p.18
681 *
682 *  If the sign bit of  %r11  is zero, shift the
683 *  product bits up one bit and increment  %r6 .
684 */
685          blss      signon
686          incl      %r6
687          ashq      $1,%r10,%r10
688          tstl      %r9
689          bgeq      signoff7
690          incl      %r10
691signoff7:
692signon:
693/*                                                          p.19
694 *
695 *  Shift the  56  most significant product
696 *  bits into  %r9/%r8 .  The sign extension
697 *  will be handled later.
698 */
699          ashq      $-8,%r10,%r8
700/*
701 *  Convert the low order  8  bits of  %r10
702 *  into an F-format number.
703 */
704          cvtbf     %r10,%r3
705/*
706 *  If the result of the conversion was
707 *  negative, add  1  to  %r9/%r8 .
708 */
709          bgeq      chop
710          incl      %r8
711          adwc      $0,%r9
712/*
713 *  If  %r9  is now zero, branch to special
714 *  code to handle that possibility.
715 */
716          beql      carryout
717chop:
718/*                                                          p.20
719 *
720 *  Convert the number in  %r9/%r8  into
721 *  D-format number in  %r2/%r1 .
722 */
723          rotl      $16,%r8,%r2
724          rotl      $16,%r9,%r1
725/*
726 *  Set the exponent field to the appropriate
727 *  value.  Note that the extra bits created by
728 *  sign extension are now eliminated.
729 */
730          subw3     %r6,$131,%r6
731          insv      %r6,$7,$9,%r1
732/*
733 *  Set the exponent field of the F-format
734 *  number in  %r3  to the appropriate value.
735 */
736          tstf      %r3
737          beql      return
738/*        extzv     $7,$8,%r3,%r4       -S.McD */
739          extzv     $7,$7,%r3,%r4
740          addw2     %r4,%r6
741/*        subw2     $217,%r6            -S.McD */
742          subw2     $64,%r6
743          insv      %r6,$7,$8,%r3
744          jbr       return
745/*                                                          p.21
746 *
747 *  The following code generates the appropriate
748 *  result for the unlikely possibility that
749 *  rounding the number in  %r9/%r8  resulted in
750 *  a carry out.
751 */
752carryout:
753          clrl      %r1
754          clrl      %r2
755          subw3     %r6,$132,%r6
756          insv      %r6,$7,$9,%r1
757          tstf      %r3
758          beql      return
759          extzv     $7,$8,%r3,%r4
760          addw2     %r4,%r6
761          subw2     $218,%r6
762          insv      %r6,$7,$8,%r3
763/*                                                          p.22
764 *
765 *  The following code makes an needed
766 *  adjustments to the signs of the
767 *  results or to the octant number, and
768 *  then returns.
769 */
770return:
771/*
772 *  Test if the fraction was greater than or
773 *  equal to  1/2 .  If so, negate the reduced
774 *  argument.
775 */
776          blbc      %r5,signoff8
777          mnegf     %r1,%r1
778          mnegf     %r3,%r3
779signoff8:
780/*                                                          p.23
781 *
782 *  If the original argument was negative,
783 *  negate the reduce argument and
784 *  adjust the octant number.
785 */
786          tstw      (%sp)+
787          bgeq      signoff9
788          mnegf     %r1,%r1
789          mnegf     %r3,%r3
790/*        subb3     %r0,$8,%r0          ...used for  pi/4  reduction -S.McD */
791          subb3     %r0,$4,%r0
792signoff9:
793/*
794 *  Clear all unneeded octant bits.
795 *
796 *        bicb2     $0xf8,%r0 ...used for  pi/4  reduction -S.McD */
797          bicb2     $0xfc,%r0
798/*
799 *  Return.
800 */
801          rsb
802