1/*
2 * Written by J.T. Conklin <jtc@acorntoolworks.com>
3 * Public domain.
4 */
5
6#include <machine/asm.h>
7
8#if defined(LIBC_SCCS)
9          RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
10#endif
11
/*
 * char *strcat(char *dst, const char *src)
 *
 * Appends the NUL-terminated string at src (including its terminator)
 * to the end of the NUL-terminated string at dst, and returns dst.
 *
 * Arguments are taken from the stack: once %ebx has been pushed, dst
 * is at 8(%esp) and src is at 12(%esp).
 *
 * Register roles:
 *   %ecx   cursor in dst: first hunts for dst's terminator, then
 *          serves as the write pointer
 *   %eax   read pointer in src; holds dst again on return
 *   %ebx   byte/word under inspection (saved on entry, restored on exit)
 *   %edx   scratch for the zero-byte test
 *
 * Zero-byte test used by both word loops:
 *   (word - 0x01010101) & 0x80808080
 * is non-zero whenever the word contains a zero byte, but it can also
 * be non-zero when a byte has its top bit set (0x81..0xff).  Each loop
 * is therefore followed by an exact byte-by-byte check that re-enters
 * the loop if the hit turns out to be a false alarm.
 */
ENTRY(strcat)
        pushl   %ebx
        movl    12(%esp),%eax           /* %eax = src */
        movl    8(%esp),%ecx            /* %ecx = dst */

        /*
         * Phase 1a: step through dst one byte at a time until either
         * its terminator is found or %ecx is 4-byte aligned.
         */
.Lfind_end_bytes:
        testb   $3,%cl
        jz      .Lfind_end_words
        cmpb    $0,(%ecx)
        jz      .Lappend
        incl    %ecx
        jmp     .Lfind_end_bytes

        /*
         * Phase 1b: %ecx is aligned; examine dst four bytes at a time.
         * %ecx is advanced past the word before the test is made.
         */
        _ALIGN_TEXT
.Lfind_end_words:
        movl    (%ecx),%ebx
        addl    $4,%ecx
        leal    -0x01010101(%ebx),%edx
        testl   $0x80808080,%edx
        jz      .Lfind_end_words

        /*
         * The quick test fired.  Rewind %ecx to the start of the word
         * and walk forward one byte per miss, so that %ecx lands on the
         * zero byte if there is one.  If all four bytes are non-zero
         * %ecx is back past the word and the word loop resumes.
         *
         * This is compact rather than fast; it is not expected to run
         * often.
         */
        subl    $4,%ecx
        testb   %bl,%bl                 /* 1st byte == 0? */
        jz      .Lappend
        incl    %ecx
        testb   %bh,%bh                 /* 2nd byte == 0? */
        jz      .Lappend
        incl    %ecx
        shrl    $16,%ebx                /* upper half -> %bl/%bh */
        testb   %bl,%bl                 /* 3rd byte == 0? */
        jz      .Lappend
        incl    %ecx
        testb   %bh,%bh                 /* 4th byte == 0? */
        jz      .Lappend
        incl    %ecx
        jmp     .Lfind_end_words        /* false alarm: keep scanning */

        /*
         * Phase 2a: %ecx points at dst's terminator.  Copy src one byte
         * at a time until either its terminator has been copied or
         * %eax is 4-byte aligned.
         */
.Lappend:
.Lappend_bytes:
        testb   $3,%al
        jz      .Lappend_words
        movb    (%eax),%bl
        incl    %eax
        movb    %bl,(%ecx)
        incl    %ecx
        testb   %bl,%bl
        jnz     .Lappend_bytes
        jmp     .Lreturn                /* terminator already stored */

        /*
         * Phase 2b: %eax is aligned; move whole words.  A word is only
         * stored as a unit once the quick test has found no candidate
         * zero byte in it.  Entry is at .Lappend_words; .Lstore_word is
         * reached only from the loop's own back-edge.
         */
        _ALIGN_TEXT
.Lstore_word:
        movl    %ebx,(%ecx)
        addl    $4,%ecx
.Lappend_words:
        movl    (%eax),%ebx
        addl    $4,%eax
        leal    -0x01010101(%ebx),%edx
        testl   $0x80808080,%edx
        jz      .Lstore_word

        /*
         * The quick test fired.  Store the word's bytes one at a time,
         * stopping right after a zero byte has been written.  If none
         * of the four is zero the whole word has been copied and the
         * word loop resumes with the next source word.
         */
        movb    %bl,(%ecx)              /* 1st byte */
        incl    %ecx
        testb   %bl,%bl
        jz      .Lreturn

        movb    %bh,(%ecx)              /* 2nd byte */
        incl    %ecx
        testb   %bh,%bh
        jz      .Lreturn

        shrl    $16,%ebx                /* upper half -> %bl/%bh */
        movb    %bl,(%ecx)              /* 3rd byte */
        incl    %ecx
        testb   %bl,%bl
        jz      .Lreturn

        movb    %bh,(%ecx)              /* 4th byte */
        incl    %ecx
        testb   %bh,%bh
        jnz     .Lappend_words          /* false alarm: keep copying */

.Lreturn:
        popl    %ebx
        movl    4(%esp),%eax            /* return value = original dst */
        ret
END(strcat)
129