/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:16:34 jakllsch Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * Append the NUL-terminated string at src (including its terminator) to
 * the end of the NUL-terminated string at dst, and return dst.
 *
 * x86-64, AT&T syntax.  Leaf routine: no stack use, no calls.
 *
 * Registers:
 *   %rdi   in: dst.  Scan cursor while looking for dst's terminator,
 *          then the store cursor while copying.
 *   %rsi   in: src.  Load cursor while copying.
 *   %rax   out: the original value of dst (saved on entry).
 *   %r8    0x0101010101010101, subtracted from each word examined.
 *   %r9    0x8080808080808080, mask selecting the top bit of every byte.
 *   %rdx   current word / current byte (in %dl).
 *   %rcx   scratch for the zero-byte test in the copy loop.
 *   Clobbers %rcx, %rdx, %rsi, %rdi, %r8, %r9 and the flags.
 *
 * Zero-byte detection: for a 64-bit word w,
 *   (w - 0x0101010101010101) & 0x8080808080808080
 * is non-zero whenever some byte of w is 0.  It is also non-zero for
 * words containing bytes in the range 0x81..0xff, so a non-zero result
 * only means "maybe"; each hit is confirmed byte by byte and the word
 * loop is resumed if no zero byte is actually present.
 *
 * Word-sized loads are only issued from 8-byte-aligned addresses (dst
 * during the scan, src during the copy).  Word-sized stores to dst may
 * be unaligned, and are only issued for words in which the test above
 * found no candidate zero byte.
 */
ENTRY(strcat)
	movq	%rdi,%rax		/* return value = original dst */
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Phase 1a: step through dst one byte at a time until %rdi is
	 * 8-byte aligned, or until the terminator is found first.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$7,%dil			/* low 3 address bits clear? */
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy			/* %rdi points at dst's NUL */
	incq	%rdi
	jmp	.Lscan_align

	/*
	 * Phase 1b: scan dst a word at a time.  %rdi is advanced before
	 * the test, so on loop exit the word just examined occupies
	 * -8(%rdi) .. -1(%rdi).
	 */
	_ALIGN_TEXT
.Lscan_aligned:
.Lscan_loop:
	movq	(%rdi),%rdx
	addq	$8,%rdi
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lscan_loop		/* no candidate zero byte: next word */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * Check the eight bytes in address order.  On the first zero
	 * byte, rewind %rdi so that it points at that byte and start
	 * copying there.
	 */

	cmpb	$0,-8(%rdi)		/* 1st byte == 0? */
	jne	1f
	subq	$8,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-7(%rdi)		/* 2nd byte == 0? */
	jne	1f
	subq	$7,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-6(%rdi)		/* 3rd byte == 0? */
	jne	1f
	subq	$6,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-5(%rdi)		/* 4th byte == 0? */
	jne	1f
	subq	$5,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-4(%rdi)		/* 5th byte == 0? */
	jne	1f
	subq	$4,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-3(%rdi)		/* 6th byte == 0? */
	jne	1f
	subq	$3,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-2(%rdi)		/* 7th byte == 0? */
	jne	1f
	subq	$2,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-1(%rdi)		/* 8th byte == 0? */
	jne	.Lscan_loop		/* false positive: keep scanning */
	subq	$1,%rdi			/* fall through into the copy */

	/*
	 * Phase 2a: %rdi now points at dst's terminator.  Copy src one
	 * byte at a time until %rsi is 8-byte aligned; if the NUL is
	 * copied on the way, the job is finished.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* was that the terminator? */
	jne	.Lcopy_align
	ret

	/*
	 * Phase 2b: copy a word at a time.  The loop is entered at
	 * .Lcopy_aligned; .Lcopy_loop stores the word loaded on the
	 * previous pass once it is known to hold no candidate zero byte.
	 * %rdx keeps the unmodified word, %rcx is consumed by the test.
	 */
	_ALIGN_TEXT
.Lcopy_loop:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lcopy_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * Store the word in %rdx a byte at a time, lowest-addressed byte
	 * first (it is in %dl; each shift brings the next one down), and
	 * stop right after the NUL has been stored.  If all eight bytes
	 * turn out to be non-zero, the whole word has been stored and
	 * both cursors have moved on by 8, so the word loop can simply
	 * be re-entered.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 8th byte == 0? */
	jne	.Lcopy_aligned		/* false positive: keep copying */

.Ldone:
	ret				/* %rax still holds the original dst */
END(strcat)