/*        $NetBSD: memmove.S,v 1.11 2023/01/19 18:03:03 mlelstv Exp $ */

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * void *memmove(void *dst, const void *src, size_t len)
 * void  bcopy(void *src, void *dst, size_t len)     (built with _BCOPY)
 *
 * Working registers once the bcopy-only argument swap has been done:
 *        r0 = dst, r1 = src, r2 = len, r3/r12 = scratch
 *
 * Strategy:
 *        dst == src           return at once
 *        |dst - src| >= len   regions cannot overlap: tail-call memcpy
 *        src > dst, overlap   copy forwards (lowest address first)
 *        src < dst, overlap   copy backwards (.Lmemmove_backwards)
 *
 * All size and pointer comparisons are unsigned (hs/lo/hi conditions).
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
          /* switch the source and destination registers (xor swap) */
          eor       r0, r1, r0
          eor       r1, r0, r1
          eor       r0, r1, r0
#endif
          /* Do the buffers overlap? */
          cmp       r0, r1
          RETc(eq)                                /* Bail now if src/dst are the same */
          subhs     r3, r0, r1                    /* if (dst > src) r3 = dst - src */
          sublo     r3, r1, r0                    /* if (src > dst) r3 = src - dst */
          cmp       r3, r2                        /* distance >= len: the buffers do NOT overlap */
          bhs       PLT_SYM(_C_LABEL(memcpy))     /* ... so let memcpy do the work */

          /* Determine copy direction */
          cmp       r1, r0
          bcc       .Lmemmove_backwards           /* src < dst: must copy from the top down */

          /*
           * Forward copy.  Here src > dst (equal pointers returned above)
           * and len > src - dst >= 1 (otherwise memcpy was taken), so no
           * separate zero-length check is needed on this path.
           */
          push      {r0, lr}                      /* memmove() returns dest addr */
          subs      r2, r2, #4                    /* r2 = len - 4 from here on */
          blo       .Lmemmove_fl4                 /* less than 4 bytes */
          ands      r12, r0, #3                   /* r12 = dst & 3 */
          bne       .Lmemmove_fdestul             /* oh unaligned destination addr */
          ands      r12, r1, #3                   /* r12 = src & 3 */
          bne       .Lmemmove_fsrcul              /* oh unaligned source addr */

/*
 * Forward copy with source and destination both word aligned.
 *
 * On entry: r0 = dst cursor, r1 = src cursor, r2 = (bytes remaining) - 4.
 * r2 is kept biased so that every "is there room for another N bytes?"
 * question is answered by the carry flag of a subs/adds/cmn:
 * carry set (hs) means yes, carry clear (lo) means no.
 */
.Lmemmove_ft8:
          /* We have aligned source and destination */
          subs      r2, r2, #8                    /* r2 = remaining - 12 */
          blo       .Lmemmove_fl12                /* less than 12 bytes (4 from above) */
          subs      r2, r2, #0x14                 /* r2 = remaining - 32 */
          blo       .Lmemmove_fl32                /* less than 32 bytes (12 from above) */
          push      {r4}                          /* borrow r4 */

          /* blat 32 bytes at a time */
          /* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
          ldmia     r1!, {r3, r4, r12, lr}
          stmia     r0!, {r3, r4, r12, lr}
          ldmia     r1!, {r3, r4, r12, lr}
          stmia     r0!, {r3, r4, r12, lr}
          subs      r2, r2, #0x20
          bhs       .Lmemmove_floop32

          cmn       r2, #0x10                     /* carry set iff at least 16 bytes remain */
          ldmiahs   r1!, {r3, r4, r12, lr}        /* blat a remaining 16 bytes */
          stmiahs   r0!, {r3, r4, r12, lr}
          subhs     r2, r2, #0x10
          pop       {r4}                          /* return r4 */

.Lmemmove_fl32:
          adds      r2, r2, #0x14                 /* r2 = remaining - 12; hs iff >= 12 remain */

          /* blat 12 bytes at a time */
          /* (first pass is predicated on the adds above, later ones on the subs) */
.Lmemmove_floop12:
          ldmiahs   r1!, {r3, r12, lr}
          stmiahs   r0!, {r3, r12, lr}
          subshs    r2, r2, #0x0c
          bhs       .Lmemmove_floop12

.Lmemmove_fl12:
          adds      r2, r2, #8                    /* r2 = remaining - 4 */
          blo       .Lmemmove_fl4                 /* fewer than 4 bytes left */

          subs      r2, r2, #4                    /* r2 = remaining - 8 */
          ldrlo     r3, [r1], #4                  /* 4..7 left: move one word */
          strlo     r3, [r0], #4
          ldmiahs   r1!, {r3, r12}                /* 8..11 left: move two words */
          stmiahs   r0!, {r3, r12}
          subhs     r2, r2, #4                    /* either way r2 = remaining - 4 again */

.Lmemmove_fl4:
          /* less than 4 bytes to go */
          adds      r2, r2, #4                    /* r2 = remaining (0..3) */
          popeq     {r0, pc}                      /* done: return the dst saved at entry */

          /* copy the crud byte at a time */
          cmp       r2, #2
          ldrb      r3, [r1], #1                  /* at least one byte */
          strb      r3, [r0], #1
          ldrbhs    r3, [r1], #1                  /* second byte if r2 >= 2 */
          strbhs    r3, [r0], #1
          ldrbhi    r3, [r1], #1                  /* third byte if r2 == 3 */
          strbhi    r3, [r0], #1
          pop       {r0, pc}

/*
 * Forward copy, destination not word aligned.
 *
 * On entry: r12 = dst & 3 (1..3), r2 = (bytes remaining) - 4, and at
 * least 4 bytes remain.  Copies the 1..3 bytes needed to bring dst up to
 * a word boundary, then continues with the aligned or unaligned-source
 * word copier.
 */
.Lmemmove_fdestul:
          rsb       r12, r12, #4                  /* r12 = bytes needed to align dst (1..3) */
          cmp       r12, #2

          /* align destination with byte copies */
          ldrb      r3, [r1], #1                  /* at least one byte */
          strb      r3, [r0], #1
          ldrbhs    r3, [r1], #1                  /* second byte if r12 >= 2 */
          strbhs    r3, [r0], #1
          ldrbhi    r3, [r1], #1                  /* third byte if r12 == 3 */
          strbhi    r3, [r0], #1
          subs      r2, r2, r12                   /* r2 = remaining - 4 once more */
          blo       .Lmemmove_fl4                 /* less than 4 bytes */

          ands      r12, r1, #3                   /* r12 = src & 3 */
          beq       .Lmemmove_ft8                 /* we have an aligned source */
          /* src still unaligned: fall through to the code that follows */

/*
 * Forward copy, destination word aligned but source not.
 *
 * On entry: r12 = src & 3 (1..3), r2 = (bytes remaining) - 4 (>= 0).
 * The source is read with aligned word loads only; each destination word
 * is assembled from the tail of the previously loaded word (kept in lr)
 * and the head of the next one, using shifts whose direction depends on
 * the byte order (__ARMEB__).
 */
.Lmemmove_fsrcul:
          bic       r1, r1, #3                    /* round src down to a word boundary */
          ldr       lr, [r1], #4                  /* lr = word holding the first wanted bytes */
          cmp       r12, #2
          bhi       .Lmemmove_fsrcul3             /* src & 3 == 3 */
          beq       .Lmemmove_fsrcul2             /* src & 3 == 2 */

          /* src & 3 == 1: three bytes come from lr, one from the next word */
          cmp       r2, #0x0c
          blo       .Lmemmove_fsrcul1loop4        /* fewer than 16 bytes left */
          sub       r2, r2, #0x0c                 /* r2 = remaining - 16 */
          push      {r4, r5}

          /* 16 bytes per pass */
.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
          mov       r3, lr, lsl #8
#else
          mov       r3, lr, lsr #8
#endif
          ldmia     r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
          orr       r3, r3, r4, lsr #24
          mov       r4, r4, lsl #8
          orr       r4, r4, r5, lsr #24
          mov       r5, r5, lsl #8
          orr       r5, r5, r12, lsr #24
          mov       r12, r12, lsl #8
          orr       r12, r12, lr, lsr #24
#else
          orr       r3, r3, r4, lsl #24
          mov       r4, r4, lsr #8
          orr       r4, r4, r5, lsl #24
          mov       r5, r5, lsr #8
          orr       r5, r5, r12, lsl #24
          mov       r12, r12, lsr #8
          orr       r12, r12, lr, lsl #24
#endif
          stmia     r0!, {r3-r5, r12}
          subs      r2, r2, #0x10
          bhs       .Lmemmove_fsrcul1loop16
          pop       {r4, r5}
          adds      r2, r2, #0x0c                 /* back to r2 = remaining - 4 */
          blo       .Lmemmove_fsrcul1l4           /* fewer than 4 bytes left */

          /* one word per pass */
.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
          mov       r12, lr, lsl #8
#else
          mov       r12, lr, lsr #8
#endif
          ldr       lr, [r1], #4
#ifdef __ARMEB__
          orr       r12, r12, lr, lsr #24
#else
          orr       r12, r12, lr, lsl #24
#endif
          str       r12, [r0], #4
          subs      r2, r2, #4
          bhs       .Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
          sub       r1, r1, #3                    /* r1 is one word ahead; back to the true src cursor */
          b         .Lmemmove_fl4                 /* finish the last 0..3 bytes */

/*
 * Forward copy, aligned destination, source two bytes past a word
 * boundary.
 *
 * On entry: r1 = word-aligned address just past the word held in lr,
 * r2 = (bytes remaining) - 4.  Every destination word is half of the
 * previous source word (lr) merged with half of the next one.
 */
.Lmemmove_fsrcul2:
          cmp       r2, #0x0c
          blo       .Lmemmove_fsrcul2loop4        /* fewer than 16 bytes left */
          sub       r2, r2, #0x0c                 /* r2 = remaining - 16 */
          push      {r4, r5}

          /* 16 bytes per pass */
.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
          mov       r3, lr, lsl #16
#else
          mov       r3, lr, lsr #16
#endif
          ldmia     r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
          orr       r3, r3, r4, lsr #16
          mov       r4, r4, lsl #16
          orr       r4, r4, r5, lsr #16
          mov       r5, r5, lsl #16
          orr       r5, r5, r12, lsr #16
          mov       r12, r12, lsl #16
          orr       r12, r12, lr, lsr #16
#else
          orr       r3, r3, r4, lsl #16
          mov       r4, r4, lsr #16
          orr       r4, r4, r5, lsl #16
          mov       r5, r5, lsr #16
          orr       r5, r5, r12, lsl #16
          mov       r12, r12, lsr #16
          orr       r12, r12, lr, lsl #16
#endif
          stmia     r0!, {r3-r5, r12}
          subs      r2, r2, #0x10
          bhs       .Lmemmove_fsrcul2loop16
          pop       {r4, r5}
          adds      r2, r2, #0x0c                 /* back to r2 = remaining - 4 */
          blo       .Lmemmove_fsrcul2l4           /* fewer than 4 bytes left */

          /* one word per pass */
.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
          mov       r12, lr, lsl #16
#else
          mov       r12, lr, lsr #16
#endif
          ldr       lr, [r1], #4
#ifdef __ARMEB__
          orr       r12, r12, lr, lsr #16
#else
          orr       r12, r12, lr, lsl #16
#endif
          str       r12, [r0], #4
          subs      r2, r2, #4
          bhs       .Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
          sub       r1, r1, #2                    /* r1 is one word ahead; back to the true src cursor */
          b         .Lmemmove_fl4                 /* finish the last 0..3 bytes */

/*
 * Forward copy, aligned destination, source three bytes past a word
 * boundary.
 *
 * On entry: r1 = word-aligned address just past the word held in lr,
 * r2 = (bytes remaining) - 4.  Every destination word takes one byte
 * from the previous source word (lr) and three from the next one.
 */
.Lmemmove_fsrcul3:
          cmp       r2, #0x0c
          blo       .Lmemmove_fsrcul3loop4        /* fewer than 16 bytes left */
          sub       r2, r2, #0x0c                 /* r2 = remaining - 16 */
          push      {r4, r5}

          /* 16 bytes per pass */
.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
          mov       r3, lr, lsl #24
#else
          mov       r3, lr, lsr #24
#endif
          ldmia     r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
          orr       r3, r3, r4, lsr #8
          mov       r4, r4, lsl #24
          orr       r4, r4, r5, lsr #8
          mov       r5, r5, lsl #24
          orr       r5, r5, r12, lsr #8
          mov       r12, r12, lsl #24
          orr       r12, r12, lr, lsr #8
#else
          orr       r3, r3, r4, lsl #8
          mov       r4, r4, lsr #24
          orr       r4, r4, r5, lsl #8
          mov       r5, r5, lsr #24
          orr       r5, r5, r12, lsl #8
          mov       r12, r12, lsr #24
          orr       r12, r12, lr, lsl #8
#endif
          stmia     r0!, {r3-r5, r12}
          subs      r2, r2, #0x10
          bhs       .Lmemmove_fsrcul3loop16
          pop       {r4, r5}
          adds      r2, r2, #0x0c                 /* back to r2 = remaining - 4 */
          blo       .Lmemmove_fsrcul3l4           /* fewer than 4 bytes left */

          /* one word per pass */
.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
          mov       r12, lr, lsl #24
#else
          mov       r12, lr, lsr #24
#endif
          ldr       lr, [r1], #4
#ifdef __ARMEB__
          orr       r12, r12, lr, lsr #8
#else
          orr       r12, r12, lr, lsl #8
#endif
          str       r12, [r0], #4
          subs      r2, r2, #4
          bhs       .Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
          sub       r1, r1, #1                    /* r1 is one word ahead; back to the true src cursor */
          b         .Lmemmove_fl4                 /* finish the last 0..3 bytes */

/*
 * Backward copy, used when src < dst and the regions overlap.
 *
 * r0 and r1 are first moved to one byte past the end of each buffer and
 * every load/store then pre-decrements.  Once all len bytes have been
 * moved r0 is back at the original dst, so the return value needs no
 * stack slot.  lr is only stacked around the code that uses it as a
 * data register.  As in the forward path, r2 is kept as
 * (bytes remaining) - 4 and size tests are carry tests.
 */
.Lmemmove_backwards:
          add       r1, r1, r2                    /* r1 = src + len */
          add       r0, r0, r2                    /* r0 = dst + len */
          subs      r2, r2, #4                    /* r2 = len - 4 */
          blo       .Lmemmove_bl4                 /* less than 4 bytes */
          ands      r12, r0, #3                   /* r12 = (dst + len) & 3 */
          bne       .Lmemmove_bdestul             /* oh unaligned destination addr */
          ands      r12, r1, #3                   /* r12 = (src + len) & 3 */
          bne       .Lmemmove_bsrcul              /* oh unaligned source addr */

.Lmemmove_bt8:
          /* We have aligned source and destination */
          subs      r2, r2, #8                    /* r2 = remaining - 12 */
          blo       .Lmemmove_bl12                /* less than 12 bytes (4 from above) */
          push      {r4, lr}                      /* r4 and lr carry data below */
          subs      r2, r2, #0x14                 /* r2 = remaining - 32 */
          blo       .Lmemmove_bl32                /* less than 32 bytes (12 from above) */

          /* blat 32 bytes at a time */
          /* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
          ldmdb     r1!, {r3, r4, r12, lr}
          stmdb     r0!, {r3, r4, r12, lr}
          ldmdb     r1!, {r3, r4, r12, lr}
          stmdb     r0!, {r3, r4, r12, lr}
          subs      r2, r2, #0x20
          bhs       .Lmemmove_bloop32

.Lmemmove_bl32:
          cmn       r2, #0x10                     /* carry set iff at least 16 bytes remain */
          ldmdbhs   r1!, {r3, r4, r12, lr}        /* blat a remaining 16 bytes */
          stmdbhs   r0!, {r3, r4, r12, lr}
          subhs     r2, r2, #0x10
          adds      r2, r2, #0x14                 /* r2 = remaining - 12; hs iff >= 12 remain */
          ldmdbhs   r1!, {r3, r12, lr}            /* blat a remaining 12 bytes */
          stmdbhs   r0!, {r3, r12, lr}
          subhs     r2, r2, #0x0c
          pop       {r4, lr}

.Lmemmove_bl12:
          adds      r2, r2, #8                    /* r2 = remaining - 4 */
          blo       .Lmemmove_bl4                 /* fewer than 4 bytes left */
          subs      r2, r2, #4                    /* r2 = remaining - 8 */
          ldrlo     r3, [r1, #-4]!                /* 4..7 left: move one word */
          strlo     r3, [r0, #-4]!
          ldmdbhs   r1!, {r3, r12}                /* 8..11 left: move two words */
          stmdbhs   r0!, {r3, r12}
          subhs     r2, r2, #4                    /* either way r2 = remaining - 4 again */

.Lmemmove_bl4:
          /* less than 4 bytes to go */
          adds      r2, r2, #4                    /* r2 = remaining (0..3) */
          RETc(eq)                                /* nothing left */

          /* copy the crud byte at a time */
          cmp       r2, #2
          ldrb      r3, [r1, #-1]!                /* at least one byte */
          strb      r3, [r0, #-1]!
          ldrbhs    r3, [r1, #-1]!                /* second byte if r2 >= 2 */
          strbhs    r3, [r0, #-1]!
          ldrbhi    r3, [r1, #-1]!                /* third byte if r2 == 3 */
          strbhi    r3, [r0, #-1]!
          RET

/*
 * Backward copy, end of destination not word aligned.
 *
 * On entry: r12 = r0 & 3 (1..3), which is exactly the number of bytes
 * lying above the word boundary below r0; r2 = (bytes remaining) - 4 and
 * at least 4 bytes remain.  Copies those r12 bytes so that r0 becomes
 * word aligned.
 */
.Lmemmove_bdestul:
          cmp       r12, #2

          /* align destination with byte copies */
          ldrb      r3, [r1, #-1]!                /* at least one byte */
          strb      r3, [r0, #-1]!
          ldrbhs    r3, [r1, #-1]!                /* second byte if r12 >= 2 */
          strbhs    r3, [r0, #-1]!
          ldrbhi    r3, [r1, #-1]!                /* third byte if r12 == 3 */
          strbhi    r3, [r0, #-1]!
          subs      r2, r2, r12                   /* r2 = remaining - 4 once more */
          blo       .Lmemmove_bl4                 /* less than 4 bytes to go */
          ands      r12, r1, #3                   /* r12 = r1 & 3 */
          beq       .Lmemmove_bt8                 /* we have an aligned source */
          /* src still unaligned: fall through to the code that follows */

/*
 * Backward copy, destination word aligned but source not.
 *
 * On entry: r12 = r1 & 3 (1..3), r2 = (bytes remaining) - 4 (>= 0).
 * The source is read with aligned word loads only; each destination word
 * is assembled from the head of the previously loaded (higher) word,
 * kept in r3, and the tail of the next lower one.  Shift directions
 * depend on the byte order (__ARMEB__).
 */
.Lmemmove_bsrcul:
          bic       r1, r1, #3                    /* round the src cursor down to a word boundary */
          ldr       r3, [r1, #0]                  /* r3 = word holding the last wanted bytes */
          cmp       r12, #2
          blo       .Lmemmove_bsrcul1             /* r12 == 1 */
          beq       .Lmemmove_bsrcul2             /* r12 == 2 */

          /* r12 == 3: three bytes come from r3, one from the next lower word */
          cmp       r2, #0x0c
          blo       .Lmemmove_bsrcul3loop4        /* fewer than 16 bytes left */
          sub       r2, r2, #0x0c                 /* r2 = remaining - 16 */
          push      {r4, r5, lr}                  /* lr is used as a data register below */

          /* 16 bytes per pass */
.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
          mov       lr, r3, lsr #8
#else
          mov       lr, r3, lsl #8
#endif
          ldmdb     r1!, {r3-r5, r12}
#ifdef __ARMEB__
          orr       lr, lr, r12, lsl #24
          mov       r12, r12, lsr #8
          orr       r12, r12, r5, lsl #24
          mov       r5, r5, lsr #8
          orr       r5, r5, r4, lsl #24
          mov       r4, r4, lsr #8
          orr       r4, r4, r3, lsl #24
#else
          orr       lr, lr, r12, lsr #24
          mov       r12, r12, lsl #8
          orr       r12, r12, r5, lsr #24
          mov       r5, r5, lsl #8
          orr       r5, r5, r4, lsr #24
          mov       r4, r4, lsl #8
          orr       r4, r4, r3, lsr #24
#endif
          stmdb     r0!, {r4, r5, r12, lr}
          subs      r2, r2, #0x10
          bhs       .Lmemmove_bsrcul3loop16
          pop       {r4, r5, lr}
          adds      r2, r2, #0x0c                 /* back to r2 = remaining - 4 */
          blo       .Lmemmove_bsrcul3l4           /* fewer than 4 bytes left */

          /* one word per pass */
.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
          mov       r12, r3, lsr #8
#else
          mov       r12, r3, lsl #8
#endif
          ldr       r3, [r1, #-4]!
#ifdef __ARMEB__
          orr       r12, r12, r3, lsl #24
#else
          orr       r12, r12, r3, lsr #24
#endif
          str       r12, [r0, #-4]!
          subs      r2, r2, #4
          bhs       .Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
          add       r1, r1, #3                    /* restore the true (unaligned) src cursor */
          b         .Lmemmove_bl4                 /* finish the last 0..3 bytes */

/*
 * Backward copy, aligned destination, source cursor two bytes above a
 * word boundary.
 *
 * On entry: r1 = word-aligned address of the word held in r3,
 * r2 = (bytes remaining) - 4.  Every destination word is half of the
 * previous (higher) source word merged with half of the next lower one.
 */
.Lmemmove_bsrcul2:
          cmp       r2, #0x0c
          blo       .Lmemmove_bsrcul2loop4        /* fewer than 16 bytes left */
          sub       r2, r2, #0x0c                 /* r2 = remaining - 16 */
          push      {r4, r5, lr}                  /* lr is used as a data register below */

          /* 16 bytes per pass */
.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
          mov       lr, r3, lsr #16
#else
          mov       lr, r3, lsl #16
#endif
          ldmdb     r1!, {r3-r5, r12}
#ifdef __ARMEB__
          orr       lr, lr, r12, lsl #16
          mov       r12, r12, lsr #16
          orr       r12, r12, r5, lsl #16
          mov       r5, r5, lsr #16
          orr       r5, r5, r4, lsl #16
          mov       r4, r4, lsr #16
          orr       r4, r4, r3, lsl #16
#else
          orr       lr, lr, r12, lsr #16
          mov       r12, r12, lsl #16
          orr       r12, r12, r5, lsr #16
          mov       r5, r5, lsl #16
          orr       r5, r5, r4, lsr #16
          mov       r4, r4, lsl #16
          orr       r4, r4, r3, lsr #16
#endif
          stmdb     r0!, {r4, r5, r12, lr}
          subs      r2, r2, #0x10
          bhs       .Lmemmove_bsrcul2loop16
          pop       {r4, r5, lr}
          adds      r2, r2, #0x0c                 /* back to r2 = remaining - 4 */
          blo       .Lmemmove_bsrcul2l4           /* fewer than 4 bytes left */

          /* one word per pass */
.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
          mov       r12, r3, lsr #16
#else
          mov       r12, r3, lsl #16
#endif
          ldr       r3, [r1, #-4]!
#ifdef __ARMEB__
          orr       r12, r12, r3, lsl #16
#else
          orr       r12, r12, r3, lsr #16
#endif
          str       r12, [r0, #-4]!
          subs      r2, r2, #4
          bhs       .Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
          add       r1, r1, #2                    /* restore the true (unaligned) src cursor */
          b         .Lmemmove_bl4                 /* finish the last 0..3 bytes */

/*
 * Backward copy, aligned destination, source cursor one byte above a
 * word boundary.
 *
 * On entry: r1 = word-aligned address of the word held in r3,
 * r2 = (bytes remaining) - 4.  Every destination word takes one byte
 * from the previous (higher) source word and three from the next lower
 * one.
 */
.Lmemmove_bsrcul1:
          cmp       r2, #0x0c
          blo       .Lmemmove_bsrcul1loop4        /* fewer than 16 bytes left */
          sub       r2, r2, #0x0c                 /* r2 = remaining - 16 */
          push      {r4, r5, lr}                  /* lr is used as a data register below */

          /* 16 bytes per pass */
.Lmemmove_bsrcul1loop16:
#ifdef __ARMEB__
          mov       lr, r3, lsr #24
#else
          mov       lr, r3, lsl #24
#endif
          ldmdb     r1!, {r3-r5, r12}
#ifdef __ARMEB__
          orr       lr, lr, r12, lsl #8
          mov       r12, r12, lsr #24
          orr       r12, r12, r5, lsl #8
          mov       r5, r5, lsr #24
          orr       r5, r5, r4, lsl #8
          mov       r4, r4, lsr #24
          orr       r4, r4, r3, lsl #8
#else
          orr       lr, lr, r12, lsr #8
          mov       r12, r12, lsl #24
          orr       r12, r12, r5, lsr #8
          mov       r5, r5, lsl #24
          orr       r5, r5, r4, lsr #8
          mov       r4, r4, lsl #24
          orr       r4, r4, r3, lsr #8
#endif
          stmdb     r0!, {r4, r5, r12, lr}
          subs      r2, r2, #0x10
          bhs       .Lmemmove_bsrcul1loop16
          pop       {r4, r5, lr}
          adds      r2, r2, #0x0c                 /* back to r2 = remaining - 4 */
          blo       .Lmemmove_bsrcul1l4           /* fewer than 4 bytes left */

          /* one word per pass */
.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
          mov       r12, r3, lsr #24
#else
          mov       r12, r3, lsl #24
#endif
          ldr       r3, [r1, #-4]!
#ifdef __ARMEB__
          orr       r12, r12, r3, lsl #8
#else
          orr       r12, r12, r3, lsr #8
#endif
          str       r12, [r0, #-4]!
          subs      r2, r2, #4
          bhs       .Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
          add       r1, r1, #1                    /* restore the true (unaligned) src cursor */
          b         .Lmemmove_bl4                 /* finish the last 0..3 bytes */
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif

/*
 * Export the ARM EABI helper names as aliases of memmove.
 *
 * The aliases are only valid when this file actually defines memmove,
 * i.e. when it is not being built as bcopy.  The rest of the file keys
 * the bcopy build on _BCOPY, so that macro is tested here as well; the
 * historical BCOPY spelling is still honoured.
 */
#if defined(__ARM_EABI__) && !defined(_BCOPY) && !defined(BCOPY) && !defined(_RUMPKERNEL)
STRONG_ALIAS(__aeabi_memmove, memmove)
STRONG_ALIAS(__aeabi_memmove4, memmove)
STRONG_ALIAS(__aeabi_memmove8, memmove)
#endif
