1/*-
2 * Copyright (c) 2013 The NetBSD Foundation, Inc.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Matt Thomas of 3am Software Foundry.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <machine/asm.h>
31
32RCSID("$NetBSD: strchr_arm.S,v 1.8 2013/08/19 17:50:04 matt Exp $")
33
/*
 * Build guard: a Thumb build is rejected unless _ARM_ARCH_T2 is defined,
 * i.e. only ARM or Thumb-2 builds are accepted.
 */
34#if defined(__thumb__) && !defined(_ARM_ARCH_T2)
35#error Only Thumb2 or ARM supported
36#endif
37
/*
 * Endian-dependent helpers, selected by __ARMEL__ (little-endian).
 *
 * BYTEn  - mask for the byte that sits at address offset n inside a word
 *          loaded from memory (BYTE0 is the lowest-addressed byte).
 *          Used by the word scan that is built when _ARM_ARCH_6 is not
 *          defined.
 * lshi   - shift operator that moves a per-byte flag from its lane into
 *          the lane of the next higher address (lsl on little-endian,
 *          lsr on big-endian).
 * lshis  - same shift, in its flag-setting form.
 *          Both are used by the _ARM_ARCH_6 word scan to mask off
 *          everything that follows a NUL byte.
 */
38#ifdef __ARMEL__
39#define   BYTE0     0x000000ff
40#define   BYTE1     0x0000ff00
41#define   BYTE2     0x00ff0000
42#define   BYTE3     0xff000000
43#define   lshi      lsl
44#define   lshis     lsls
45#else
46#define   BYTE0     0xff000000
47#define   BYTE1     0x00ff0000
48#define   BYTE2     0x0000ff00
49#define   BYTE3     0x000000ff
50#define   lshi      lsr
51#define   lshis     lsrs
52#endif
53
54          .text
/*
 * strchr
 *
 * In:   r0 = pointer to the string to scan (terminated by a 0 byte)
 *       r1 = value to look for; only its low 8 bits are used
 * Out:  r0 = address of the first byte equal to that value, or 0 when the
 *            terminating NUL is reached first.  Looking for the value 0
 *            returns the address of the terminator itself.
 * Uses: r1, r2, r3 and the flags; ip as well on the _ARM_ARCH_6 path.
 *
 * Structure:
 *   1. Bytes are examined one at a time until r0 is word aligned.
 *   2. The rest of the string is scanned one aligned 32-bit word at a
 *      time.  With _ARM_ARCH_6 defined the scan uses UQADD8 to test all
 *      four byte lanes at once; otherwise each lane is tested with TST.
 *
 * Because whole aligned words are loaded, bytes that follow the
 * terminator inside the same word are read as well.
 */
55ENTRY(strchr)
56          and       r2, r1, #0xff                 /* restrict to byte value */
          /* Unaligned head: one byte per iteration until (r0 & 3) == 0. */
571:        tst       r0, #3                        /* test for word alignment */
58          beq       .Lpre_main_loop               /*   finally word aligned */
59          ldrb      r3, [r0], #1                  /* load a byte, r0 now points past it */
60          cmp       r3, r2                        /* is it a match? */
61          beq       2f                            /*   yes, return current ptr - 1 */
62          cmp       r3, #0                        /* no, was it 0? */
63          bne       1b                            /*   no, try next byte */
64          movs      r0, #0                        /*   yes, set return value to NULL */
65          RET                                     /* return */
662:        subs      r0, r0, #1                    /* undo the post-increment of the load */
67          RET                                     /* return */
          /*
           * r0 is word aligned here.  Build the per-lane constants:
           *   ip = 0xfefefefe (only when _ARM_ARCH_6 is defined)
           *   r2 = search byte replicated into all four byte lanes
           */
68.Lpre_main_loop:
69#if defined(_ARM_ARCH_7)
70          movw      ip, #0xfefe                   /* magic constant; 254 in each byte */
71          movt      ip, #0xfefe                   /* magic constant; 254 in each byte */
72#elif defined(_ARM_ARCH_6)
73          mov       ip, #0xfe           /* put 254 in low byte */
74          orr       ip, ip, ip, lsl #8  /* ip = 0x0000fefe */
75          orr       ip, ip, ip, lsl #16 /* ip = 0xfefefefe */
76#endif /* _ARM_ARCH_6 */
77          orr       r2, r2, r2, lsl #8  /* copy search byte into byte 1 */
78          orr       r2, r2, r2, lsl #16 /* ... and into the upper halfword */
79.Lmain_loop:
80          ldr       r3, [r0], #4                  /* load next word, r0 now points past it */
81#if defined(_ARM_ARCH_6)
82          /*
83           * Add 254 to each byte using the UQADD8 (unsigned saturating add 8)
84           * instruction.  For every non-NUL byte, the result for that byte will
85           * become 255.  For NUL, it will be 254.  When we complement the
86           * result, if the result is non-0 then we must have encountered a NUL.
87           */
          /*
           * The same trick finds matches: after the XOR with r2 a matching
           * lane is 0, so UQADD8 leaves 254 there and 255 elsewhere.
           * ANDing both results gives 254 in every lane that is a NUL or a
           * match; the complement turns that into 0x01 per such lane.
           */
88          uqadd8    r1, r3, ip                    /* NUL detection happens here */
89          eors      r3, r3, r2                    /* matching lanes become 0 */
90          uqadd8    r3, r3, ip                    /* char detection happens here */
91          ands      r3, r3, r1                    /* merge results */
92          mvns      r3, r3                        /* is the complement non-0? */
93          beq       .Lmain_loop                   /*    no, then keep going */
94
95          /*
96           * We've encountered a NUL or a match but we don't know which happened
97           * first.
98           */
          /*
           * r3 = 0x01 in each lane that is a NUL or a match.
           * If the search byte is 0 every flagged lane is a NUL, so the
           * first flagged lane is the answer.
           */
99#if defined(__thumb__) && defined(_ARM_ARCH_T2)
100          cbz       r2, .Lfind_match    /* searching for NUL? yes, find it */
101#else
102          cmp       r2, #0                        /* searching for NUL? */
103          beq       .Lfind_match                  /*   yes, find the match */
104#endif
          /* After the MVNS, r1 = 0x01 in each lane that holds a NUL. */
105          mvns      r1, r1                        /* did we encounter a NUL? */
106          beq       .Lfind_match                  /*   no, find the match */
107          bics      r3, r3, r1                    /* clear match for the NUL(s) */
108          beq       .Lnomatch           /*   any left set? if not, no match */
          /*
           * Smear each NUL flag into all higher-addressed lanes so that
           * matches located after the terminator can be discarded.  If the
           * first shift already yields 0 the NUL was in the last lane and
           * there is nothing to smear.
           */
109          lshis     r1, r1, #8                    /* replicate NUL bit to other bytes */
110#ifdef __thumb__
          /* the two conditional ORRs below are placed in an IT block for Thumb */
111          itt       ne
112#endif
113          orrne     r1, r1, r1, lshi #8 /* replicate NUL bit to other bytes */
114          orrne     r1, r1, r1, lshi #8 /* replicate NUL bit to other bytes */
115          bics      r3, r3, r1                    /* clear any match bits after the NUL */
116          beq       .Lnomatch           /*   any left set? if not, no match */
          /*
           * r3 is non-zero with 0x01 in each candidate lane.  Put the
           * lowest-addressed lane in the most significant byte so that
           * CLZ / 8 is the byte offset of the first candidate.
           */
117.Lfind_match:
118#ifdef __ARMEL__
119          rev       r3, r3                        /* we want this in BE for the CLZ */
120#endif
121          clz       r3, r3                        /* count how many leading zeros */
122          add       r0, r0, r3, lsr #3  /* divide that by 8 and add to the pointer */
123          subs      r0, r0, #4                    /* compensate for the post-inc */
124          RET
125.Lnomatch:
126          movs      r0, #0                        /* NUL came first: return NULL */
127          RET
128#else
129          /*
130           * No fancy shortcuts so just test each byte lane for a NUL.
131           * (other tests for NULs in a word take more instructions/cycles).
132           */
          /*
           * r1 = word XOR replicated search byte, so a matching lane is 0.
           * The conditional TST chain stops updating the flags at the first
           * lane that is a NUL or a match; the flags are NE at the end only
           * if none of the four lanes is either.
           */
133          eor       r1, r3, r2                    /* xor .. */
134          tst       r3, #BYTE0                    /* is this byte NUL? */
135          tstne     r1, #BYTE0                    /*   no, does this byte match? */
136          tstne     r3, #BYTE1                    /*   no, is this byte NUL? */
137          tstne     r1, #BYTE1                    /*   no, does this byte match? */
138          tstne     r3, #BYTE2                    /*   no, is this byte NUL? */
139          tstne     r1, #BYTE2                    /*   no, does this byte match? */
140          tstne     r3, #BYTE3                    /*   no, is this byte NUL? */
141          tstne     r1, #BYTE3                    /*   no, does this byte match? */
142          bne       .Lmain_loop
143
          /*
           * This word holds a NUL or a match.  From here on r2 no longer
           * holds the search pattern; it is reused as the address of the
           * word.  The lanes are re-examined in address order, testing for
           * a match before testing for a NUL.
           */
144          sub       r2, r0, #4                    /* un post-inc */
145          mov       r0, #0                        /* assume no match */
146
147          tst       r1, #BYTE0                    /* does this byte match? */
148          moveq     r0, r2                        /*   yes, point to it */
149          RETc(eq)                      /*        and return */
150          tst       r3, #BYTE0                    /* is this byte NUL? */
151          RETc(eq)                      /*   yes, return NULL */
152
153          tst       r1, #BYTE1                    /* does this byte match? */
154          addeq     r0, r2, #1                    /*   yes, point to it */
155          RETc(eq)                      /*        and return */
156          tst       r3, #BYTE1                    /* is this byte NUL? */
157          RETc(eq)                      /*   yes, return NULL */
158
159          tst       r1, #BYTE2                    /* does this byte match? */
160          addeq     r0, r2, #2                    /*   yes, point to it */
161          RETc(eq)                      /*        and return */
162          tst       r3, #BYTE2                    /* is this byte NUL? */
163          RETc(eq)                      /*   yes, return NULL */
164
165          tst       r1, #BYTE3                    /* does this byte match? */
166          addeq     r0, r2, #3                    /*   yes, point to it */
167          /*
168           * Since no NULs and no matches this must be the only case left.
169           */
          /* If the last lane did not match it must be the NUL; r0 is still 0. */
170          RET                                     /* return */
171#endif /* _ARM_ARCH_6 */
172END(strchr)
173