1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987-2022 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Work around tree-optimization/91825.  */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23 
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46 
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
51 
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53                                               unsigned HOST_WIDE_INT,
54                                               unsigned HOST_WIDE_INT,
55                                               poly_uint64, poly_uint64,
56                                               machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58                                            unsigned HOST_WIDE_INT,
59                                            unsigned HOST_WIDE_INT,
60                                            poly_uint64, poly_uint64,
61                                            rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63                                              unsigned HOST_WIDE_INT,
64                                              unsigned HOST_WIDE_INT,
65                                              rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67                                            unsigned HOST_WIDE_INT,
68                                            unsigned HOST_WIDE_INT,
69                                            poly_uint64, poly_uint64,
70                                            rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72                                                unsigned HOST_WIDE_INT,
73                                                unsigned HOST_WIDE_INT, int, rtx,
74                                                machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76                                             unsigned HOST_WIDE_INT,
77                                             unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79                                               unsigned HOST_WIDE_INT,
80                                               unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83                                             unsigned HOST_WIDE_INT,
84                                             unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88 
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91    The mask is truncated if necessary to the width of mode MODE.  The
92    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
93 
94 static inline rtx
mask_rtx(scalar_int_mode mode,int bitpos,int bitsize,bool complement)95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
96 {
97   return immed_wide_int_const
98     (wi::shifted_mask (bitpos, bitsize, complement,
99                            GET_MODE_PRECISION (mode)), mode);
100 }
101 
102 /* Test whether a value is zero of a power of two.  */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
105 
106 struct init_expmed_rtl
107 {
108   rtx reg;
109   rtx plus;
110   rtx neg;
111   rtx mult;
112   rtx sdiv;
113   rtx udiv;
114   rtx sdiv_32;
115   rtx smod_32;
116   rtx wide_mult;
117   rtx wide_lshr;
118   rtx wide_trunc;
119   rtx shift;
120   rtx shift_mult;
121   rtx shift_add;
122   rtx shift_sub0;
123   rtx shift_sub1;
124   rtx zext;
125   rtx trunc;
126 
127   rtx pow2[MAX_BITS_PER_WORD];
128   rtx cint[MAX_BITS_PER_WORD];
129 };
130 
131 static void
init_expmed_one_conv(struct init_expmed_rtl * all,scalar_int_mode to_mode,scalar_int_mode from_mode,bool speed)132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133                           scalar_int_mode from_mode, bool speed)
134 {
135   int to_size, from_size;
136   rtx which;
137 
138   to_size = GET_MODE_PRECISION (to_mode);
139   from_size = GET_MODE_PRECISION (from_mode);
140 
141   /* Most partial integers have a precision less than the "full"
142      integer it requires for storage.  In case one doesn't, for
143      comparison purposes here, reduce the bit size by one in that
144      case.  */
145   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146       && pow2p_hwi (to_size))
147     to_size --;
148   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149       && pow2p_hwi (from_size))
150     from_size --;
151 
152   /* Assume cost of zero-extend and sign-extend is the same.  */
153   which = (to_size < from_size ? all->trunc : all->zext);
154 
155   PUT_MODE (all->reg, from_mode);
156   set_convert_cost (to_mode, from_mode, speed,
157                         set_src_cost (which, to_mode, speed));
158   /* Restore all->reg's mode.  */
159   PUT_MODE (all->reg, to_mode);
160 }
161 
162 static void
init_expmed_one_mode(struct init_expmed_rtl * all,machine_mode mode,int speed)163 init_expmed_one_mode (struct init_expmed_rtl *all,
164                           machine_mode mode, int speed)
165 {
166   int m, n, mode_bitsize;
167   machine_mode mode_from;
168 
169   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
170 
171   PUT_MODE (all->reg, mode);
172   PUT_MODE (all->plus, mode);
173   PUT_MODE (all->neg, mode);
174   PUT_MODE (all->mult, mode);
175   PUT_MODE (all->sdiv, mode);
176   PUT_MODE (all->udiv, mode);
177   PUT_MODE (all->sdiv_32, mode);
178   PUT_MODE (all->smod_32, mode);
179   PUT_MODE (all->wide_trunc, mode);
180   PUT_MODE (all->shift, mode);
181   PUT_MODE (all->shift_mult, mode);
182   PUT_MODE (all->shift_add, mode);
183   PUT_MODE (all->shift_sub0, mode);
184   PUT_MODE (all->shift_sub1, mode);
185   PUT_MODE (all->zext, mode);
186   PUT_MODE (all->trunc, mode);
187 
188   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
189   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
190   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
191   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
192   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
193 
194   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
195                                              <= 2 * add_cost (speed, mode)));
196   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
197                                              <= 4 * add_cost (speed, mode)));
198 
199   set_shift_cost (speed, mode, 0, 0);
200   {
201     int cost = add_cost (speed, mode);
202     set_shiftadd_cost (speed, mode, 0, cost);
203     set_shiftsub0_cost (speed, mode, 0, cost);
204     set_shiftsub1_cost (speed, mode, 0, cost);
205   }
206 
207   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
208   for (m = 1; m < n; m++)
209     {
210       XEXP (all->shift, 1) = all->cint[m];
211       XEXP (all->shift_mult, 1) = all->pow2[m];
212 
213       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
214       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
215                                                                    speed));
216       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
217                                                                       speed));
218       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
219                                                                       speed));
220     }
221 
222   scalar_int_mode int_mode_to;
223   if (is_a <scalar_int_mode> (mode, &int_mode_to))
224     {
225       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
226              mode_from = (machine_mode)(mode_from + 1))
227           init_expmed_one_conv (all, int_mode_to,
228                                     as_a <scalar_int_mode> (mode_from), speed);
229 
230       scalar_int_mode wider_mode;
231       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
232             && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
233           {
234             PUT_MODE (all->reg, mode);
235             PUT_MODE (all->zext, wider_mode);
236             PUT_MODE (all->wide_mult, wider_mode);
237             PUT_MODE (all->wide_lshr, wider_mode);
238             XEXP (all->wide_lshr, 1)
239               = gen_int_shift_amount (wider_mode, mode_bitsize);
240 
241             set_mul_widen_cost (speed, wider_mode,
242                                     set_src_cost (all->wide_mult, wider_mode, speed));
243             set_mul_highpart_cost (speed, int_mode_to,
244                                          set_src_cost (all->wide_trunc,
245                                                          int_mode_to, speed));
246           }
247     }
248 }
249 
250 void
init_expmed(void)251 init_expmed (void)
252 {
253   struct init_expmed_rtl all;
254   machine_mode mode = QImode;
255   int m, speed;
256 
257   memset (&all, 0, sizeof all);
258   for (m = 1; m < MAX_BITS_PER_WORD; m++)
259     {
260       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
261       all.cint[m] = GEN_INT (m);
262     }
263 
264   /* Avoid using hard regs in ways which may be unsupported.  */
265   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
266   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
267   all.neg = gen_rtx_NEG (mode, all.reg);
268   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
269   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
270   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
271   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
272   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
273   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
274   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
275   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
276   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
277   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
278   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
279   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
280   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
281   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
282   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
283 
284   for (speed = 0; speed < 2; speed++)
285     {
286       crtl->maybe_hot_insn_p = speed;
287       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
288 
289       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290              mode = (machine_mode)(mode + 1))
291           init_expmed_one_mode (&all, mode, speed);
292 
293       if (MIN_MODE_PARTIAL_INT != VOIDmode)
294           for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295                mode = (machine_mode)(mode + 1))
296             init_expmed_one_mode (&all, mode, speed);
297 
298       if (MIN_MODE_VECTOR_INT != VOIDmode)
299           for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300                mode = (machine_mode)(mode + 1))
301             init_expmed_one_mode (&all, mode, speed);
302     }
303 
304   if (alg_hash_used_p ())
305     {
306       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
308     }
309   else
310     set_alg_hash_used_p (true);
311   default_rtl_profile ();
312 
313   ggc_free (all.trunc);
314   ggc_free (all.shift_sub1);
315   ggc_free (all.shift_sub0);
316   ggc_free (all.shift_add);
317   ggc_free (all.shift_mult);
318   ggc_free (all.shift);
319   ggc_free (all.wide_trunc);
320   ggc_free (all.wide_lshr);
321   ggc_free (all.wide_mult);
322   ggc_free (all.zext);
323   ggc_free (all.smod_32);
324   ggc_free (all.sdiv_32);
325   ggc_free (all.udiv);
326   ggc_free (all.sdiv);
327   ggc_free (all.mult);
328   ggc_free (all.neg);
329   ggc_free (all.plus);
330   ggc_free (all.reg);
331 }
332 
333 /* Return an rtx representing minus the value of X.
334    MODE is the intended mode of the result,
335    useful if X is a CONST_INT.  */
336 
337 rtx
negate_rtx(machine_mode mode,rtx x)338 negate_rtx (machine_mode mode, rtx x)
339 {
340   rtx result = simplify_unary_operation (NEG, mode, x, mode);
341 
342   if (result == 0)
343     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
344 
345   return result;
346 }
347 
348 /* Whether reverse storage order is supported on the target.  */
349 static int reverse_storage_order_supported = -1;
350 
351 /* Check whether reverse storage order is supported on the target.  */
352 
353 static void
check_reverse_storage_order_support(void)354 check_reverse_storage_order_support (void)
355 {
356   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
357     {
358       reverse_storage_order_supported = 0;
359       sorry ("reverse scalar storage order");
360     }
361   else
362     reverse_storage_order_supported = 1;
363 }
364 
365 /* Whether reverse FP storage order is supported on the target.  */
366 static int reverse_float_storage_order_supported = -1;
367 
368 /* Check whether reverse FP storage order is supported on the target.  */
369 
370 static void
check_reverse_float_storage_order_support(void)371 check_reverse_float_storage_order_support (void)
372 {
373   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
374     {
375       reverse_float_storage_order_supported = 0;
376       sorry ("reverse floating-point scalar storage order");
377     }
378   else
379     reverse_float_storage_order_supported = 1;
380 }
381 
382 /* Return an rtx representing value of X with reverse storage order.
383    MODE is the intended mode of the result,
384    useful if X is a CONST_INT.  */
385 
386 rtx
flip_storage_order(machine_mode mode,rtx x)387 flip_storage_order (machine_mode mode, rtx x)
388 {
389   scalar_int_mode int_mode;
390   rtx result;
391 
392   if (mode == QImode)
393     return x;
394 
395   if (COMPLEX_MODE_P (mode))
396     {
397       rtx real = read_complex_part (x, false);
398       rtx imag = read_complex_part (x, true);
399 
400       real = flip_storage_order (GET_MODE_INNER (mode), real);
401       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
402 
403       return gen_rtx_CONCAT (mode, real, imag);
404     }
405 
406   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
407     check_reverse_storage_order_support ();
408 
409   if (!is_a <scalar_int_mode> (mode, &int_mode))
410     {
411       if (FLOAT_MODE_P (mode)
412             && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
413           check_reverse_float_storage_order_support ();
414 
415       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
416             || !targetm.scalar_mode_supported_p (int_mode))
417           {
418             sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
419             return x;
420           }
421       x = gen_lowpart (int_mode, x);
422     }
423 
424   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
425   if (result == 0)
426     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
427 
428   if (int_mode != mode)
429     result = gen_lowpart (mode, result);
430 
431   return result;
432 }
433 
434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
435    first unit of mode MODE that contains a bitfield of size BITSIZE at
436    bit position BITNUM.  If MODE is not set, return a BLKmode reference
437    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
438    of the field within the new memory.  */
439 
440 static rtx
narrow_bit_field_mem(rtx mem,opt_scalar_int_mode mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,unsigned HOST_WIDE_INT * new_bitnum)441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
442                           unsigned HOST_WIDE_INT bitsize,
443                           unsigned HOST_WIDE_INT bitnum,
444                           unsigned HOST_WIDE_INT *new_bitnum)
445 {
446   scalar_int_mode imode;
447   if (mode.exists (&imode))
448     {
449       unsigned int unit = GET_MODE_BITSIZE (imode);
450       *new_bitnum = bitnum % unit;
451       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
452       return adjust_bitfield_address (mem, imode, offset);
453     }
454   else
455     {
456       *new_bitnum = bitnum % BITS_PER_UNIT;
457       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
458       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
459                                   / BITS_PER_UNIT);
460       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
461     }
462 }
463 
464 /* The caller wants to perform insertion or extraction PATTERN on a
465    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
466    BITREGION_START and BITREGION_END are as for store_bit_field
467    and FIELDMODE is the natural mode of the field.
468 
469    Search for a mode that is compatible with the memory access
470    restrictions and (where applicable) with a register insertion or
471    extraction.  Return the new memory on success, storing the adjusted
472    bit position in *NEW_BITNUM.  Return null otherwise.  */
473 
474 static rtx
adjust_bit_field_mem_for_reg(enum extraction_pattern pattern,rtx op0,HOST_WIDE_INT bitsize,HOST_WIDE_INT bitnum,poly_uint64 bitregion_start,poly_uint64 bitregion_end,machine_mode fieldmode,unsigned HOST_WIDE_INT * new_bitnum)475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
476                                     rtx op0, HOST_WIDE_INT bitsize,
477                                     HOST_WIDE_INT bitnum,
478                                     poly_uint64 bitregion_start,
479                                     poly_uint64 bitregion_end,
480                                     machine_mode fieldmode,
481                                     unsigned HOST_WIDE_INT *new_bitnum)
482 {
483   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
484                                         bitregion_end, MEM_ALIGN (op0),
485                                         MEM_VOLATILE_P (op0));
486   scalar_int_mode best_mode;
487   if (iter.next_mode (&best_mode))
488     {
489       /* We can use a memory in BEST_MODE.  See whether this is true for
490            any wider modes.  All other things being equal, we prefer to
491            use the widest mode possible because it tends to expose more
492            CSE opportunities.  */
493       if (!iter.prefer_smaller_modes ())
494           {
495             /* Limit the search to the mode required by the corresponding
496                register insertion or extraction instruction, if any.  */
497             scalar_int_mode limit_mode = word_mode;
498             extraction_insn insn;
499             if (get_best_reg_extraction_insn (&insn, pattern,
500                                                       GET_MODE_BITSIZE (best_mode),
501                                                       fieldmode))
502               limit_mode = insn.field_mode;
503 
504             scalar_int_mode wider_mode;
505             while (iter.next_mode (&wider_mode)
506                      && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
507               best_mode = wider_mode;
508           }
509       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
510                                            new_bitnum);
511     }
512   return NULL_RTX;
513 }
514 
515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
516    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
517    offset is then BITNUM / BITS_PER_UNIT.  */
518 
519 static bool
lowpart_bit_field_p(poly_uint64 bitnum,poly_uint64 bitsize,machine_mode struct_mode)520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
521                          machine_mode struct_mode)
522 {
523   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
524   if (BYTES_BIG_ENDIAN)
525     return (multiple_p (bitnum, BITS_PER_UNIT)
526               && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
527                     || multiple_p (bitnum + bitsize,
528                                      regsize * BITS_PER_UNIT)));
529   else
530     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
531 }
532 
533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
534    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
535    Return false if the access would touch memory outside the range
536    BITREGION_START to BITREGION_END for conformance to the C++ memory
537    model.  */
538 
539 static bool
strict_volatile_bitfield_p(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,scalar_int_mode fieldmode,poly_uint64 bitregion_start,poly_uint64 bitregion_end)540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
541                                   unsigned HOST_WIDE_INT bitnum,
542                                   scalar_int_mode fieldmode,
543                                   poly_uint64 bitregion_start,
544                                   poly_uint64 bitregion_end)
545 {
546   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
547 
548   /* -fstrict-volatile-bitfields must be enabled and we must have a
549      volatile MEM.  */
550   if (!MEM_P (op0)
551       || !MEM_VOLATILE_P (op0)
552       || flag_strict_volatile_bitfields <= 0)
553     return false;
554 
555   /* The bit size must not be larger than the field mode, and
556      the field mode must not be larger than a word.  */
557   if (bitsize > modesize || modesize > BITS_PER_WORD)
558     return false;
559 
560   /* Check for cases of unaligned fields that must be split.  */
561   if (bitnum % modesize + bitsize > modesize)
562     return false;
563 
564   /* The memory must be sufficiently aligned for a MODESIZE access.
565      This condition guarantees, that the memory access will not
566      touch anything after the end of the structure.  */
567   if (MEM_ALIGN (op0) < modesize)
568     return false;
569 
570   /* Check for cases where the C++ memory model applies.  */
571   if (maybe_ne (bitregion_end, 0U)
572       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
573             || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
574                            bitregion_end)))
575     return false;
576 
577   return true;
578 }
579 
580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
581    bit number BITNUM can be treated as a simple value of mode MODE.
582    Store the byte offset in *BYTENUM if so.  */
583 
584 static bool
simple_mem_bitfield_p(rtx op0,poly_uint64 bitsize,poly_uint64 bitnum,machine_mode mode,poly_uint64 * bytenum)585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
586                            machine_mode mode, poly_uint64 *bytenum)
587 {
588   return (MEM_P (op0)
589             && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
590             && known_eq (bitsize, GET_MODE_BITSIZE (mode))
591             && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
592                 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
593                       && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
594 }
595 
596 /* Try to use instruction INSV to store VALUE into a field of OP0.
597    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
598    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
599    are as for store_bit_field.  */
600 
601 static bool
store_bit_field_using_insv(const extraction_insn * insv,rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx value,scalar_int_mode value_mode)602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
603                                   opt_scalar_int_mode op0_mode,
604                                   unsigned HOST_WIDE_INT bitsize,
605                                   unsigned HOST_WIDE_INT bitnum,
606                                   rtx value, scalar_int_mode value_mode)
607 {
608   class expand_operand ops[4];
609   rtx value1;
610   rtx xop0 = op0;
611   rtx_insn *last = get_last_insn ();
612   bool copy_back = false;
613 
614   scalar_int_mode op_mode = insv->field_mode;
615   unsigned int unit = GET_MODE_BITSIZE (op_mode);
616   if (bitsize == 0 || bitsize > unit)
617     return false;
618 
619   if (MEM_P (xop0))
620     /* Get a reference to the first byte of the field.  */
621     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
622                                          &bitnum);
623   else
624     {
625       /* Convert from counting within OP0 to counting in OP_MODE.  */
626       if (BYTES_BIG_ENDIAN)
627           bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
628 
629       /* If xop0 is a register, we need it in OP_MODE
630            to make it acceptable to the format of insv.  */
631       if (GET_CODE (xop0) == SUBREG)
632           {
633             /* If such a SUBREG can't be created, give up.  */
634             if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
635                                         SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
636               return false;
637             /* We can't just change the mode, because this might clobber op0,
638                and we will need the original value of op0 if insv fails.  */
639             xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
640                                          SUBREG_BYTE (xop0));
641           }
642       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
643           xop0 = gen_lowpart_SUBREG (op_mode, xop0);
644     }
645 
646   /* If the destination is a paradoxical subreg such that we need a
647      truncate to the inner mode, perform the insertion on a temporary and
648      truncate the result to the original destination.  Note that we can't
649      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
650      X) 0)) is (reg:N X).  */
651   if (GET_CODE (xop0) == SUBREG
652       && REG_P (SUBREG_REG (xop0))
653       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
654                                                    op_mode))
655     {
656       rtx tem = gen_reg_rtx (op_mode);
657       emit_move_insn (tem, xop0);
658       xop0 = tem;
659       copy_back = true;
660     }
661 
662   /* There are similar overflow check at the start of store_bit_field_1,
663      but that only check the situation where the field lies completely
664      outside the register, while there do have situation where the field
665      lies partialy in the register, we need to adjust bitsize for this
666      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
667      will broken on those arch support bit insert instruction, like arm, aarch64
668      etc.  */
669   if (bitsize + bitnum > unit && bitnum < unit)
670     {
671       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
672                  "destination object, data truncated into %wu-bit",
673                  bitsize, unit - bitnum);
674       bitsize = unit - bitnum;
675     }
676 
677   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
678      "backwards" from the size of the unit we are inserting into.
679      Otherwise, we count bits from the most significant on a
680      BYTES/BITS_BIG_ENDIAN machine.  */
681 
682   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
683     bitnum = unit - bitsize - bitnum;
684 
685   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
686   value1 = value;
687   if (value_mode != op_mode)
688     {
689       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
690           {
691             rtx tmp;
692             /* Optimization: Don't bother really extending VALUE
693                if it has all the bits we will actually use.  However,
694                if we must narrow it, be sure we do it correctly.  */
695 
696             if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
697               {
698                 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
699                 if (! tmp)
700                     tmp = simplify_gen_subreg (op_mode,
701                                                      force_reg (value_mode, value1),
702                                                      value_mode, 0);
703               }
704             else
705               {
706                 tmp = gen_lowpart_if_possible (op_mode, value1);
707                 if (! tmp)
708                     tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
709               }
710             value1 = tmp;
711           }
712       else if (CONST_INT_P (value))
713           value1 = gen_int_mode (INTVAL (value), op_mode);
714       else
715           /* Parse phase is supposed to make VALUE's data type
716              match that of the component reference, which is a type
717              at least as wide as the field; so VALUE should have
718              a mode that corresponds to that type.  */
719           gcc_assert (CONSTANT_P (value));
720     }
721 
722   create_fixed_operand (&ops[0], xop0);
723   create_integer_operand (&ops[1], bitsize);
724   create_integer_operand (&ops[2], bitnum);
725   create_input_operand (&ops[3], value1, op_mode);
726   if (maybe_expand_insn (insv->icode, 4, ops))
727     {
728       if (copy_back)
729           convert_move (op0, xop0, true);
730       return true;
731     }
732   delete_insns_since (last);
733   return false;
734 }
735 
736 /* A subroutine of store_bit_field, with the same arguments.  Return true
737    if the operation could be implemented.
738 
739    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
740    no other way of implementing the operation.  If FALLBACK_P is false,
741    return false instead.  */
742 
743 static bool
store_bit_field_1(rtx str_rtx,poly_uint64 bitsize,poly_uint64 bitnum,poly_uint64 bitregion_start,poly_uint64 bitregion_end,machine_mode fieldmode,rtx value,bool reverse,bool fallback_p)744 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
745                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
746                        machine_mode fieldmode,
747                        rtx value, bool reverse, bool fallback_p)
748 {
749   rtx op0 = str_rtx;
750 
751   while (GET_CODE (op0) == SUBREG)
752     {
753       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
754       op0 = SUBREG_REG (op0);
755     }
756 
757   /* No action is needed if the target is a register and if the field
758      lies completely outside that register.  This can occur if the source
759      code contains an out-of-bounds access to a small array.  */
760   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
761     return true;
762 
763   /* Use vec_set patterns for inserting parts of vectors whenever
764      available.  */
765   machine_mode outermode = GET_MODE (op0);
766   scalar_mode innermode = GET_MODE_INNER (outermode);
767   poly_uint64 pos;
768   if (VECTOR_MODE_P (outermode)
769       && !MEM_P (op0)
770       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
771       && fieldmode == innermode
772       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
773       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
774     {
775       class expand_operand ops[3];
776       enum insn_code icode = optab_handler (vec_set_optab, outermode);
777 
778       create_fixed_operand (&ops[0], op0);
779       create_input_operand (&ops[1], value, innermode);
780       create_integer_operand (&ops[2], pos);
781       if (maybe_expand_insn (icode, 3, ops))
782           return true;
783     }
784 
785   /* If the target is a register, overwriting the entire object, or storing
786      a full-word or multi-word field can be done with just a SUBREG.  */
787   if (!MEM_P (op0)
788       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
789     {
790       /* Use the subreg machinery either to narrow OP0 to the required
791            words or to cope with mode punning between equal-sized modes.
792            In the latter case, use subreg on the rhs side, not lhs.  */
793       rtx sub;
794       HOST_WIDE_INT regnum;
795       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
796       if (known_eq (bitnum, 0U)
797             && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
798           {
799             sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
800             if (sub)
801               {
802                 if (reverse)
803                     sub = flip_storage_order (GET_MODE (op0), sub);
804                 emit_move_insn (op0, sub);
805                 return true;
806               }
807           }
808       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
809                  && multiple_p (bitsize, regsize * BITS_PER_UNIT)
810                  && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
811           {
812             sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
813                                              regnum * regsize);
814             if (sub)
815               {
816                 if (reverse)
817                     value = flip_storage_order (fieldmode, value);
818                 emit_move_insn (sub, value);
819                 return true;
820               }
821           }
822     }
823 
824   /* If the target is memory, storing any naturally aligned field can be
825      done with a simple store.  For targets that support fast unaligned
826      memory, any naturally sized, unit aligned field can be done directly.  */
827   poly_uint64 bytenum;
828   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
829     {
830       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
831       if (reverse)
832           value = flip_storage_order (fieldmode, value);
833       emit_move_insn (op0, value);
834       return true;
835     }
836 
837   /* It's possible we'll need to handle other cases here for
838      polynomial bitnum and bitsize.  */
839 
840   /* From here on we need to be looking at a fixed-size insertion.  */
841   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
842   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
843 
844   /* Make sure we are playing with integral modes.  Pun with subregs
845      if we aren't.  This must come after the entire register case above,
846      since that case is valid for any mode.  The following cases are only
847      valid for integral modes.  */
848   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
849   scalar_int_mode imode;
850   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
851     {
852       if (MEM_P (op0))
853           op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
854                                                       0, MEM_SIZE (op0));
855       else if (!op0_mode.exists ())
856           {
857             if (ibitnum == 0
858                 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
859                 && MEM_P (value)
860                 && !reverse)
861               {
862                 value = adjust_address (value, GET_MODE (op0), 0);
863                 emit_move_insn (op0, value);
864                 return true;
865               }
866             if (!fallback_p)
867               return false;
868             rtx temp = assign_stack_temp (GET_MODE (op0),
869                                                   GET_MODE_SIZE (GET_MODE (op0)));
870             emit_move_insn (temp, op0);
871             store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
872                                    reverse, fallback_p);
873             emit_move_insn (op0, temp);
874             return true;
875           }
876       else
877           op0 = gen_lowpart (op0_mode.require (), op0);
878     }
879 
880   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
881                                            bitregion_start, bitregion_end,
882                                            fieldmode, value, reverse, fallback_p);
883 }
884 
885 /* Subroutine of store_bit_field_1, with the same arguments, except
886    that BITSIZE and BITNUM are constant.  Handle cases specific to
887    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
888    otherwise OP0 is a BLKmode MEM.  */
889 
890 static bool
store_integral_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,poly_uint64 bitregion_start,poly_uint64 bitregion_end,machine_mode fieldmode,rtx value,bool reverse,bool fallback_p)891 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
892                                 unsigned HOST_WIDE_INT bitsize,
893                                 unsigned HOST_WIDE_INT bitnum,
894                                 poly_uint64 bitregion_start,
895                                 poly_uint64 bitregion_end,
896                                 machine_mode fieldmode,
897                                 rtx value, bool reverse, bool fallback_p)
898 {
899   /* Storing an lsb-aligned field in a register
900      can be done with a movstrict instruction.  */
901 
902   if (!MEM_P (op0)
903       && !reverse
904       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
905       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
906       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
907     {
908       class expand_operand ops[2];
909       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
910       rtx arg0 = op0;
911       unsigned HOST_WIDE_INT subreg_off;
912 
913       if (GET_CODE (arg0) == SUBREG)
914           {
915             /* Else we've got some float mode source being extracted into
916                a different float mode destination -- this combination of
917                subregs results in Severe Tire Damage.  */
918             gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
919                           || GET_MODE_CLASS (fieldmode) == MODE_INT
920                           || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
921             arg0 = SUBREG_REG (arg0);
922           }
923 
924       subreg_off = bitnum / BITS_PER_UNIT;
925       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
926             /* STRICT_LOW_PART must have a non-paradoxical subreg as
927                operand.  */
928             && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
929           {
930             arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
931 
932             create_fixed_operand (&ops[0], arg0);
933             /* Shrink the source operand to FIELDMODE.  */
934             create_convert_operand_to (&ops[1], value, fieldmode, false);
935             if (maybe_expand_insn (icode, 2, ops))
936               return true;
937           }
938     }
939 
940   /* Handle fields bigger than a word.  */
941 
942   if (bitsize > BITS_PER_WORD)
943     {
944       /* Here we transfer the words of the field
945            in the order least significant first.
946            This is because the most significant word is the one which may
947            be less than full.
948            However, only do that if the value is not BLKmode.  */
949 
950       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
951       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
952       rtx_insn *last;
953 
954       /* This is the mode we must force value to, so that there will be enough
955            subwords to extract.  Note that fieldmode will often (always?) be
956            VOIDmode, because that is what store_field uses to indicate that this
957            is a bit field, but passing VOIDmode to operand_subword_force
958            is not allowed.
959 
960            The mode must be fixed-size, since insertions into variable-sized
961            objects are meant to be handled before calling this function.  */
962       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
963       if (value_mode == VOIDmode)
964           value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
965 
966       last = get_last_insn ();
967       for (int i = 0; i < nwords; i++)
968           {
969             /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
970                except maybe for the last iteration.  */
971             const unsigned HOST_WIDE_INT new_bitsize
972               = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
973             /* Bit offset from the starting bit number in the target.  */
974             const unsigned int bit_offset
975               = backwards ^ reverse
976                 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
977                 : i * BITS_PER_WORD;
978             /* Starting word number in the value.  */
979             const unsigned int wordnum
980               = backwards
981                 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
982                 : i;
983             /* The chunk of the value in word_mode.  We use bit-field extraction
984                 in BLKmode to handle unaligned memory references and to shift the
985                 last chunk right on big-endian machines if need be.  */
986             rtx value_word
987               = fieldmode == BLKmode
988                 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
989                                            1, NULL_RTX, word_mode, word_mode, false,
990                                            NULL)
991                 : operand_subword_force (value, wordnum, value_mode);
992 
993             if (!store_bit_field_1 (op0, new_bitsize,
994                                           bitnum + bit_offset,
995                                           bitregion_start, bitregion_end,
996                                           word_mode,
997                                           value_word, reverse, fallback_p))
998               {
999                 delete_insns_since (last);
1000                 return false;
1001               }
1002           }
1003       return true;
1004     }
1005 
1006   /* If VALUE has a floating-point or complex mode, access it as an
1007      integer of the corresponding size.  This can occur on a machine
1008      with 64 bit registers that uses SFmode for float.  It can also
1009      occur for unaligned float or complex fields.  */
1010   rtx orig_value = value;
1011   scalar_int_mode value_mode;
1012   if (GET_MODE (value) == VOIDmode)
1013     /* By this point we've dealt with values that are bigger than a word,
1014        so word_mode is a conservatively correct choice.  */
1015     value_mode = word_mode;
1016   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1017     {
1018       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1019       value = gen_reg_rtx (value_mode);
1020       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1021     }
1022 
1023   /* If OP0 is a multi-word register, narrow it to the affected word.
1024      If the region spans two words, defer to store_split_bit_field.
1025      Don't do this if op0 is a single hard register wider than word
1026      such as a float or vector register.  */
1027   if (!MEM_P (op0)
1028       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1029       && (!REG_P (op0)
1030             || !HARD_REGISTER_P (op0)
1031             || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1032     {
1033       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1034           {
1035             if (!fallback_p)
1036               return false;
1037 
1038             store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1039                                          bitregion_start, bitregion_end,
1040                                          value, value_mode, reverse);
1041             return true;
1042           }
1043       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1044                                          bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1045       gcc_assert (op0);
1046       op0_mode = word_mode;
1047       bitnum %= BITS_PER_WORD;
1048     }
1049 
1050   /* From here on we can assume that the field to be stored in fits
1051      within a word.  If the destination is a register, it too fits
1052      in a word.  */
1053 
1054   extraction_insn insv;
1055   if (!MEM_P (op0)
1056       && !reverse
1057       && get_best_reg_extraction_insn (&insv, EP_insv,
1058                                                GET_MODE_BITSIZE (op0_mode.require ()),
1059                                                fieldmode)
1060       && store_bit_field_using_insv (&insv, op0, op0_mode,
1061                                              bitsize, bitnum, value, value_mode))
1062     return true;
1063 
1064   /* If OP0 is a memory, try copying it to a register and seeing if a
1065      cheap register alternative is available.  */
1066   if (MEM_P (op0) && !reverse)
1067     {
1068       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1069                                                   fieldmode)
1070             && store_bit_field_using_insv (&insv, op0, op0_mode,
1071                                                    bitsize, bitnum, value, value_mode))
1072           return true;
1073 
1074       rtx_insn *last = get_last_insn ();
1075 
1076       /* Try loading part of OP0 into a register, inserting the bitfield
1077            into that, and then copying the result back to OP0.  */
1078       unsigned HOST_WIDE_INT bitpos;
1079       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1080                                                          bitregion_start, bitregion_end,
1081                                                          fieldmode, &bitpos);
1082       if (xop0)
1083           {
1084             rtx tempreg = copy_to_reg (xop0);
1085             if (store_bit_field_1 (tempreg, bitsize, bitpos,
1086                                          bitregion_start, bitregion_end,
1087                                          fieldmode, orig_value, reverse, false))
1088               {
1089                 emit_move_insn (xop0, tempreg);
1090                 return true;
1091               }
1092             delete_insns_since (last);
1093           }
1094     }
1095 
1096   if (!fallback_p)
1097     return false;
1098 
1099   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1100                                bitregion_end, value, value_mode, reverse);
1101   return true;
1102 }
1103 
1104 /* Generate code to store value from rtx VALUE
1105    into a bit-field within structure STR_RTX
1106    containing BITSIZE bits starting at bit BITNUM.
1107 
1108    BITREGION_START is bitpos of the first bitfield in this region.
1109    BITREGION_END is the bitpos of the ending bitfield in this region.
1110    These two fields are 0, if the C++ memory model does not apply,
1111    or we are not interested in keeping track of bitfield regions.
1112 
1113    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1114 
1115    If REVERSE is true, the store is to be done in reverse order.  */
1116 
1117 void
store_bit_field(rtx str_rtx,poly_uint64 bitsize,poly_uint64 bitnum,poly_uint64 bitregion_start,poly_uint64 bitregion_end,machine_mode fieldmode,rtx value,bool reverse)1118 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1119                      poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1120                      machine_mode fieldmode,
1121                      rtx value, bool reverse)
1122 {
1123   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1124   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1125   scalar_int_mode int_mode;
1126   if (bitsize.is_constant (&ibitsize)
1127       && bitnum.is_constant (&ibitnum)
1128       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1129       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1130                                              bitregion_start, bitregion_end))
1131     {
1132       /* Storing of a full word can be done with a simple store.
1133            We know here that the field can be accessed with one single
1134            instruction.  For targets that support unaligned memory,
1135            an unaligned access may be necessary.  */
1136       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1137           {
1138             str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1139                                                        ibitnum / BITS_PER_UNIT);
1140             if (reverse)
1141               value = flip_storage_order (int_mode, value);
1142             gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1143             emit_move_insn (str_rtx, value);
1144           }
1145       else
1146           {
1147             rtx temp;
1148 
1149             str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1150                                                     ibitnum, &ibitnum);
1151             gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1152             temp = copy_to_reg (str_rtx);
1153             if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1154                                           int_mode, value, reverse, true))
1155               gcc_unreachable ();
1156 
1157             emit_move_insn (str_rtx, temp);
1158           }
1159 
1160       return;
1161     }
1162 
1163   /* Under the C++0x memory model, we must not touch bits outside the
1164      bit region.  Adjust the address to start at the beginning of the
1165      bit region.  */
1166   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1167     {
1168       scalar_int_mode best_mode;
1169       machine_mode addr_mode = VOIDmode;
1170 
1171       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1172       bitnum -= bitregion_start;
1173       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1174       bitregion_end -= bitregion_start;
1175       bitregion_start = 0;
1176       if (bitsize.is_constant (&ibitsize)
1177             && bitnum.is_constant (&ibitnum)
1178             && get_best_mode (ibitsize, ibitnum,
1179                                   bitregion_start, bitregion_end,
1180                                   MEM_ALIGN (str_rtx), INT_MAX,
1181                                   MEM_VOLATILE_P (str_rtx), &best_mode))
1182           addr_mode = best_mode;
1183       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1184                                                         offset, size);
1185     }
1186 
1187   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1188                                 bitregion_start, bitregion_end,
1189                                 fieldmode, value, reverse, true))
1190     gcc_unreachable ();
1191 }
1192 
1193 /* Use shifts and boolean operations to store VALUE into a bit field of
1194    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1195    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1196    the mode of VALUE.
1197 
1198    If REVERSE is true, the store is to be done in reverse order.  */
1199 
1200 static void
store_fixed_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,poly_uint64 bitregion_start,poly_uint64 bitregion_end,rtx value,scalar_int_mode value_mode,bool reverse)1201 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1202                            unsigned HOST_WIDE_INT bitsize,
1203                            unsigned HOST_WIDE_INT bitnum,
1204                            poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1205                            rtx value, scalar_int_mode value_mode, bool reverse)
1206 {
1207   /* There is a case not handled here:
1208      a structure with a known alignment of just a halfword
1209      and a field split across two aligned halfwords within the structure.
1210      Or likewise a structure with a known alignment of just a byte
1211      and a field split across two bytes.
1212      Such cases are not supposed to be able to occur.  */
1213 
1214   scalar_int_mode best_mode;
1215   if (MEM_P (op0))
1216     {
1217       unsigned int max_bitsize = BITS_PER_WORD;
1218       scalar_int_mode imode;
1219       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1220           max_bitsize = GET_MODE_BITSIZE (imode);
1221 
1222       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1223                                 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1224                                 &best_mode))
1225           {
1226             /* The only way this should occur is if the field spans word
1227                boundaries.  */
1228             store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1229                                          bitregion_start, bitregion_end,
1230                                          value, value_mode, reverse);
1231             return;
1232           }
1233 
1234       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1235     }
1236   else
1237     best_mode = op0_mode.require ();
1238 
1239   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1240                                  value, value_mode, reverse);
1241 }
1242 
1243 /* Helper function for store_fixed_bit_field, stores
1244    the bit field always using MODE, which is the mode of OP0.  The other
1245    arguments are as for store_fixed_bit_field.  */
1246 
1247 static void
store_fixed_bit_field_1(rtx op0,scalar_int_mode mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx value,scalar_int_mode value_mode,bool reverse)1248 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1249                                unsigned HOST_WIDE_INT bitsize,
1250                                unsigned HOST_WIDE_INT bitnum,
1251                                rtx value, scalar_int_mode value_mode, bool reverse)
1252 {
1253   rtx temp;
1254   int all_zero = 0;
1255   int all_one = 0;
1256 
1257   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1258      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1259 
1260   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1261     /* BITNUM is the distance between our msb
1262        and that of the containing datum.
1263        Convert it to the distance from the lsb.  */
1264     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1265 
1266   /* Now BITNUM is always the distance between our lsb
1267      and that of OP0.  */
1268 
1269   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1270      we must first convert its mode to MODE.  */
1271 
1272   if (CONST_INT_P (value))
1273     {
1274       unsigned HOST_WIDE_INT v = UINTVAL (value);
1275 
1276       if (bitsize < HOST_BITS_PER_WIDE_INT)
1277           v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1278 
1279       if (v == 0)
1280           all_zero = 1;
1281       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1282                     && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1283                  || (bitsize == HOST_BITS_PER_WIDE_INT
1284                        && v == HOST_WIDE_INT_M1U))
1285           all_one = 1;
1286 
1287       value = lshift_value (mode, v, bitnum);
1288     }
1289   else
1290     {
1291       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1292                           && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1293 
1294       if (value_mode != mode)
1295           value = convert_to_mode (mode, value, 1);
1296 
1297       if (must_and)
1298           value = expand_binop (mode, and_optab, value,
1299                                     mask_rtx (mode, 0, bitsize, 0),
1300                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
1301       if (bitnum > 0)
1302           value = expand_shift (LSHIFT_EXPR, mode, value,
1303                                     bitnum, NULL_RTX, 1);
1304     }
1305 
1306   if (reverse)
1307     value = flip_storage_order (mode, value);
1308 
1309   /* Now clear the chosen bits in OP0,
1310      except that if VALUE is -1 we need not bother.  */
1311   /* We keep the intermediates in registers to allow CSE to combine
1312      consecutive bitfield assignments.  */
1313 
1314   temp = force_reg (mode, op0);
1315 
1316   if (! all_one)
1317     {
1318       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1319       if (reverse)
1320           mask = flip_storage_order (mode, mask);
1321       temp = expand_binop (mode, and_optab, temp, mask,
1322                                  NULL_RTX, 1, OPTAB_LIB_WIDEN);
1323       temp = force_reg (mode, temp);
1324     }
1325 
1326   /* Now logical-or VALUE into OP0, unless it is zero.  */
1327 
1328   if (! all_zero)
1329     {
1330       temp = expand_binop (mode, ior_optab, temp, value,
1331                                  NULL_RTX, 1, OPTAB_LIB_WIDEN);
1332       temp = force_reg (mode, temp);
1333     }
1334 
1335   if (op0 != temp)
1336     {
1337       op0 = copy_rtx (op0);
1338       emit_move_insn (op0, temp);
1339     }
1340 }
1341 
1342 /* Store a bit field that is split across multiple accessible memory objects.
1343 
1344    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1345    BITSIZE is the field width; BITPOS the position of its first bit
1346    (within the word).
1347    VALUE is the value to store, which has mode VALUE_MODE.
1348    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1349    a BLKmode MEM.
1350 
1351    If REVERSE is true, the store is to be done in reverse order.
1352 
1353    This does not yet handle fields wider than BITS_PER_WORD.  */
1354 
1355 static void
store_split_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,poly_uint64 bitregion_start,poly_uint64 bitregion_end,rtx value,scalar_int_mode value_mode,bool reverse)1356 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1357                            unsigned HOST_WIDE_INT bitsize,
1358                            unsigned HOST_WIDE_INT bitpos,
1359                            poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1360                            rtx value, scalar_int_mode value_mode, bool reverse)
1361 {
1362   unsigned int unit, total_bits, bitsdone = 0;
1363 
1364   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1365      much at a time.  */
1366   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1367     unit = BITS_PER_WORD;
1368   else
1369     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1370 
1371   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1372      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1373      again, and we will mutually recurse forever.  */
1374   if (MEM_P (op0) && op0_mode.exists ())
1375     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1376 
1377   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1378      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1379      that VALUE might be a floating-point constant.  */
1380   if (CONSTANT_P (value) && !CONST_INT_P (value))
1381     {
1382       rtx word = gen_lowpart_common (word_mode, value);
1383 
1384       if (word && (value != word))
1385           value = word;
1386       else
1387           value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1388       value_mode = word_mode;
1389     }
1390 
1391   total_bits = GET_MODE_BITSIZE (value_mode);
1392 
1393   while (bitsdone < bitsize)
1394     {
1395       unsigned HOST_WIDE_INT thissize;
1396       unsigned HOST_WIDE_INT thispos;
1397       unsigned HOST_WIDE_INT offset;
1398       rtx part;
1399 
1400       offset = (bitpos + bitsdone) / unit;
1401       thispos = (bitpos + bitsdone) % unit;
1402 
1403       /* When region of bytes we can touch is restricted, decrease
1404            UNIT close to the end of the region as needed.  If op0 is a REG
1405            or SUBREG of REG, don't do this, as there can't be data races
1406            on a register and we can expand shorter code in some cases.  */
1407       if (maybe_ne (bitregion_end, 0U)
1408             && unit > BITS_PER_UNIT
1409             && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1410             && !REG_P (op0)
1411             && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1412           {
1413             unit = unit / 2;
1414             continue;
1415           }
1416 
1417       /* THISSIZE must not overrun a word boundary.  Otherwise,
1418            store_fixed_bit_field will call us again, and we will mutually
1419            recurse forever.  */
1420       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1421       thissize = MIN (thissize, unit - thispos);
1422 
1423       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1424           {
1425             /* Fetch successively less significant portions.  */
1426             if (CONST_INT_P (value))
1427               part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1428                                    >> (bitsize - bitsdone - thissize))
1429                                   & ((HOST_WIDE_INT_1 << thissize) - 1));
1430           /* Likewise, but the source is little-endian.  */
1431           else if (reverse)
1432               part = extract_fixed_bit_field (word_mode, value, value_mode,
1433                                                       thissize,
1434                                                       bitsize - bitsdone - thissize,
1435                                                       NULL_RTX, 1, false);
1436             else
1437               /* The args are chosen so that the last part includes the
1438                  lsb.  Give extract_bit_field the value it needs (with
1439                  endianness compensation) to fetch the piece we want.  */
1440               part = extract_fixed_bit_field (word_mode, value, value_mode,
1441                                                       thissize,
1442                                                       total_bits - bitsize + bitsdone,
1443                                                       NULL_RTX, 1, false);
1444           }
1445       else
1446           {
1447             /* Fetch successively more significant portions.  */
1448             if (CONST_INT_P (value))
1449               part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1450                                    >> bitsdone)
1451                                   & ((HOST_WIDE_INT_1 << thissize) - 1));
1452             /* Likewise, but the source is big-endian.  */
1453           else if (reverse)
1454               part = extract_fixed_bit_field (word_mode, value, value_mode,
1455                                                       thissize,
1456                                                       total_bits - bitsdone - thissize,
1457                                                       NULL_RTX, 1, false);
1458             else
1459               part = extract_fixed_bit_field (word_mode, value, value_mode,
1460                                                       thissize, bitsdone, NULL_RTX,
1461                                                       1, false);
1462           }
1463 
1464       /* If OP0 is a register, then handle OFFSET here.  */
1465       rtx op0_piece = op0;
1466       opt_scalar_int_mode op0_piece_mode = op0_mode;
1467       if (SUBREG_P (op0) || REG_P (op0))
1468           {
1469             scalar_int_mode imode;
1470             if (op0_mode.exists (&imode)
1471                 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1472               {
1473                 if (offset)
1474                     op0_piece = const0_rtx;
1475               }
1476             else
1477               {
1478                 op0_piece = operand_subword_force (op0,
1479                                                              offset * unit / BITS_PER_WORD,
1480                                                              GET_MODE (op0));
1481                 op0_piece_mode = word_mode;
1482               }
1483             offset &= BITS_PER_WORD / unit - 1;
1484           }
1485 
1486       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1487            it is just an out-of-bounds access.  Ignore it.  */
1488       if (op0_piece != const0_rtx)
1489           store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1490                                      offset * unit + thispos, bitregion_start,
1491                                      bitregion_end, part, word_mode, reverse);
1492       bitsdone += thissize;
1493     }
1494 }
1495 
1496 /* A subroutine of extract_bit_field_1 that converts return value X
1497    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1498    to extract_bit_field.  */
1499 
1500 static rtx
convert_extracted_bit_field(rtx x,machine_mode mode,machine_mode tmode,bool unsignedp)1501 convert_extracted_bit_field (rtx x, machine_mode mode,
1502                                    machine_mode tmode, bool unsignedp)
1503 {
1504   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1505     return x;
1506 
1507   /* If the x mode is not a scalar integral, first convert to the
1508      integer mode of that size and then access it as a floating-point
1509      value via a SUBREG.  */
1510   if (!SCALAR_INT_MODE_P (tmode))
1511     {
1512       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1513       x = convert_to_mode (int_mode, x, unsignedp);
1514       x = force_reg (int_mode, x);
1515       return gen_lowpart (tmode, x);
1516     }
1517 
1518   return convert_to_mode (tmode, x, unsignedp);
1519 }
1520 
1521 /* Try to use an ext(z)v pattern to extract a field from OP0.
1522    Return the extracted value on success, otherwise return null.
1523    EXTV describes the extraction instruction to use.  If OP0_MODE
1524    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1525    The other arguments are as for extract_bit_field.  */
1526 
1527 static rtx
extract_bit_field_using_extv(const extraction_insn * extv,rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode)1528 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1529                                     opt_scalar_int_mode op0_mode,
1530                                     unsigned HOST_WIDE_INT bitsize,
1531                                     unsigned HOST_WIDE_INT bitnum,
1532                                     int unsignedp, rtx target,
1533                                     machine_mode mode, machine_mode tmode)
1534 {
1535   class expand_operand ops[4];
1536   rtx spec_target = target;
1537   rtx spec_target_subreg = 0;
1538   scalar_int_mode ext_mode = extv->field_mode;
1539   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1540 
1541   if (bitsize == 0 || unit < bitsize)
1542     return NULL_RTX;
1543 
1544   if (MEM_P (op0))
1545     /* Get a reference to the first byte of the field.  */
1546     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1547                                         &bitnum);
1548   else
1549     {
1550       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1551       if (BYTES_BIG_ENDIAN)
1552           bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1553 
1554       /* If op0 is a register, we need it in EXT_MODE to make it
1555            acceptable to the format of ext(z)v.  */
1556       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1557           return NULL_RTX;
1558       if (REG_P (op0) && op0_mode.require () != ext_mode)
1559           op0 = gen_lowpart_SUBREG (ext_mode, op0);
1560     }
1561 
1562   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1563      "backwards" from the size of the unit we are extracting from.
1564      Otherwise, we count bits from the most significant on a
1565      BYTES/BITS_BIG_ENDIAN machine.  */
1566 
1567   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1568     bitnum = unit - bitsize - bitnum;
1569 
1570   if (target == 0)
1571     target = spec_target = gen_reg_rtx (tmode);
1572 
1573   if (GET_MODE (target) != ext_mode)
1574     {
1575       rtx temp;
1576       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1577            between the mode of the extraction (word_mode) and the target
1578            mode.  Instead, create a temporary and use convert_move to set
1579            the target.  */
1580       if (REG_P (target)
1581             && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1582             && (temp = gen_lowpart_if_possible (ext_mode, target)))
1583           {
1584             target = temp;
1585             if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1586               spec_target_subreg = target;
1587           }
1588       else
1589           target = gen_reg_rtx (ext_mode);
1590     }
1591 
1592   create_output_operand (&ops[0], target, ext_mode);
1593   create_fixed_operand (&ops[1], op0);
1594   create_integer_operand (&ops[2], bitsize);
1595   create_integer_operand (&ops[3], bitnum);
1596   if (maybe_expand_insn (extv->icode, 4, ops))
1597     {
1598       target = ops[0].value;
1599       if (target == spec_target)
1600           return target;
1601       if (target == spec_target_subreg)
1602           return spec_target;
1603       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1604     }
1605   return NULL_RTX;
1606 }
1607 
1608 /* See whether it would be valid to extract the part of OP0 with
1609    mode OP0_MODE described by BITNUM and BITSIZE into a value of
1610    mode MODE using a subreg operation.
1611    Return the subreg if so, otherwise return null.  */
1612 
1613 static rtx
extract_bit_field_as_subreg(machine_mode mode,rtx op0,machine_mode op0_mode,poly_uint64 bitsize,poly_uint64 bitnum)1614 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1615                                    machine_mode op0_mode,
1616                                    poly_uint64 bitsize, poly_uint64 bitnum)
1617 {
1618   poly_uint64 bytenum;
1619   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1620       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1621       && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1622       && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1623     return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1624   return NULL_RTX;
1625 }
1626 
1627 /* A subroutine of extract_bit_field, with the same arguments.
1628    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1629    if we can find no other means of implementing the operation.
1630    if FALLBACK_P is false, return NULL instead.  */
1631 
1632 static rtx
extract_bit_field_1(rtx str_rtx,poly_uint64 bitsize,poly_uint64 bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse,bool fallback_p,rtx * alt_rtl)1633 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1634                          int unsignedp, rtx target, machine_mode mode,
1635                          machine_mode tmode, bool reverse, bool fallback_p,
1636                          rtx *alt_rtl)
1637 {
1638   rtx op0 = str_rtx;
1639   machine_mode mode1;
1640 
1641   if (tmode == VOIDmode)
1642     tmode = mode;
1643 
1644   while (GET_CODE (op0) == SUBREG)
1645     {
1646       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1647       op0 = SUBREG_REG (op0);
1648     }
1649 
1650   /* If we have an out-of-bounds access to a register, just return an
1651      uninitialized register of the required mode.  This can occur if the
1652      source code contains an out-of-bounds access to a small array.  */
1653   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1654     return gen_reg_rtx (tmode);
1655 
1656   if (REG_P (op0)
1657       && mode == GET_MODE (op0)
1658       && known_eq (bitnum, 0U)
1659       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1660     {
1661       if (reverse)
1662           op0 = flip_storage_order (mode, op0);
1663       /* We're trying to extract a full register from itself.  */
1664       return op0;
1665     }
1666 
1667   /* First try to check for vector from vector extractions.  */
1668   if (VECTOR_MODE_P (GET_MODE (op0))
1669       && !MEM_P (op0)
1670       && VECTOR_MODE_P (tmode)
1671       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1672       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1673     {
1674       machine_mode new_mode = GET_MODE (op0);
1675       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1676           {
1677             scalar_mode inner_mode = GET_MODE_INNER (tmode);
1678             poly_uint64 nunits;
1679             if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1680                                  GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1681                 || !related_vector_mode (tmode, inner_mode,
1682                                                nunits).exists (&new_mode)
1683                 || maybe_ne (GET_MODE_SIZE (new_mode),
1684                                  GET_MODE_SIZE (GET_MODE (op0))))
1685               new_mode = VOIDmode;
1686           }
1687       poly_uint64 pos;
1688       if (new_mode != VOIDmode
1689             && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1690                 != CODE_FOR_nothing)
1691             && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1692           {
1693             class expand_operand ops[3];
1694             machine_mode outermode = new_mode;
1695             machine_mode innermode = tmode;
1696             enum insn_code icode
1697               = convert_optab_handler (vec_extract_optab, outermode, innermode);
1698 
1699             if (new_mode != GET_MODE (op0))
1700               op0 = gen_lowpart (new_mode, op0);
1701             create_output_operand (&ops[0], target, innermode);
1702             ops[0].target = 1;
1703             create_input_operand (&ops[1], op0, outermode);
1704             create_integer_operand (&ops[2], pos);
1705             if (maybe_expand_insn (icode, 3, ops))
1706               {
1707                 if (alt_rtl && ops[0].target)
1708                     *alt_rtl = target;
1709                 target = ops[0].value;
1710                 if (GET_MODE (target) != mode)
1711                     return gen_lowpart (tmode, target);
1712                 return target;
1713               }
1714           }
1715     }
1716 
1717   /* See if we can get a better vector mode before extracting.  */
1718   if (VECTOR_MODE_P (GET_MODE (op0))
1719       && !MEM_P (op0)
1720       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1721     {
1722       machine_mode new_mode;
1723 
1724       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1725           new_mode = MIN_MODE_VECTOR_FLOAT;
1726       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1727           new_mode = MIN_MODE_VECTOR_FRACT;
1728       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1729           new_mode = MIN_MODE_VECTOR_UFRACT;
1730       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1731           new_mode = MIN_MODE_VECTOR_ACCUM;
1732       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1733           new_mode = MIN_MODE_VECTOR_UACCUM;
1734       else
1735           new_mode = MIN_MODE_VECTOR_INT;
1736 
1737       FOR_EACH_MODE_FROM (new_mode, new_mode)
1738           if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1739               && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1740               && targetm.vector_mode_supported_p (new_mode)
1741               && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1742             break;
1743       if (new_mode != VOIDmode)
1744           op0 = gen_lowpart (new_mode, op0);
1745     }
1746 
1747   /* Use vec_extract patterns for extracting parts of vectors whenever
1748      available.  If that fails, see whether the current modes and bitregion
1749      give a natural subreg.  */
1750   machine_mode outermode = GET_MODE (op0);
1751   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1752     {
1753       scalar_mode innermode = GET_MODE_INNER (outermode);
1754       enum insn_code icode
1755           = convert_optab_handler (vec_extract_optab, outermode, innermode);
1756       poly_uint64 pos;
1757       if (icode != CODE_FOR_nothing
1758             && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1759             && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1760           {
1761             class expand_operand ops[3];
1762 
1763             create_output_operand (&ops[0], target, innermode);
1764             ops[0].target = 1;
1765             create_input_operand (&ops[1], op0, outermode);
1766             create_integer_operand (&ops[2], pos);
1767             if (maybe_expand_insn (icode, 3, ops))
1768               {
1769                 if (alt_rtl && ops[0].target)
1770                     *alt_rtl = target;
1771                 target = ops[0].value;
1772                 if (GET_MODE (target) != mode)
1773                     return gen_lowpart (tmode, target);
1774                 return target;
1775               }
1776           }
1777       /* Using subregs is useful if we're extracting one register vector
1778            from a multi-register vector.  extract_bit_field_as_subreg checks
1779            for valid bitsize and bitnum, so we don't need to do that here.  */
1780       if (VECTOR_MODE_P (mode))
1781           {
1782             rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1783                                                              bitsize, bitnum);
1784             if (sub)
1785               return sub;
1786           }
1787     }
1788 
1789   /* Make sure we are playing with integral modes.  Pun with subregs
1790      if we aren't.  */
1791   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1792   scalar_int_mode imode;
1793   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1794     {
1795       if (MEM_P (op0))
1796           op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1797                                                       0, MEM_SIZE (op0));
1798       else if (op0_mode.exists (&imode))
1799           {
1800             op0 = gen_lowpart (imode, op0);
1801 
1802             /* If we got a SUBREG, force it into a register since we
1803                aren't going to be able to do another SUBREG on it.  */
1804             if (GET_CODE (op0) == SUBREG)
1805               op0 = force_reg (imode, op0);
1806           }
1807       else
1808           {
1809             poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1810             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1811             emit_move_insn (mem, op0);
1812             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1813           }
1814     }
1815 
1816   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1817      If that's wrong, the solution is to test for it and set TARGET to 0
1818      if needed.  */
1819 
1820   /* Get the mode of the field to use for atomic access or subreg
1821      conversion.  */
1822   if (!SCALAR_INT_MODE_P (tmode)
1823       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1824     mode1 = mode;
1825   gcc_assert (mode1 != BLKmode);
1826 
1827   /* Extraction of a full MODE1 value can be done with a subreg as long
1828      as the least significant bit of the value is the least significant
1829      bit of either OP0 or a word of OP0.  */
1830   if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1831     {
1832       rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1833                                                        bitsize, bitnum);
1834       if (sub)
1835           return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1836     }
1837 
1838   /* Extraction of a full MODE1 value can be done with a load as long as
1839      the field is on a byte boundary and is sufficiently aligned.  */
1840   poly_uint64 bytenum;
1841   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1842     {
1843       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1844       if (reverse)
1845           op0 = flip_storage_order (mode1, op0);
1846       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1847     }
1848 
1849   /* If we have a memory source and a non-constant bit offset, restrict
1850      the memory to the referenced bytes.  This is a worst-case fallback
1851      but is useful for things like vector booleans.  */
1852   if (MEM_P (op0) && !bitnum.is_constant ())
1853     {
1854       bytenum = bits_to_bytes_round_down (bitnum);
1855       bitnum = num_trailing_bits (bitnum);
1856       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1857       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1858       op0_mode = opt_scalar_int_mode ();
1859     }
1860 
1861   /* It's possible we'll need to handle other cases here for
1862      polynomial bitnum and bitsize.  */
1863 
1864   /* From here on we need to be looking at a fixed-size insertion.  */
1865   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1866                                              bitnum.to_constant (), unsignedp,
1867                                              target, mode, tmode, reverse, fallback_p);
1868 }
1869 
1870 /* Subroutine of extract_bit_field_1, with the same arguments, except
1871    that BITSIZE and BITNUM are constant.  Handle cases specific to
1872    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1873    otherwise OP0 is a BLKmode MEM.  */
1874 
1875 static rtx
extract_integral_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse,bool fallback_p)1876 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1877                                   unsigned HOST_WIDE_INT bitsize,
1878                                   unsigned HOST_WIDE_INT bitnum, int unsignedp,
1879                                   rtx target, machine_mode mode, machine_mode tmode,
1880                                   bool reverse, bool fallback_p)
1881 {
1882   /* Handle fields bigger than a word.  */
1883 
1884   if (bitsize > BITS_PER_WORD)
1885     {
1886       /* Here we transfer the words of the field
1887            in the order least significant first.
1888            This is because the most significant word is the one which may
1889            be less than full.  */
1890 
1891       const bool backwards = WORDS_BIG_ENDIAN;
1892       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1893       unsigned int i;
1894       rtx_insn *last;
1895 
1896       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1897           target = gen_reg_rtx (mode);
1898 
1899       /* In case we're about to clobber a base register or something
1900            (see gcc.c-torture/execute/20040625-1.c).   */
1901       if (reg_mentioned_p (target, op0))
1902           target = gen_reg_rtx (mode);
1903 
1904       /* Indicate for flow that the entire target reg is being set.  */
1905       emit_clobber (target);
1906 
1907       /* The mode must be fixed-size, since extract_bit_field_1 handles
1908            extractions from variable-sized objects before calling this
1909            function.  */
1910       unsigned int target_size
1911           = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1912       last = get_last_insn ();
1913       for (i = 0; i < nwords; i++)
1914           {
1915             /* If I is 0, use the low-order word in both field and target;
1916                if I is 1, use the next to lowest word; and so on.  */
1917             /* Word number in TARGET to use.  */
1918             unsigned int wordnum
1919               = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1920             /* Offset from start of field in OP0.  */
1921             unsigned int bit_offset = (backwards ^ reverse
1922                                              ? MAX ((int) bitsize - ((int) i + 1)
1923                                                       * BITS_PER_WORD,
1924                                                       0)
1925                                              : (int) i * BITS_PER_WORD);
1926             rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1927             rtx result_part
1928               = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1929                                                        bitsize - i * BITS_PER_WORD),
1930                                            bitnum + bit_offset, 1, target_part,
1931                                            mode, word_mode, reverse, fallback_p, NULL);
1932 
1933             gcc_assert (target_part);
1934             if (!result_part)
1935               {
1936                 delete_insns_since (last);
1937                 return NULL;
1938               }
1939 
1940             if (result_part != target_part)
1941               emit_move_insn (target_part, result_part);
1942           }
1943 
1944       if (unsignedp)
1945           {
1946             /* Unless we've filled TARGET, the upper regs in a multi-reg value
1947                need to be zero'd out.  */
1948             if (target_size > nwords * UNITS_PER_WORD)
1949               {
1950                 unsigned int i, total_words;
1951 
1952                 total_words = target_size / UNITS_PER_WORD;
1953                 for (i = nwords; i < total_words; i++)
1954                     emit_move_insn
1955                       (operand_subword (target,
1956                                             backwards ? total_words - i - 1 : i,
1957                                             1, VOIDmode),
1958                        const0_rtx);
1959               }
1960             return target;
1961           }
1962 
1963       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1964       target = expand_shift (LSHIFT_EXPR, mode, target,
1965                                    GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1966       return expand_shift (RSHIFT_EXPR, mode, target,
1967                                  GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1968     }
1969 
1970   /* If OP0 is a multi-word register, narrow it to the affected word.
1971      If the region spans two words, defer to extract_split_bit_field.  */
1972   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1973     {
1974       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1975           {
1976             if (!fallback_p)
1977               return NULL_RTX;
1978             target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1979                                                       unsignedp, reverse);
1980             return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1981           }
1982       /* If OP0 is a hard register, copy it to a pseudo before calling
1983            simplify_gen_subreg.  */
1984       if (REG_P (op0) && HARD_REGISTER_P (op0))
1985           op0 = copy_to_reg (op0);
1986       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1987                                          bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1988       op0_mode = word_mode;
1989       bitnum %= BITS_PER_WORD;
1990     }
1991 
1992   /* From here on we know the desired field is smaller than a word.
1993      If OP0 is a register, it too fits within a word.  */
1994   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1995   extraction_insn extv;
1996   if (!MEM_P (op0)
1997       && !reverse
1998       /* ??? We could limit the structure size to the part of OP0 that
1999            contains the field, with appropriate checks for endianness
2000            and TARGET_TRULY_NOOP_TRUNCATION.  */
2001       && get_best_reg_extraction_insn (&extv, pattern,
2002                                                GET_MODE_BITSIZE (op0_mode.require ()),
2003                                                tmode))
2004     {
2005       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2006                                                              bitsize, bitnum,
2007                                                              unsignedp, target, mode,
2008                                                              tmode);
2009       if (result)
2010           return result;
2011     }
2012 
2013   /* If OP0 is a memory, try copying it to a register and seeing if a
2014      cheap register alternative is available.  */
2015   if (MEM_P (op0) & !reverse)
2016     {
2017       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2018                                                   tmode))
2019           {
2020             rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2021                                                                  bitsize, bitnum,
2022                                                                  unsignedp, target, mode,
2023                                                                  tmode);
2024             if (result)
2025               return result;
2026           }
2027 
2028       rtx_insn *last = get_last_insn ();
2029 
2030       /* Try loading part of OP0 into a register and extracting the
2031            bitfield from that.  */
2032       unsigned HOST_WIDE_INT bitpos;
2033       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2034                                                          0, 0, tmode, &bitpos);
2035       if (xop0)
2036           {
2037             xop0 = copy_to_reg (xop0);
2038             rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2039                                                       unsignedp, target,
2040                                                       mode, tmode, reverse, false, NULL);
2041             if (result)
2042               return result;
2043             delete_insns_since (last);
2044           }
2045     }
2046 
2047   if (!fallback_p)
2048     return NULL;
2049 
2050   /* Find a correspondingly-sized integer field, so we can apply
2051      shifts and masks to it.  */
2052   scalar_int_mode int_mode;
2053   if (!int_mode_for_mode (tmode).exists (&int_mode))
2054     /* If this fails, we should probably push op0 out to memory and then
2055        do a load.  */
2056     int_mode = int_mode_for_mode (mode).require ();
2057 
2058   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2059                                             bitnum, target, unsignedp, reverse);
2060 
2061   /* Complex values must be reversed piecewise, so we need to undo the global
2062      reversal, convert to the complex mode and reverse again.  */
2063   if (reverse && COMPLEX_MODE_P (tmode))
2064     {
2065       target = flip_storage_order (int_mode, target);
2066       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2067       target = flip_storage_order (tmode, target);
2068     }
2069   else
2070     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2071 
2072   return target;
2073 }
2074 
2075 /* Generate code to extract a byte-field from STR_RTX
2076    containing BITSIZE bits, starting at BITNUM,
2077    and put it in TARGET if possible (if TARGET is nonzero).
2078    Regardless of TARGET, we return the rtx for where the value is placed.
2079 
2080    STR_RTX is the structure containing the byte (a REG or MEM).
2081    UNSIGNEDP is nonzero if this is an unsigned bit field.
2082    MODE is the natural mode of the field value once extracted.
2083    TMODE is the mode the caller would like the value to have;
2084    but the value may be returned with type MODE instead.
2085 
2086    If REVERSE is true, the extraction is to be done in reverse order.
2087 
2088    If a TARGET is specified and we can store in it at no extra cost,
2089    we do so, and return TARGET.
2090    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2091    if they are equally easy.
2092 
2093    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2094    then *ALT_RTL is set to TARGET (before legitimziation).  */
2095 
2096 rtx
extract_bit_field(rtx str_rtx,poly_uint64 bitsize,poly_uint64 bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse,rtx * alt_rtl)2097 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2098                        int unsignedp, rtx target, machine_mode mode,
2099                        machine_mode tmode, bool reverse, rtx *alt_rtl)
2100 {
2101   machine_mode mode1;
2102 
2103   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2104   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2105     mode1 = GET_MODE (str_rtx);
2106   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2107     mode1 = GET_MODE (target);
2108   else
2109     mode1 = tmode;
2110 
2111   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2112   scalar_int_mode int_mode;
2113   if (bitsize.is_constant (&ibitsize)
2114       && bitnum.is_constant (&ibitnum)
2115       && is_a <scalar_int_mode> (mode1, &int_mode)
2116       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2117                                              int_mode, 0, 0))
2118     {
2119       /* Extraction of a full INT_MODE value can be done with a simple load.
2120            We know here that the field can be accessed with one single
2121            instruction.  For targets that support unaligned memory,
2122            an unaligned access may be necessary.  */
2123       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2124           {
2125             rtx result = adjust_bitfield_address (str_rtx, int_mode,
2126                                                             ibitnum / BITS_PER_UNIT);
2127             if (reverse)
2128               result = flip_storage_order (int_mode, result);
2129             gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2130             return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2131           }
2132 
2133       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2134                                               &ibitnum);
2135       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2136       str_rtx = copy_to_reg (str_rtx);
2137       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2138                                           target, mode, tmode, reverse, true, alt_rtl);
2139     }
2140 
2141   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2142                                     target, mode, tmode, reverse, true, alt_rtl);
2143 }
2144 
2145 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2146    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2147    otherwise OP0 is a BLKmode MEM.
2148 
2149    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2150    If REVERSE is true, the extraction is to be done in reverse order.
2151 
2152    If TARGET is nonzero, attempts to store the value there
2153    and return TARGET, but this is not guaranteed.
2154    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2155 
2156 static rtx
extract_fixed_bit_field(machine_mode tmode,rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx target,int unsignedp,bool reverse)2157 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2158                                opt_scalar_int_mode op0_mode,
2159                                unsigned HOST_WIDE_INT bitsize,
2160                                unsigned HOST_WIDE_INT bitnum, rtx target,
2161                                int unsignedp, bool reverse)
2162 {
2163   scalar_int_mode mode;
2164   if (MEM_P (op0))
2165     {
2166       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2167                                 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2168           /* The only way this should occur is if the field spans word
2169              boundaries.  */
2170           return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2171                                                   unsignedp, reverse);
2172 
2173       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2174     }
2175   else
2176     mode = op0_mode.require ();
2177 
2178   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2179                                             target, unsignedp, reverse);
2180 }
2181 
2182 /* Helper function for extract_fixed_bit_field, extracts
2183    the bit field always using MODE, which is the mode of OP0.
2184    The other arguments are as for extract_fixed_bit_field.  */
2185 
2186 static rtx
extract_fixed_bit_field_1(machine_mode tmode,rtx op0,scalar_int_mode mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx target,int unsignedp,bool reverse)2187 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2188                                  unsigned HOST_WIDE_INT bitsize,
2189                                  unsigned HOST_WIDE_INT bitnum, rtx target,
2190                                  int unsignedp, bool reverse)
2191 {
2192   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2193      for invalid input, such as extract equivalent of f5 from
2194      gcc.dg/pr48335-2.c.  */
2195 
2196   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2197     /* BITNUM is the distance between our msb and that of OP0.
2198        Convert it to the distance from the lsb.  */
2199     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2200 
2201   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2202      We have reduced the big-endian case to the little-endian case.  */
2203   if (reverse)
2204     op0 = flip_storage_order (mode, op0);
2205 
2206   if (unsignedp)
2207     {
2208       if (bitnum)
2209           {
2210             /* If the field does not already start at the lsb,
2211                shift it so it does.  */
2212             /* Maybe propagate the target for the shift.  */
2213             rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2214             if (tmode != mode)
2215               subtarget = 0;
2216             op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2217           }
2218       /* Convert the value to the desired mode.  TMODE must also be a
2219            scalar integer for this conversion to make sense, since we
2220            shouldn't reinterpret the bits.  */
2221       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2222       if (mode != new_mode)
2223           op0 = convert_to_mode (new_mode, op0, 1);
2224 
2225       /* Unless the msb of the field used to be the msb when we shifted,
2226            mask out the upper bits.  */
2227 
2228       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2229           return expand_binop (new_mode, and_optab, op0,
2230                                    mask_rtx (new_mode, 0, bitsize, 0),
2231                                    target, 1, OPTAB_LIB_WIDEN);
2232       return op0;
2233     }
2234 
2235   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2236      then arithmetic-shift its lsb to the lsb of the word.  */
2237   op0 = force_reg (mode, op0);
2238 
2239   /* Find the narrowest integer mode that contains the field.  */
2240 
2241   opt_scalar_int_mode mode_iter;
2242   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2243     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2244       break;
2245 
2246   mode = mode_iter.require ();
2247   op0 = convert_to_mode (mode, op0, 0);
2248 
2249   if (mode != tmode)
2250     target = 0;
2251 
2252   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2253     {
2254       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2255       /* Maybe propagate the target for the shift.  */
2256       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2257       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2258     }
2259 
2260   return expand_shift (RSHIFT_EXPR, mode, op0,
2261                            GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2262 }
2263 
2264 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2265    VALUE << BITPOS.  */
2266 
2267 static rtx
lshift_value(machine_mode mode,unsigned HOST_WIDE_INT value,int bitpos)2268 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2269                 int bitpos)
2270 {
2271   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2272 }
2273 
2274 /* Extract a bit field that is split across two words
2275    and return an RTX for the result.
2276 
2277    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2278    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2279    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2280    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2281    a BLKmode MEM.
2282 
2283    If REVERSE is true, the extraction is to be done in reverse order.  */
2284 
2285 static rtx
extract_split_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,int unsignedp,bool reverse)2286 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2287                                unsigned HOST_WIDE_INT bitsize,
2288                                unsigned HOST_WIDE_INT bitpos, int unsignedp,
2289                                bool reverse)
2290 {
2291   unsigned int unit;
2292   unsigned int bitsdone = 0;
2293   rtx result = NULL_RTX;
2294   int first = 1;
2295 
2296   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2297      much at a time.  */
2298   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2299     unit = BITS_PER_WORD;
2300   else
2301     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2302 
2303   while (bitsdone < bitsize)
2304     {
2305       unsigned HOST_WIDE_INT thissize;
2306       rtx part;
2307       unsigned HOST_WIDE_INT thispos;
2308       unsigned HOST_WIDE_INT offset;
2309 
2310       offset = (bitpos + bitsdone) / unit;
2311       thispos = (bitpos + bitsdone) % unit;
2312 
2313       /* THISSIZE must not overrun a word boundary.  Otherwise,
2314            extract_fixed_bit_field will call us again, and we will mutually
2315            recurse forever.  */
2316       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2317       thissize = MIN (thissize, unit - thispos);
2318 
2319       /* If OP0 is a register, then handle OFFSET here.  */
2320       rtx op0_piece = op0;
2321       opt_scalar_int_mode op0_piece_mode = op0_mode;
2322       if (SUBREG_P (op0) || REG_P (op0))
2323           {
2324             op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2325             op0_piece_mode = word_mode;
2326             offset = 0;
2327           }
2328 
2329       /* Extract the parts in bit-counting order,
2330            whose meaning is determined by BYTES_PER_UNIT.
2331            OFFSET is in UNITs, and UNIT is in bits.  */
2332       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2333                                               thissize, offset * unit + thispos,
2334                                               0, 1, reverse);
2335       bitsdone += thissize;
2336 
2337       /* Shift this part into place for the result.  */
2338       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2339           {
2340             if (bitsize != bitsdone)
2341               part = expand_shift (LSHIFT_EXPR, word_mode, part,
2342                                          bitsize - bitsdone, 0, 1);
2343           }
2344       else
2345           {
2346             if (bitsdone != thissize)
2347               part = expand_shift (LSHIFT_EXPR, word_mode, part,
2348                                          bitsdone - thissize, 0, 1);
2349           }
2350 
2351       if (first)
2352           result = part;
2353       else
2354           /* Combine the parts with bitwise or.  This works
2355              because we extracted each part as an unsigned bit field.  */
2356           result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2357                                      OPTAB_LIB_WIDEN);
2358 
2359       first = 0;
2360     }
2361 
2362   /* Unsigned bit field: we are done.  */
2363   if (unsignedp)
2364     return result;
2365   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2366   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2367                                BITS_PER_WORD - bitsize, NULL_RTX, 0);
2368   return expand_shift (RSHIFT_EXPR, word_mode, result,
2369                            BITS_PER_WORD - bitsize, NULL_RTX, 0);
2370 }
2371 
2372 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2373    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2374    MODE, fill the upper bits with zeros.  Fail if the layout of either
2375    mode is unknown (as for CC modes) or if the extraction would involve
2376    unprofitable mode punning.  Return the value on success, otherwise
2377    return null.
2378 
2379    This is different from gen_lowpart* in these respects:
2380 
2381      - the returned value must always be considered an rvalue
2382 
2383      - when MODE is wider than SRC_MODE, the extraction involves
2384        a zero extension
2385 
2386      - when MODE is smaller than SRC_MODE, the extraction involves
2387        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2388 
2389    In other words, this routine performs a computation, whereas the
2390    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2391    operations.  */
2392 
2393 rtx
extract_low_bits(machine_mode mode,machine_mode src_mode,rtx src)2394 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2395 {
2396   scalar_int_mode int_mode, src_int_mode;
2397 
2398   if (mode == src_mode)
2399     return src;
2400 
2401   if (CONSTANT_P (src))
2402     {
2403       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2404            fails, it will happily create (subreg (symbol_ref)) or similar
2405            invalid SUBREGs.  */
2406       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2407       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2408       if (ret)
2409           return ret;
2410 
2411       if (GET_MODE (src) == VOIDmode
2412             || !validate_subreg (mode, src_mode, src, byte))
2413           return NULL_RTX;
2414 
2415       src = force_reg (GET_MODE (src), src);
2416       return gen_rtx_SUBREG (mode, src, byte);
2417     }
2418 
2419   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2420     return NULL_RTX;
2421 
2422   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2423       && targetm.modes_tieable_p (mode, src_mode))
2424     {
2425       rtx x = gen_lowpart_common (mode, src);
2426       if (x)
2427         return x;
2428     }
2429 
2430   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2431       || !int_mode_for_mode (mode).exists (&int_mode))
2432     return NULL_RTX;
2433 
2434   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2435     return NULL_RTX;
2436   if (!targetm.modes_tieable_p (int_mode, mode))
2437     return NULL_RTX;
2438 
2439   src = gen_lowpart (src_int_mode, src);
2440   if (!validate_subreg (int_mode, src_int_mode, src,
2441                               subreg_lowpart_offset (int_mode, src_int_mode)))
2442     return NULL_RTX;
2443 
2444   src = convert_modes (int_mode, src_int_mode, src, true);
2445   src = gen_lowpart (mode, src);
2446   return src;
2447 }
2448 
2449 /* Add INC into TARGET.  */
2450 
2451 void
expand_inc(rtx target,rtx inc)2452 expand_inc (rtx target, rtx inc)
2453 {
2454   rtx value = expand_binop (GET_MODE (target), add_optab,
2455                                   target, inc,
2456                                   target, 0, OPTAB_LIB_WIDEN);
2457   if (value != target)
2458     emit_move_insn (target, value);
2459 }
2460 
2461 /* Subtract DEC from TARGET.  */
2462 
2463 void
expand_dec(rtx target,rtx dec)2464 expand_dec (rtx target, rtx dec)
2465 {
2466   rtx value = expand_binop (GET_MODE (target), sub_optab,
2467                                   target, dec,
2468                                   target, 0, OPTAB_LIB_WIDEN);
2469   if (value != target)
2470     emit_move_insn (target, value);
2471 }
2472 
2473 /* Output a shift instruction for expression code CODE,
2474    with SHIFTED being the rtx for the value to shift,
2475    and AMOUNT the rtx for the amount to shift by.
2476    Store the result in the rtx TARGET, if that is convenient.
2477    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2478    Return the rtx for where the value is.
2479    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2480    in which case 0 is returned.  */
2481 
2482 static rtx
expand_shift_1(enum tree_code code,machine_mode mode,rtx shifted,rtx amount,rtx target,int unsignedp,bool may_fail=false)2483 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2484                     rtx amount, rtx target, int unsignedp, bool may_fail = false)
2485 {
2486   rtx op1, temp = 0;
2487   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2488   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2489   optab lshift_optab = ashl_optab;
2490   optab rshift_arith_optab = ashr_optab;
2491   optab rshift_uns_optab = lshr_optab;
2492   optab lrotate_optab = rotl_optab;
2493   optab rrotate_optab = rotr_optab;
2494   machine_mode op1_mode;
2495   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2496   int attempt;
2497   bool speed = optimize_insn_for_speed_p ();
2498 
2499   op1 = amount;
2500   op1_mode = GET_MODE (op1);
2501 
2502   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2503      shift amount is a vector, use the vector/vector shift patterns.  */
2504   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2505     {
2506       lshift_optab = vashl_optab;
2507       rshift_arith_optab = vashr_optab;
2508       rshift_uns_optab = vlshr_optab;
2509       lrotate_optab = vrotl_optab;
2510       rrotate_optab = vrotr_optab;
2511     }
2512 
2513   /* Previously detected shift-counts computed by NEGATE_EXPR
2514      and shifted in the other direction; but that does not work
2515      on all machines.  */
2516 
2517   if (SHIFT_COUNT_TRUNCATED)
2518     {
2519       if (CONST_INT_P (op1)
2520             && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2521                 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2522           op1 = gen_int_shift_amount (mode,
2523                                             (unsigned HOST_WIDE_INT) INTVAL (op1)
2524                                             % GET_MODE_BITSIZE (scalar_mode));
2525       else if (GET_CODE (op1) == SUBREG
2526                  && subreg_lowpart_p (op1)
2527                  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2528                  && SCALAR_INT_MODE_P (GET_MODE (op1)))
2529           op1 = SUBREG_REG (op1);
2530     }
2531 
2532   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2533      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2534      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2535      amount instead.  */
2536   if (rotate
2537       && CONST_INT_P (op1)
2538       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2539                        GET_MODE_BITSIZE (scalar_mode) - 1))
2540     {
2541       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2542                                                    - INTVAL (op1)));
2543       left = !left;
2544       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2545     }
2546 
2547   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2548      Note that this is not the case for bigger values.  For instance a rotation
2549      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2550      0x04030201 (bswapsi).  */
2551   if (rotate
2552       && CONST_INT_P (op1)
2553       && INTVAL (op1) == BITS_PER_UNIT
2554       && GET_MODE_SIZE (scalar_mode) == 2
2555       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2556     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2557 
2558   if (op1 == const0_rtx)
2559     return shifted;
2560 
2561   /* Check whether its cheaper to implement a left shift by a constant
2562      bit count by a sequence of additions.  */
2563   if (code == LSHIFT_EXPR
2564       && CONST_INT_P (op1)
2565       && INTVAL (op1) > 0
2566       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2567       && INTVAL (op1) < MAX_BITS_PER_WORD
2568       && (shift_cost (speed, mode, INTVAL (op1))
2569             > INTVAL (op1) * add_cost (speed, mode))
2570       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2571     {
2572       int i;
2573       for (i = 0; i < INTVAL (op1); i++)
2574           {
2575             temp = force_reg (mode, shifted);
2576             shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2577                                           unsignedp, OPTAB_LIB_WIDEN);
2578           }
2579       return shifted;
2580     }
2581 
2582   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2583     {
2584       enum optab_methods methods;
2585 
2586       if (attempt == 0)
2587           methods = OPTAB_DIRECT;
2588       else if (attempt == 1)
2589           methods = OPTAB_WIDEN;
2590       else
2591           methods = OPTAB_LIB_WIDEN;
2592 
2593       if (rotate)
2594           {
2595             /* Widening does not work for rotation.  */
2596             if (methods == OPTAB_WIDEN)
2597               continue;
2598             else if (methods == OPTAB_LIB_WIDEN)
2599               {
2600                 /* If we have been unable to open-code this by a rotation,
2601                      do it as the IOR of two shifts.  I.e., to rotate A
2602                      by N bits, compute
2603                      (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2604                      where C is the bitsize of A.
2605 
2606                      It is theoretically possible that the target machine might
2607                      not be able to perform either shift and hence we would
2608                      be making two libcalls rather than just the one for the
2609                      shift (similarly if IOR could not be done).  We will allow
2610                      this extremely unlikely lossage to avoid complicating the
2611                      code below.  */
2612 
2613                 rtx subtarget = target == shifted ? 0 : target;
2614                 rtx new_amount, other_amount;
2615                 rtx temp1;
2616 
2617                 new_amount = op1;
2618                 if (op1 == const0_rtx)
2619                     return shifted;
2620                 else if (CONST_INT_P (op1))
2621                     other_amount = gen_int_shift_amount
2622                       (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2623                 else
2624                     {
2625                       other_amount
2626                         = simplify_gen_unary (NEG, GET_MODE (op1),
2627                                                     op1, GET_MODE (op1));
2628                       HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2629                       other_amount
2630                         = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2631                                                      gen_int_mode (mask, GET_MODE (op1)));
2632                     }
2633 
2634                 shifted = force_reg (mode, shifted);
2635 
2636                 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2637                                              mode, shifted, new_amount, 0, 1);
2638                 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2639                                               mode, shifted, other_amount,
2640                                               subtarget, 1);
2641                 return expand_binop (mode, ior_optab, temp, temp1, target,
2642                                            unsignedp, methods);
2643               }
2644 
2645             temp = expand_binop (mode,
2646                                      left ? lrotate_optab : rrotate_optab,
2647                                      shifted, op1, target, unsignedp, methods);
2648           }
2649       else if (unsignedp)
2650           temp = expand_binop (mode,
2651                                    left ? lshift_optab : rshift_uns_optab,
2652                                    shifted, op1, target, unsignedp, methods);
2653 
2654       /* Do arithmetic shifts.
2655            Also, if we are going to widen the operand, we can just as well
2656            use an arithmetic right-shift instead of a logical one.  */
2657       if (temp == 0 && ! rotate
2658             && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2659           {
2660             enum optab_methods methods1 = methods;
2661 
2662             /* If trying to widen a log shift to an arithmetic shift,
2663                don't accept an arithmetic shift of the same size.  */
2664             if (unsignedp)
2665               methods1 = OPTAB_MUST_WIDEN;
2666 
2667             /* Arithmetic shift */
2668 
2669             temp = expand_binop (mode,
2670                                      left ? lshift_optab : rshift_arith_optab,
2671                                      shifted, op1, target, unsignedp, methods1);
2672           }
2673 
2674       /* We used to try extzv here for logical right shifts, but that was
2675            only useful for one machine, the VAX, and caused poor code
2676            generation there for lshrdi3, so the code was deleted and a
2677            define_expand for lshrsi3 was added to vax.md.  */
2678     }
2679 
2680   gcc_assert (temp != NULL_RTX || may_fail);
2681   return temp;
2682 }
2683 
2684 /* Output a shift instruction for expression code CODE,
2685    with SHIFTED being the rtx for the value to shift,
2686    and AMOUNT the amount to shift by.
2687    Store the result in the rtx TARGET, if that is convenient.
2688    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2689    Return the rtx for where the value is.  */
2690 
2691 rtx
expand_shift(enum tree_code code,machine_mode mode,rtx shifted,poly_int64 amount,rtx target,int unsignedp)2692 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2693                 poly_int64 amount, rtx target, int unsignedp)
2694 {
2695   return expand_shift_1 (code, mode, shifted,
2696                                gen_int_shift_amount (mode, amount),
2697                                target, unsignedp);
2698 }
2699 
2700 /* Likewise, but return 0 if that cannot be done.  */
2701 
2702 static rtx
maybe_expand_shift(enum tree_code code,machine_mode mode,rtx shifted,int amount,rtx target,int unsignedp)2703 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2704                         int amount, rtx target, int unsignedp)
2705 {
2706   return expand_shift_1 (code, mode,
2707                                shifted, GEN_INT (amount), target, unsignedp, true);
2708 }
2709 
2710 /* Output a shift instruction for expression code CODE,
2711    with SHIFTED being the rtx for the value to shift,
2712    and AMOUNT the tree for the amount to shift by.
2713    Store the result in the rtx TARGET, if that is convenient.
2714    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2715    Return the rtx for where the value is.  */
2716 
2717 rtx
expand_variable_shift(enum tree_code code,machine_mode mode,rtx shifted,tree amount,rtx target,int unsignedp)2718 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2719                            tree amount, rtx target, int unsignedp)
2720 {
2721   return expand_shift_1 (code, mode,
2722                                shifted, expand_normal (amount), target, unsignedp);
2723 }
2724 
2725 
2726 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2727                               const struct mult_cost *, machine_mode mode);
2728 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2729                                     const struct algorithm *, enum mult_variant);
2730 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2731 static rtx extract_high_half (scalar_int_mode, rtx);
2732 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2733 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2734                                                int, int);
2735 /* Compute and return the best algorithm for multiplying by T.
2736    The algorithm must cost less than cost_limit
2737    If retval.cost >= COST_LIMIT, no algorithm was found and all
2738    other field of the returned struct are undefined.
2739    MODE is the machine mode of the multiplication.  */
2740 
2741 static void
synth_mult(struct algorithm * alg_out,unsigned HOST_WIDE_INT t,const struct mult_cost * cost_limit,machine_mode mode)2742 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2743               const struct mult_cost *cost_limit, machine_mode mode)
2744 {
2745   int m;
2746   struct algorithm *alg_in, *best_alg;
2747   struct mult_cost best_cost;
2748   struct mult_cost new_limit;
2749   int op_cost, op_latency;
2750   unsigned HOST_WIDE_INT orig_t = t;
2751   unsigned HOST_WIDE_INT q;
2752   int maxm, hash_index;
2753   bool cache_hit = false;
2754   enum alg_code cache_alg = alg_zero;
2755   bool speed = optimize_insn_for_speed_p ();
2756   scalar_int_mode imode;
2757   struct alg_hash_entry *entry_ptr;
2758 
2759   /* Indicate that no algorithm is yet found.  If no algorithm
2760      is found, this value will be returned and indicate failure.  */
2761   alg_out->cost.cost = cost_limit->cost + 1;
2762   alg_out->cost.latency = cost_limit->latency + 1;
2763 
2764   if (cost_limit->cost < 0
2765       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2766     return;
2767 
2768   /* Be prepared for vector modes.  */
2769   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2770 
2771   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2772 
2773   /* Restrict the bits of "t" to the multiplication's mode.  */
2774   t &= GET_MODE_MASK (imode);
2775 
2776   /* t == 1 can be done in zero cost.  */
2777   if (t == 1)
2778     {
2779       alg_out->ops = 1;
2780       alg_out->cost.cost = 0;
2781       alg_out->cost.latency = 0;
2782       alg_out->op[0] = alg_m;
2783       return;
2784     }
2785 
2786   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2787      fail now.  */
2788   if (t == 0)
2789     {
2790       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2791           return;
2792       else
2793           {
2794             alg_out->ops = 1;
2795             alg_out->cost.cost = zero_cost (speed);
2796             alg_out->cost.latency = zero_cost (speed);
2797             alg_out->op[0] = alg_zero;
2798             return;
2799           }
2800     }
2801 
2802   /* We'll be needing a couple extra algorithm structures now.  */
2803 
2804   alg_in = XALLOCA (struct algorithm);
2805   best_alg = XALLOCA (struct algorithm);
2806   best_cost = *cost_limit;
2807 
2808   /* Compute the hash index.  */
2809   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2810 
2811   /* See if we already know what to do for T.  */
2812   entry_ptr = alg_hash_entry_ptr (hash_index);
2813   if (entry_ptr->t == t
2814       && entry_ptr->mode == mode
2815       && entry_ptr->speed == speed
2816       && entry_ptr->alg != alg_unknown)
2817     {
2818       cache_alg = entry_ptr->alg;
2819 
2820       if (cache_alg == alg_impossible)
2821           {
2822             /* The cache tells us that it's impossible to synthesize
2823                multiplication by T within entry_ptr->cost.  */
2824             if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2825               /* COST_LIMIT is at least as restrictive as the one
2826                  recorded in the hash table, in which case we have no
2827                  hope of synthesizing a multiplication.  Just
2828                  return.  */
2829               return;
2830 
2831             /* If we get here, COST_LIMIT is less restrictive than the
2832                one recorded in the hash table, so we may be able to
2833                synthesize a multiplication.  Proceed as if we didn't
2834                have the cache entry.  */
2835           }
2836       else
2837           {
2838             if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2839               /* The cached algorithm shows that this multiplication
2840                  requires more cost than COST_LIMIT.  Just return.  This
2841                  way, we don't clobber this cache entry with
2842                  alg_impossible but retain useful information.  */
2843               return;
2844 
2845             cache_hit = true;
2846 
2847             switch (cache_alg)
2848               {
2849               case alg_shift:
2850                 goto do_alg_shift;
2851 
2852               case alg_add_t_m2:
2853               case alg_sub_t_m2:
2854                 goto do_alg_addsub_t_m2;
2855 
2856               case alg_add_factor:
2857               case alg_sub_factor:
2858                 goto do_alg_addsub_factor;
2859 
2860               case alg_add_t2_m:
2861                 goto do_alg_add_t2_m;
2862 
2863               case alg_sub_t2_m:
2864                 goto do_alg_sub_t2_m;
2865 
2866               default:
2867                 gcc_unreachable ();
2868               }
2869           }
2870     }
2871 
2872   /* If we have a group of zero bits at the low-order part of T, try
2873      multiplying by the remaining bits and then doing a shift.  */
2874 
2875   if ((t & 1) == 0)
2876     {
2877     do_alg_shift:
2878       m = ctz_or_zero (t); /* m = number of low zero bits */
2879       if (m < maxm)
2880           {
2881             q = t >> m;
2882             /* The function expand_shift will choose between a shift and
2883                a sequence of additions, so the observed cost is given as
2884                MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2885             op_cost = m * add_cost (speed, mode);
2886             if (shift_cost (speed, mode, m) < op_cost)
2887               op_cost = shift_cost (speed, mode, m);
2888             new_limit.cost = best_cost.cost - op_cost;
2889             new_limit.latency = best_cost.latency - op_cost;
2890             synth_mult (alg_in, q, &new_limit, mode);
2891 
2892             alg_in->cost.cost += op_cost;
2893             alg_in->cost.latency += op_cost;
2894             if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2895               {
2896                 best_cost = alg_in->cost;
2897                 std::swap (alg_in, best_alg);
2898                 best_alg->log[best_alg->ops] = m;
2899                 best_alg->op[best_alg->ops] = alg_shift;
2900               }
2901 
2902             /* See if treating ORIG_T as a signed number yields a better
2903                sequence.  Try this sequence only for a negative ORIG_T
2904                as it would be useless for a non-negative ORIG_T.  */
2905             if ((HOST_WIDE_INT) orig_t < 0)
2906               {
2907                 /* Shift ORIG_T as follows because a right shift of a
2908                      negative-valued signed type is implementation
2909                      defined.  */
2910                 q = ~(~orig_t >> m);
2911                 /* The function expand_shift will choose between a shift
2912                      and a sequence of additions, so the observed cost is
2913                      given as MIN (m * add_cost(speed, mode),
2914                      shift_cost(speed, mode, m)).  */
2915                 op_cost = m * add_cost (speed, mode);
2916                 if (shift_cost (speed, mode, m) < op_cost)
2917                     op_cost = shift_cost (speed, mode, m);
2918                 new_limit.cost = best_cost.cost - op_cost;
2919                 new_limit.latency = best_cost.latency - op_cost;
2920                 synth_mult (alg_in, q, &new_limit, mode);
2921 
2922                 alg_in->cost.cost += op_cost;
2923                 alg_in->cost.latency += op_cost;
2924                 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2925                     {
2926                       best_cost = alg_in->cost;
2927                       std::swap (alg_in, best_alg);
2928                       best_alg->log[best_alg->ops] = m;
2929                       best_alg->op[best_alg->ops] = alg_shift;
2930                     }
2931               }
2932           }
2933       if (cache_hit)
2934           goto done;
2935     }
2936 
2937   /* If we have an odd number, add or subtract one.  */
2938   if ((t & 1) != 0)
2939     {
2940       unsigned HOST_WIDE_INT w;
2941 
2942     do_alg_addsub_t_m2:
2943       for (w = 1; (w & t) != 0; w <<= 1)
2944           ;
2945       /* If T was -1, then W will be zero after the loop.  This is another
2946            case where T ends with ...111.  Handling this with (T + 1) and
2947            subtract 1 produces slightly better code and results in algorithm
2948            selection much faster than treating it like the ...0111 case
2949            below.  */
2950       if (w == 0
2951             || (w > 2
2952                 /* Reject the case where t is 3.
2953                      Thus we prefer addition in that case.  */
2954                 && t != 3))
2955           {
2956             /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2957 
2958             op_cost = add_cost (speed, mode);
2959             new_limit.cost = best_cost.cost - op_cost;
2960             new_limit.latency = best_cost.latency - op_cost;
2961             synth_mult (alg_in, t + 1, &new_limit, mode);
2962 
2963             alg_in->cost.cost += op_cost;
2964             alg_in->cost.latency += op_cost;
2965             if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2966               {
2967                 best_cost = alg_in->cost;
2968                 std::swap (alg_in, best_alg);
2969                 best_alg->log[best_alg->ops] = 0;
2970                 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2971               }
2972           }
2973       else
2974           {
2975             /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2976 
2977             op_cost = add_cost (speed, mode);
2978             new_limit.cost = best_cost.cost - op_cost;
2979             new_limit.latency = best_cost.latency - op_cost;
2980             synth_mult (alg_in, t - 1, &new_limit, mode);
2981 
2982             alg_in->cost.cost += op_cost;
2983             alg_in->cost.latency += op_cost;
2984             if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2985               {
2986                 best_cost = alg_in->cost;
2987                 std::swap (alg_in, best_alg);
2988                 best_alg->log[best_alg->ops] = 0;
2989                 best_alg->op[best_alg->ops] = alg_add_t_m2;
2990               }
2991           }
2992 
2993       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2994            quickly with a - a * n for some appropriate constant n.  */
2995       m = exact_log2 (-orig_t + 1);
2996       if (m >= 0 && m < maxm)
2997           {
2998             op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2999             /* If the target has a cheap shift-and-subtract insn use
3000                that in preference to a shift insn followed by a sub insn.
3001                Assume that the shift-and-sub is "atomic" with a latency
3002                equal to it's cost, otherwise assume that on superscalar
3003                hardware the shift may be executed concurrently with the
3004                earlier steps in the algorithm.  */
3005             if (shiftsub1_cost (speed, mode, m) <= op_cost)
3006               {
3007                 op_cost = shiftsub1_cost (speed, mode, m);
3008                 op_latency = op_cost;
3009               }
3010             else
3011               op_latency = add_cost (speed, mode);
3012 
3013             new_limit.cost = best_cost.cost - op_cost;
3014             new_limit.latency = best_cost.latency - op_latency;
3015             synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3016                           &new_limit, mode);
3017 
3018             alg_in->cost.cost += op_cost;
3019             alg_in->cost.latency += op_latency;
3020             if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3021               {
3022                 best_cost = alg_in->cost;
3023                 std::swap (alg_in, best_alg);
3024                 best_alg->log[best_alg->ops] = m;
3025                 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3026               }
3027           }
3028 
3029       if (cache_hit)
3030           goto done;
3031     }
3032 
3033   /* Look for factors of t of the form
3034      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3035      If we find such a factor, we can multiply by t using an algorithm that
3036      multiplies by q, shift the result by m and add/subtract it to itself.
3037 
3038      We search for large factors first and loop down, even if large factors
3039      are less probable than small; if we find a large factor we will find a
3040      good sequence quickly, and therefore be able to prune (by decreasing
3041      COST_LIMIT) the search.  */
3042 
3043  do_alg_addsub_factor:
3044   for (m = floor_log2 (t - 1); m >= 2; m--)
3045     {
3046       unsigned HOST_WIDE_INT d;
3047 
3048       d = (HOST_WIDE_INT_1U << m) + 1;
3049       if (t % d == 0 && t > d && m < maxm
3050             && (!cache_hit || cache_alg == alg_add_factor))
3051           {
3052             op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3053             if (shiftadd_cost (speed, mode, m) <= op_cost)
3054               op_cost = shiftadd_cost (speed, mode, m);
3055 
3056             op_latency = op_cost;
3057 
3058 
3059             new_limit.cost = best_cost.cost - op_cost;
3060             new_limit.latency = best_cost.latency - op_latency;
3061             synth_mult (alg_in, t / d, &new_limit, mode);
3062 
3063             alg_in->cost.cost += op_cost;
3064             alg_in->cost.latency += op_latency;
3065             if (alg_in->cost.latency < op_cost)
3066               alg_in->cost.latency = op_cost;
3067             if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3068               {
3069                 best_cost = alg_in->cost;
3070                 std::swap (alg_in, best_alg);
3071                 best_alg->log[best_alg->ops] = m;
3072                 best_alg->op[best_alg->ops] = alg_add_factor;
3073               }
3074             /* Other factors will have been taken care of in the recursion.  */
3075             break;
3076           }
3077 
3078       d = (HOST_WIDE_INT_1U << m) - 1;
3079       if (t % d == 0 && t > d && m < maxm
3080             && (!cache_hit || cache_alg == alg_sub_factor))
3081           {
3082             op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3083             if (shiftsub0_cost (speed, mode, m) <= op_cost)
3084               op_cost = shiftsub0_cost (speed, mode, m);
3085 
3086             op_latency = op_cost;
3087 
3088             new_limit.cost = best_cost.cost - op_cost;
3089             new_limit.latency = best_cost.latency - op_latency;
3090             synth_mult (alg_in, t / d, &new_limit, mode);
3091 
3092             alg_in->cost.cost += op_cost;
3093             alg_in->cost.latency += op_latency;
3094             if (alg_in->cost.latency < op_cost)
3095               alg_in->cost.latency = op_cost;
3096             if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3097               {
3098                 best_cost = alg_in->cost;
3099                 std::swap (alg_in, best_alg);
3100                 best_alg->log[best_alg->ops] = m;
3101                 best_alg->op[best_alg->ops] = alg_sub_factor;
3102               }
3103             break;
3104           }
3105     }
3106   if (cache_hit)
3107     goto done;
3108 
3109   /* Try shift-and-add (load effective address) instructions,
3110      i.e. do a*3, a*5, a*9.  */
3111   if ((t & 1) != 0)
3112     {
3113     do_alg_add_t2_m:
3114       q = t - 1;
3115       m = ctz_hwi (q);
3116       if (q && m < maxm)
3117           {
3118             op_cost = shiftadd_cost (speed, mode, m);
3119             new_limit.cost = best_cost.cost - op_cost;
3120             new_limit.latency = best_cost.latency - op_cost;
3121             synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3122 
3123             alg_in->cost.cost += op_cost;
3124             alg_in->cost.latency += op_cost;
3125             if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3126               {
3127                 best_cost = alg_in->cost;
3128                 std::swap (alg_in, best_alg);
3129                 best_alg->log[best_alg->ops] = m;
3130                 best_alg->op[best_alg->ops] = alg_add_t2_m;
3131               }
3132           }
3133       if (cache_hit)
3134           goto done;
3135 
3136     do_alg_sub_t2_m:
3137       q = t + 1;
3138       m = ctz_hwi (q);
3139       if (q && m < maxm)
3140           {
3141             op_cost = shiftsub0_cost (speed, mode, m);
3142             new_limit.cost = best_cost.cost - op_cost;
3143             new_limit.latency = best_cost.latency - op_cost;
3144             synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3145 
3146             alg_in->cost.cost += op_cost;
3147             alg_in->cost.latency += op_cost;
3148             if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3149               {
3150                 best_cost = alg_in->cost;
3151                 std::swap (alg_in, best_alg);
3152                 best_alg->log[best_alg->ops] = m;
3153                 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3154               }
3155           }
3156       if (cache_hit)
3157           goto done;
3158     }
3159 
3160  done:
3161   /* If best_cost has not decreased, we have not found any algorithm.  */
3162   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3163     {
3164       /* We failed to find an algorithm.  Record alg_impossible for
3165            this case (that is, <T, MODE, COST_LIMIT>) so that next time
3166            we are asked to find an algorithm for T within the same or
3167            lower COST_LIMIT, we can immediately return to the
3168            caller.  */
3169       entry_ptr->t = t;
3170       entry_ptr->mode = mode;
3171       entry_ptr->speed = speed;
3172       entry_ptr->alg = alg_impossible;
3173       entry_ptr->cost = *cost_limit;
3174       return;
3175     }
3176 
3177   /* Cache the result.  */
3178   if (!cache_hit)
3179     {
3180       entry_ptr->t = t;
3181       entry_ptr->mode = mode;
3182       entry_ptr->speed = speed;
3183       entry_ptr->alg = best_alg->op[best_alg->ops];
3184       entry_ptr->cost.cost = best_cost.cost;
3185       entry_ptr->cost.latency = best_cost.latency;
3186     }
3187 
3188   /* If we are getting a too long sequence for `struct algorithm'
3189      to record, make this search fail.  */
3190   if (best_alg->ops == MAX_BITS_PER_WORD)
3191     return;
3192 
3193   /* Copy the algorithm from temporary space to the space at alg_out.
3194      We avoid using structure assignment because the majority of
3195      best_alg is normally undefined, and this is a critical function.  */
3196   alg_out->ops = best_alg->ops + 1;
3197   alg_out->cost = best_cost;
3198   memcpy (alg_out->op, best_alg->op,
3199             alg_out->ops * sizeof *alg_out->op);
3200   memcpy (alg_out->log, best_alg->log,
3201             alg_out->ops * sizeof *alg_out->log);
3202 }
3203 
3204 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3205    Try three variations:
3206 
3207        - a shift/add sequence based on VAL itself
3208        - a shift/add sequence based on -VAL, followed by a negation
3209        - a shift/add sequence based on VAL - 1, followed by an addition.
3210 
3211    Return true if the cheapest of these cost less than MULT_COST,
3212    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3213 
3214 bool
choose_mult_variant(machine_mode mode,HOST_WIDE_INT val,struct algorithm * alg,enum mult_variant * variant,int mult_cost)3215 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3216                          struct algorithm *alg, enum mult_variant *variant,
3217                          int mult_cost)
3218 {
3219   struct algorithm alg2;
3220   struct mult_cost limit;
3221   int op_cost;
3222   bool speed = optimize_insn_for_speed_p ();
3223 
3224   /* Fail quickly for impossible bounds.  */
3225   if (mult_cost < 0)
3226     return false;
3227 
3228   /* Ensure that mult_cost provides a reasonable upper bound.
3229      Any constant multiplication can be performed with less
3230      than 2 * bits additions.  */
3231   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3232   if (mult_cost > op_cost)
3233     mult_cost = op_cost;
3234 
3235   *variant = basic_variant;
3236   limit.cost = mult_cost;
3237   limit.latency = mult_cost;
3238   synth_mult (alg, val, &limit, mode);
3239 
3240   /* This works only if the inverted value actually fits in an
3241      `unsigned int' */
3242   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3243     {
3244       op_cost = neg_cost (speed, mode);
3245       if (MULT_COST_LESS (&alg->cost, mult_cost))
3246           {
3247             limit.cost = alg->cost.cost - op_cost;
3248             limit.latency = alg->cost.latency - op_cost;
3249           }
3250       else
3251           {
3252             limit.cost = mult_cost - op_cost;
3253             limit.latency = mult_cost - op_cost;
3254           }
3255 
3256       synth_mult (&alg2, -val, &limit, mode);
3257       alg2.cost.cost += op_cost;
3258       alg2.cost.latency += op_cost;
3259       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3260           *alg = alg2, *variant = negate_variant;
3261     }
3262 
3263   /* This proves very useful for division-by-constant.  */
3264   op_cost = add_cost (speed, mode);
3265   if (MULT_COST_LESS (&alg->cost, mult_cost))
3266     {
3267       limit.cost = alg->cost.cost - op_cost;
3268       limit.latency = alg->cost.latency - op_cost;
3269     }
3270   else
3271     {
3272       limit.cost = mult_cost - op_cost;
3273       limit.latency = mult_cost - op_cost;
3274     }
3275 
3276   synth_mult (&alg2, val - 1, &limit, mode);
3277   alg2.cost.cost += op_cost;
3278   alg2.cost.latency += op_cost;
3279   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3280     *alg = alg2, *variant = add_variant;
3281 
3282   return MULT_COST_LESS (&alg->cost, mult_cost);
3283 }
3284 
3285 /* A subroutine of expand_mult, used for constant multiplications.
3286    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3287    convenient.  Use the shift/add sequence described by ALG and apply
3288    the final fixup specified by VARIANT.  */
3289 
3290 static rtx
expand_mult_const(machine_mode mode,rtx op0,HOST_WIDE_INT val,rtx target,const struct algorithm * alg,enum mult_variant variant)3291 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3292                        rtx target, const struct algorithm *alg,
3293                        enum mult_variant variant)
3294 {
3295   unsigned HOST_WIDE_INT val_so_far;
3296   rtx_insn *insn;
3297   rtx accum, tem;
3298   int opno;
3299   machine_mode nmode;
3300 
3301   /* Avoid referencing memory over and over and invalid sharing
3302      on SUBREGs.  */
3303   op0 = force_reg (mode, op0);
3304 
3305   /* ACCUM starts out either as OP0 or as a zero, depending on
3306      the first operation.  */
3307 
3308   if (alg->op[0] == alg_zero)
3309     {
3310       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3311       val_so_far = 0;
3312     }
3313   else if (alg->op[0] == alg_m)
3314     {
3315       accum = copy_to_mode_reg (mode, op0);
3316       val_so_far = 1;
3317     }
3318   else
3319     gcc_unreachable ();
3320 
3321   for (opno = 1; opno < alg->ops; opno++)
3322     {
3323       int log = alg->log[opno];
3324       rtx shift_subtarget = optimize ? 0 : accum;
3325       rtx add_target
3326           = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3327              && !optimize)
3328             ? target : 0;
3329       rtx accum_target = optimize ? 0 : accum;
3330       rtx accum_inner;
3331 
3332       switch (alg->op[opno])
3333           {
3334           case alg_shift:
3335             tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3336             /* REG_EQUAL note will be attached to the following insn.  */
3337             emit_move_insn (accum, tem);
3338             val_so_far <<= log;
3339             break;
3340 
3341           case alg_add_t_m2:
3342             tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3343             accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3344                                          add_target ? add_target : accum_target);
3345             val_so_far += HOST_WIDE_INT_1U << log;
3346             break;
3347 
3348           case alg_sub_t_m2:
3349             tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3350             accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3351                                          add_target ? add_target : accum_target);
3352             val_so_far -= HOST_WIDE_INT_1U << log;
3353             break;
3354 
3355           case alg_add_t2_m:
3356             accum = expand_shift (LSHIFT_EXPR, mode, accum,
3357                                         log, shift_subtarget, 0);
3358             accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3359                                          add_target ? add_target : accum_target);
3360             val_so_far = (val_so_far << log) + 1;
3361             break;
3362 
3363           case alg_sub_t2_m:
3364             accum = expand_shift (LSHIFT_EXPR, mode, accum,
3365                                         log, shift_subtarget, 0);
3366             accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3367                                          add_target ? add_target : accum_target);
3368             val_so_far = (val_so_far << log) - 1;
3369             break;
3370 
3371           case alg_add_factor:
3372             tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3373             accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3374                                          add_target ? add_target : accum_target);
3375             val_so_far += val_so_far << log;
3376             break;
3377 
3378           case alg_sub_factor:
3379             tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3380             accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3381                                          (add_target
3382                                           ? add_target : (optimize ? 0 : tem)));
3383             val_so_far = (val_so_far << log) - val_so_far;
3384             break;
3385 
3386           default:
3387             gcc_unreachable ();
3388           }
3389 
3390       if (SCALAR_INT_MODE_P (mode))
3391           {
3392             /* Write a REG_EQUAL note on the last insn so that we can cse
3393                multiplication sequences.  Note that if ACCUM is a SUBREG,
3394                we've set the inner register and must properly indicate that.  */
3395             tem = op0, nmode = mode;
3396             accum_inner = accum;
3397             if (GET_CODE (accum) == SUBREG)
3398               {
3399                 accum_inner = SUBREG_REG (accum);
3400                 nmode = GET_MODE (accum_inner);
3401                 tem = gen_lowpart (nmode, op0);
3402               }
3403 
3404             /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3405                In that case, only the low bits of accum would be guaranteed to
3406                be equal to the content of the REG_EQUAL note, the upper bits
3407                can be anything.  */
3408             if (!paradoxical_subreg_p (tem))
3409               {
3410                 insn = get_last_insn ();
3411                 wide_int wval_so_far
3412                     = wi::uhwi (val_so_far,
3413                                   GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3414                 rtx c = immed_wide_int_const (wval_so_far, nmode);
3415                 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3416                                         accum_inner);
3417               }
3418           }
3419     }
3420 
3421   if (variant == negate_variant)
3422     {
3423       val_so_far = -val_so_far;
3424       accum = expand_unop (mode, neg_optab, accum, target, 0);
3425     }
3426   else if (variant == add_variant)
3427     {
3428       val_so_far = val_so_far + 1;
3429       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3430     }
3431 
3432   /* Compare only the bits of val and val_so_far that are significant
3433      in the result mode, to avoid sign-/zero-extension confusion.  */
3434   nmode = GET_MODE_INNER (mode);
3435   val &= GET_MODE_MASK (nmode);
3436   val_so_far &= GET_MODE_MASK (nmode);
3437   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3438 
3439   return accum;
3440 }
3441 
3442 /* Perform a multiplication and return an rtx for the result.
3443    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3444    TARGET is a suggestion for where to store the result (an rtx).
3445 
3446    We check specially for a constant integer as OP1.
3447    If you want this check for OP0 as well, then before calling
3448    you should swap the two operands if OP0 would be constant.  */
3449 
3450 rtx
expand_mult(machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,bool no_libcall)3451 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3452                int unsignedp, bool no_libcall)
3453 {
3454   enum mult_variant variant;
3455   struct algorithm algorithm;
3456   rtx scalar_op1;
3457   int max_cost;
3458   bool speed = optimize_insn_for_speed_p ();
3459   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3460 
3461   if (CONSTANT_P (op0))
3462     std::swap (op0, op1);
3463 
3464   /* For vectors, there are several simplifications that can be made if
3465      all elements of the vector constant are identical.  */
3466   scalar_op1 = unwrap_const_vec_duplicate (op1);
3467 
3468   if (INTEGRAL_MODE_P (mode))
3469     {
3470       rtx fake_reg;
3471       HOST_WIDE_INT coeff;
3472       bool is_neg;
3473       int mode_bitsize;
3474 
3475       if (op1 == CONST0_RTX (mode))
3476           return op1;
3477       if (op1 == CONST1_RTX (mode))
3478           return op0;
3479       if (op1 == CONSTM1_RTX (mode))
3480           return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3481                                   op0, target, 0);
3482 
3483       if (do_trapv)
3484           goto skip_synth;
3485 
3486       /* If mode is integer vector mode, check if the backend supports
3487            vector lshift (by scalar or vector) at all.  If not, we can't use
3488            synthetized multiply.  */
3489       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3490             && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3491             && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3492           goto skip_synth;
3493 
3494       /* These are the operations that are potentially turned into
3495            a sequence of shifts and additions.  */
3496       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3497 
3498       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3499            less than or equal in size to `unsigned int' this doesn't matter.
3500            If the mode is larger than `unsigned int', then synth_mult works
3501            only if the constant value exactly fits in an `unsigned int' without
3502            any truncation.  This means that multiplying by negative values does
3503            not work; results are off by 2^32 on a 32 bit machine.  */
3504       if (CONST_INT_P (scalar_op1))
3505           {
3506             coeff = INTVAL (scalar_op1);
3507             is_neg = coeff < 0;
3508           }
3509 #if TARGET_SUPPORTS_WIDE_INT
3510       else if (CONST_WIDE_INT_P (scalar_op1))
3511 #else
3512       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3513 #endif
3514           {
3515             int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3516             /* Perfect power of 2 (other than 1, which is handled above).  */
3517             if (shift > 0)
3518               return expand_shift (LSHIFT_EXPR, mode, op0,
3519                                          shift, target, unsignedp);
3520             else
3521               goto skip_synth;
3522           }
3523       else
3524           goto skip_synth;
3525 
3526       /* We used to test optimize here, on the grounds that it's better to
3527            produce a smaller program when -O is not used.  But this causes
3528            such a terrible slowdown sometimes that it seems better to always
3529            use synth_mult.  */
3530 
3531       /* Special case powers of two.  */
3532       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3533             && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3534           return expand_shift (LSHIFT_EXPR, mode, op0,
3535                                    floor_log2 (coeff), target, unsignedp);
3536 
3537       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3538 
3539       /* Attempt to handle multiplication of DImode values by negative
3540            coefficients, by performing the multiplication by a positive
3541            multiplier and then inverting the result.  */
3542       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3543           {
3544             /* Its safe to use -coeff even for INT_MIN, as the
3545                result is interpreted as an unsigned coefficient.
3546                Exclude cost of op0 from max_cost to match the cost
3547                calculation of the synth_mult.  */
3548             coeff = -(unsigned HOST_WIDE_INT) coeff;
3549             max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3550                                             mode, speed)
3551                           - neg_cost (speed, mode));
3552             if (max_cost <= 0)
3553               goto skip_synth;
3554 
3555             /* Special case powers of two.  */
3556             if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3557               {
3558                 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3559                                                floor_log2 (coeff), target, unsignedp);
3560                 return expand_unop (mode, neg_optab, temp, target, 0);
3561               }
3562 
3563             if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3564                                            max_cost))
3565               {
3566                 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3567                                                       &algorithm, variant);
3568                 return expand_unop (mode, neg_optab, temp, target, 0);
3569               }
3570             goto skip_synth;
3571           }
3572 
3573       /* Exclude cost of op0 from max_cost to match the cost
3574            calculation of the synth_mult.  */
3575       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3576       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3577           return expand_mult_const (mode, op0, coeff, target,
3578                                           &algorithm, variant);
3579     }
3580  skip_synth:
3581 
3582   /* Expand x*2.0 as x+x.  */
3583   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3584       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3585     {
3586       op0 = force_reg (GET_MODE (op0), op0);
3587       return expand_binop (mode, add_optab, op0, op0,
3588                                  target, unsignedp,
3589                                  no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3590     }
3591 
3592   /* This used to use umul_optab if unsigned, but for non-widening multiply
3593      there is no difference between signed and unsigned.  */
3594   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3595                           op0, op1, target, unsignedp,
3596                           no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3597   gcc_assert (op0 || no_libcall);
3598   return op0;
3599 }
3600 
3601 /* Return a cost estimate for multiplying a register by the given
3602    COEFFicient in the given MODE and SPEED.  */
3603 
3604 int
mult_by_coeff_cost(HOST_WIDE_INT coeff,machine_mode mode,bool speed)3605 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3606 {
3607   int max_cost;
3608   struct algorithm algorithm;
3609   enum mult_variant variant;
3610 
3611   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3612   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3613                                  mode, speed);
3614   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3615     return algorithm.cost.cost;
3616   else
3617     return max_cost;
3618 }
3619 
3620 /* Perform a widening multiplication and return an rtx for the result.
3621    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3622    TARGET is a suggestion for where to store the result (an rtx).
3623    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3624    or smul_widen_optab.
3625 
3626    We check specially for a constant integer as OP1, comparing the
3627    cost of a widening multiply against the cost of a sequence of shifts
3628    and adds.  */
3629 
3630 rtx
expand_widening_mult(machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,optab this_optab)3631 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3632                           int unsignedp, optab this_optab)
3633 {
3634   bool speed = optimize_insn_for_speed_p ();
3635   rtx cop1;
3636 
3637   if (CONST_INT_P (op1)
3638       && GET_MODE (op0) != VOIDmode
3639       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3640                                         this_optab == umul_widen_optab))
3641       && CONST_INT_P (cop1)
3642       && (INTVAL (cop1) >= 0
3643             || HWI_COMPUTABLE_MODE_P (mode)))
3644     {
3645       HOST_WIDE_INT coeff = INTVAL (cop1);
3646       int max_cost;
3647       enum mult_variant variant;
3648       struct algorithm algorithm;
3649 
3650       if (coeff == 0)
3651           return CONST0_RTX (mode);
3652 
3653       /* Special case powers of two.  */
3654       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3655           {
3656             op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3657             return expand_shift (LSHIFT_EXPR, mode, op0,
3658                                      floor_log2 (coeff), target, unsignedp);
3659           }
3660 
3661       /* Exclude cost of op0 from max_cost to match the cost
3662            calculation of the synth_mult.  */
3663       max_cost = mul_widen_cost (speed, mode);
3664       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3665                                      max_cost))
3666           {
3667             op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3668             return expand_mult_const (mode, op0, coeff, target,
3669                                             &algorithm, variant);
3670           }
3671     }
3672   return expand_binop (mode, this_optab, op0, op1, target,
3673                            unsignedp, OPTAB_LIB_WIDEN);
3674 }
3675 
3676 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3677    replace division by D, and put the least significant N bits of the result
3678    in *MULTIPLIER_PTR and return the most significant bit.
3679 
3680    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3681    needed precision is in PRECISION (should be <= N).
3682 
3683    PRECISION should be as small as possible so this function can choose
3684    multiplier more freely.
3685 
3686    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3687    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3688 
3689    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3690    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3691 
3692 unsigned HOST_WIDE_INT
choose_multiplier(unsigned HOST_WIDE_INT d,int n,int precision,unsigned HOST_WIDE_INT * multiplier_ptr,int * post_shift_ptr,int * lgup_ptr)3693 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3694                        unsigned HOST_WIDE_INT *multiplier_ptr,
3695                        int *post_shift_ptr, int *lgup_ptr)
3696 {
3697   int lgup, post_shift;
3698   int pow, pow2;
3699 
3700   /* lgup = ceil(log2(divisor)); */
3701   lgup = ceil_log2 (d);
3702 
3703   gcc_assert (lgup <= n);
3704 
3705   pow = n + lgup;
3706   pow2 = n + lgup - precision;
3707 
3708   /* mlow = 2^(N + lgup)/d */
3709   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3710   wide_int mlow = wi::udiv_trunc (val, d);
3711 
3712   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3713   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3714   wide_int mhigh = wi::udiv_trunc (val, d);
3715 
3716   /* If precision == N, then mlow, mhigh exceed 2^N
3717      (but they do not exceed 2^(N+1)).  */
3718 
3719   /* Reduce to lowest terms.  */
3720   for (post_shift = lgup; post_shift > 0; post_shift--)
3721     {
3722       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3723                                                                    HOST_BITS_PER_WIDE_INT);
3724       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3725                                                                    HOST_BITS_PER_WIDE_INT);
3726       if (ml_lo >= mh_lo)
3727           break;
3728 
3729       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3730       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3731     }
3732 
3733   *post_shift_ptr = post_shift;
3734   *lgup_ptr = lgup;
3735   if (n < HOST_BITS_PER_WIDE_INT)
3736     {
3737       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3738       *multiplier_ptr = mhigh.to_uhwi () & mask;
3739       return mhigh.to_uhwi () > mask;
3740     }
3741   else
3742     {
3743       *multiplier_ptr = mhigh.to_uhwi ();
3744       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3745     }
3746 }
3747 
3748 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3749    congruent to 1 (mod 2**N).  */
3750 
3751 static unsigned HOST_WIDE_INT
invert_mod2n(unsigned HOST_WIDE_INT x,int n)3752 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3753 {
3754   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3755 
3756   /* The algorithm notes that the choice y = x satisfies
3757      x*y == 1 mod 2^3, since x is assumed odd.
3758      Each iteration doubles the number of bits of significance in y.  */
3759 
3760   unsigned HOST_WIDE_INT mask;
3761   unsigned HOST_WIDE_INT y = x;
3762   int nbit = 3;
3763 
3764   mask = (n == HOST_BITS_PER_WIDE_INT
3765             ? HOST_WIDE_INT_M1U
3766             : (HOST_WIDE_INT_1U << n) - 1);
3767 
3768   while (nbit < n)
3769     {
3770       y = y * (2 - x*y) & mask;                   /* Modulo 2^N */
3771       nbit *= 2;
3772     }
3773   return y;
3774 }
3775 
3776 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3777    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3778    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3779    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3780    become signed.
3781 
3782    The result is put in TARGET if that is convenient.
3783 
3784    MODE is the mode of operation.  */
3785 
3786 rtx
expand_mult_highpart_adjust(scalar_int_mode mode,rtx adj_operand,rtx op0,rtx op1,rtx target,int unsignedp)3787 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3788                                    rtx op1, rtx target, int unsignedp)
3789 {
3790   rtx tem;
3791   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3792 
3793   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3794                           GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3795   tem = expand_and (mode, tem, op1, NULL_RTX);
3796   adj_operand
3797     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3798                          adj_operand);
3799 
3800   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3801                           GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3802   tem = expand_and (mode, tem, op0, NULL_RTX);
3803   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3804                                 target);
3805 
3806   return target;
3807 }
3808 
3809 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3810 
3811 static rtx
extract_high_half(scalar_int_mode mode,rtx op)3812 extract_high_half (scalar_int_mode mode, rtx op)
3813 {
3814   if (mode == word_mode)
3815     return gen_highpart (mode, op);
3816 
3817   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3818 
3819   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3820                          GET_MODE_BITSIZE (mode), 0, 1);
3821   return convert_modes (mode, wider_mode, op, 0);
3822 }
3823 
3824 /* Like expmed_mult_highpart, but only consider using a multiplication
3825    optab.  OP1 is an rtx for the constant operand.  */
3826 
3827 static rtx
expmed_mult_highpart_optab(scalar_int_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,int max_cost)3828 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3829                                   rtx target, int unsignedp, int max_cost)
3830 {
3831   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3832   optab moptab;
3833   rtx tem;
3834   int size;
3835   bool speed = optimize_insn_for_speed_p ();
3836 
3837   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3838 
3839   size = GET_MODE_BITSIZE (mode);
3840 
3841   /* Firstly, try using a multiplication insn that only generates the needed
3842      high part of the product, and in the sign flavor of unsignedp.  */
3843   if (mul_highpart_cost (speed, mode) < max_cost)
3844     {
3845       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3846       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3847                                 unsignedp, OPTAB_DIRECT);
3848       if (tem)
3849           return tem;
3850     }
3851 
3852   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3853      Need to adjust the result after the multiplication.  */
3854   if (size - 1 < BITS_PER_WORD
3855       && (mul_highpart_cost (speed, mode)
3856             + 2 * shift_cost (speed, mode, size-1)
3857             + 4 * add_cost (speed, mode) < max_cost))
3858     {
3859       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3860       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3861                                 unsignedp, OPTAB_DIRECT);
3862       if (tem)
3863           /* We used the wrong signedness.  Adjust the result.  */
3864           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3865                                                       tem, unsignedp);
3866     }
3867 
3868   /* Try widening multiplication.  */
3869   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3870   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3871       && mul_widen_cost (speed, wider_mode) < max_cost)
3872     {
3873       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3874                                 unsignedp, OPTAB_WIDEN);
3875       if (tem)
3876           return extract_high_half (mode, tem);
3877     }
3878 
3879   /* Try widening the mode and perform a non-widening multiplication.  */
3880   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3881       && size - 1 < BITS_PER_WORD
3882       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3883             < max_cost))
3884     {
3885       rtx_insn *insns;
3886       rtx wop0, wop1;
3887 
3888       /* We need to widen the operands, for example to ensure the
3889            constant multiplier is correctly sign or zero extended.
3890            Use a sequence to clean-up any instructions emitted by
3891            the conversions if things don't work out.  */
3892       start_sequence ();
3893       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3894       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3895       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3896                                 unsignedp, OPTAB_WIDEN);
3897       insns = get_insns ();
3898       end_sequence ();
3899 
3900       if (tem)
3901           {
3902             emit_insn (insns);
3903             return extract_high_half (mode, tem);
3904           }
3905     }
3906 
3907   /* Try widening multiplication of opposite signedness, and adjust.  */
3908   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3909   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3910       && size - 1 < BITS_PER_WORD
3911       && (mul_widen_cost (speed, wider_mode)
3912             + 2 * shift_cost (speed, mode, size-1)
3913             + 4 * add_cost (speed, mode) < max_cost))
3914     {
3915       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3916                                 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3917       if (tem != 0)
3918           {
3919             tem = extract_high_half (mode, tem);
3920             /* We used the wrong signedness.  Adjust the result.  */
3921             return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3922                                                         target, unsignedp);
3923           }
3924     }
3925 
3926   return 0;
3927 }
3928 
3929 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3930    putting the high half of the result in TARGET if that is convenient,
3931    and return where the result is.  If the operation cannot be performed,
3932    0 is returned.
3933 
3934    MODE is the mode of operation and result.
3935 
3936    UNSIGNEDP nonzero means unsigned multiply.
3937 
3938    MAX_COST is the total allowed cost for the expanded RTL.  */
3939 
3940 static rtx
expmed_mult_highpart(scalar_int_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,int max_cost)3941 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3942                           rtx target, int unsignedp, int max_cost)
3943 {
3944   unsigned HOST_WIDE_INT cnst1;
3945   int extra_cost;
3946   bool sign_adjust = false;
3947   enum mult_variant variant;
3948   struct algorithm alg;
3949   rtx tem;
3950   bool speed = optimize_insn_for_speed_p ();
3951 
3952   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3953   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3954 
3955   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3956 
3957   /* We can't optimize modes wider than BITS_PER_WORD.
3958      ??? We might be able to perform double-word arithmetic if
3959      mode == word_mode, however all the cost calculations in
3960      synth_mult etc. assume single-word operations.  */
3961   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3962   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3963     return expmed_mult_highpart_optab (mode, op0, op1, target,
3964                                                unsignedp, max_cost);
3965 
3966   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3967 
3968   /* Check whether we try to multiply by a negative constant.  */
3969   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3970     {
3971       sign_adjust = true;
3972       extra_cost += add_cost (speed, mode);
3973     }
3974 
3975   /* See whether shift/add multiplication is cheap enough.  */
3976   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3977                                  max_cost - extra_cost))
3978     {
3979       /* See whether the specialized multiplication optabs are
3980            cheaper than the shift/add version.  */
3981       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3982                                                   alg.cost.cost + extra_cost);
3983       if (tem)
3984           return tem;
3985 
3986       tem = convert_to_mode (wider_mode, op0, unsignedp);
3987       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3988       tem = extract_high_half (mode, tem);
3989 
3990       /* Adjust result for signedness.  */
3991       if (sign_adjust)
3992           tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3993 
3994       return tem;
3995     }
3996   return expmed_mult_highpart_optab (mode, op0, op1, target,
3997                                              unsignedp, max_cost);
3998 }
3999 
4000 
4001 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
4002 
4003 static rtx
expand_smod_pow2(scalar_int_mode mode,rtx op0,HOST_WIDE_INT d)4004 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4005 {
4006   rtx result, temp, shift;
4007   rtx_code_label *label;
4008   int logd;
4009   int prec = GET_MODE_PRECISION (mode);
4010 
4011   logd = floor_log2 (d);
4012   result = gen_reg_rtx (mode);
4013 
4014   /* Avoid conditional branches when they're expensive.  */
4015   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4016       && optimize_insn_for_speed_p ())
4017     {
4018       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4019                                               mode, 0, -1);
4020       if (signmask)
4021           {
4022             HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4023             signmask = force_reg (mode, signmask);
4024             shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4025 
4026             /* Use the rtx_cost of a LSHIFTRT instruction to determine
4027                which instruction sequence to use.  If logical right shifts
4028                are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
4029                use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4030 
4031             temp = gen_rtx_LSHIFTRT (mode, result, shift);
4032             if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4033                 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4034                       > COSTS_N_INSNS (2)))
4035               {
4036                 temp = expand_binop (mode, xor_optab, op0, signmask,
4037                                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
4038                 temp = expand_binop (mode, sub_optab, temp, signmask,
4039                                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
4040                 temp = expand_binop (mode, and_optab, temp,
4041                                            gen_int_mode (masklow, mode),
4042                                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
4043                 temp = expand_binop (mode, xor_optab, temp, signmask,
4044                                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
4045                 temp = expand_binop (mode, sub_optab, temp, signmask,
4046                                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
4047               }
4048             else
4049               {
4050                 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4051                                                NULL_RTX, 1, OPTAB_LIB_WIDEN);
4052                 signmask = force_reg (mode, signmask);
4053 
4054                 temp = expand_binop (mode, add_optab, op0, signmask,
4055                                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
4056                 temp = expand_binop (mode, and_optab, temp,
4057                                            gen_int_mode (masklow, mode),
4058                                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
4059                 temp = expand_binop (mode, sub_optab, temp, signmask,
4060                                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
4061               }
4062             return temp;
4063           }
4064     }
4065 
4066   /* Mask contains the mode's signbit and the significant bits of the
4067      modulus.  By including the signbit in the operation, many targets
4068      can avoid an explicit compare operation in the following comparison
4069      against zero.  */
4070   wide_int mask = wi::mask (logd, false, prec);
4071   mask = wi::set_bit (mask, prec - 1);
4072 
4073   temp = expand_binop (mode, and_optab, op0,
4074                            immed_wide_int_const (mask, mode),
4075                            result, 1, OPTAB_LIB_WIDEN);
4076   if (temp != result)
4077     emit_move_insn (result, temp);
4078 
4079   label = gen_label_rtx ();
4080   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4081 
4082   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4083                            0, OPTAB_LIB_WIDEN);
4084 
4085   mask = wi::mask (logd, true, prec);
4086   temp = expand_binop (mode, ior_optab, temp,
4087                            immed_wide_int_const (mask, mode),
4088                            result, 1, OPTAB_LIB_WIDEN);
4089   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4090                            0, OPTAB_LIB_WIDEN);
4091   if (temp != result)
4092     emit_move_insn (result, temp);
4093   emit_label (label);
4094   return result;
4095 }
4096 
4097 /* Expand signed division of OP0 by a power of two D in mode MODE.
4098    This routine is only called for positive values of D.  */
4099 
4100 static rtx
expand_sdiv_pow2(scalar_int_mode mode,rtx op0,HOST_WIDE_INT d)4101 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4102 {
4103   rtx temp;
4104   rtx_code_label *label;
4105   int logd;
4106 
4107   logd = floor_log2 (d);
4108 
4109   if (d == 2
4110       && BRANCH_COST (optimize_insn_for_speed_p (),
4111                           false) >= 1)
4112     {
4113       temp = gen_reg_rtx (mode);
4114       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4115       if (temp != NULL_RTX)
4116           {
4117             temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4118                                      0, OPTAB_LIB_WIDEN);
4119             return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4120           }
4121     }
4122 
4123   if (HAVE_conditional_move
4124       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4125     {
4126       rtx temp2;
4127 
4128       start_sequence ();
4129       temp2 = copy_to_mode_reg (mode, op0);
4130       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4131                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
4132       temp = force_reg (mode, temp);
4133 
4134       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4135       temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4136                                              temp, temp2, mode, 0);
4137       if (temp2)
4138           {
4139             rtx_insn *seq = get_insns ();
4140             end_sequence ();
4141             emit_insn (seq);
4142             return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4143           }
4144       end_sequence ();
4145     }
4146 
4147   if (BRANCH_COST (optimize_insn_for_speed_p (),
4148                        false) >= 2)
4149     {
4150       int ushift = GET_MODE_BITSIZE (mode) - logd;
4151 
4152       temp = gen_reg_rtx (mode);
4153       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4154       if (temp != NULL_RTX)
4155           {
4156             if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4157                 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4158                 > COSTS_N_INSNS (1))
4159               temp = expand_binop (mode, and_optab, temp,
4160                                          gen_int_mode (d - 1, mode),
4161                                          NULL_RTX, 0, OPTAB_LIB_WIDEN);
4162             else
4163               temp = expand_shift (RSHIFT_EXPR, mode, temp,
4164                                          ushift, NULL_RTX, 1);
4165             temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4166                                      0, OPTAB_LIB_WIDEN);
4167             return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4168           }
4169     }
4170 
4171   label = gen_label_rtx ();
4172   temp = copy_to_mode_reg (mode, op0);
4173   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4174   expand_inc (temp, gen_int_mode (d - 1, mode));
4175   emit_label (label);
4176   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4177 }
4178 
4179 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4180    if that is convenient, and returning where the result is.
4181    You may request either the quotient or the remainder as the result;
4182    specify REM_FLAG nonzero to get the remainder.
4183 
4184    CODE is the expression code for which kind of division this is;
4185    it controls how rounding is done.  MODE is the machine mode to use.
4186    UNSIGNEDP nonzero means do unsigned division.  */
4187 
4188 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4189    and then correct it by or'ing in missing high bits
4190    if result of ANDI is nonzero.
4191    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4192    This could optimize to a bfexts instruction.
4193    But C doesn't use these operations, so their optimizations are
4194    left for later.  */
4195 /* ??? For modulo, we don't actually need the highpart of the first product,
4196    the low part will do nicely.  And for small divisors, the second multiply
4197    can also be a low-part only multiply or even be completely left out.
4198    E.g. to calculate the remainder of a division by 3 with a 32 bit
4199    multiply, multiply with 0x55555556 and extract the upper two bits;
4200    the result is exact for inputs up to 0x1fffffff.
4201    The input range can be reduced by using cross-sum rules.
4202    For odd divisors >= 3, the following table gives right shift counts
4203    so that if a number is shifted by an integer multiple of the given
4204    amount, the remainder stays the same:
4205    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4206    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4207    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4208    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4209    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4210 
4211    Cross-sum rules for even numbers can be derived by leaving as many bits
4212    to the right alone as the divisor has zeros to the right.
4213    E.g. if x is an unsigned 32 bit number:
4214    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4215    */
4216 
4217 rtx
expand_divmod(int rem_flag,enum tree_code code,machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,enum optab_methods methods)4218 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4219                  rtx op0, rtx op1, rtx target, int unsignedp,
4220                  enum optab_methods methods)
4221 {
4222   machine_mode compute_mode;
4223   rtx tquotient;
4224   rtx quotient = 0, remainder = 0;
4225   rtx_insn *last;
4226   rtx_insn *insn;
4227   optab optab1, optab2;
4228   int op1_is_constant, op1_is_pow2 = 0;
4229   int max_cost, extra_cost;
4230   static HOST_WIDE_INT last_div_const = 0;
4231   bool speed = optimize_insn_for_speed_p ();
4232 
4233   op1_is_constant = CONST_INT_P (op1);
4234   if (op1_is_constant)
4235     {
4236       wide_int ext_op1 = rtx_mode_t (op1, mode);
4237       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4238                          || (! unsignedp
4239                                && wi::popcount (wi::neg (ext_op1)) == 1));
4240     }
4241 
4242   /*
4243      This is the structure of expand_divmod:
4244 
4245      First comes code to fix up the operands so we can perform the operations
4246      correctly and efficiently.
4247 
4248      Second comes a switch statement with code specific for each rounding mode.
4249      For some special operands this code emits all RTL for the desired
4250      operation, for other cases, it generates only a quotient and stores it in
4251      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4252      to indicate that it has not done anything.
4253 
4254      Last comes code that finishes the operation.  If QUOTIENT is set and
4255      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4256      QUOTIENT is not set, it is computed using trunc rounding.
4257 
4258      We try to generate special code for division and remainder when OP1 is a
4259      constant.  If |OP1| = 2**n we can use shifts and some other fast
4260      operations.  For other values of OP1, we compute a carefully selected
4261      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4262      by m.
4263 
4264      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4265      half of the product.  Different strategies for generating the product are
4266      implemented in expmed_mult_highpart.
4267 
4268      If what we actually want is the remainder, we generate that by another
4269      by-constant multiplication and a subtraction.  */
4270 
4271   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4272      code below will malfunction if we are, so check here and handle
4273      the special case if so.  */
4274   if (op1 == const1_rtx)
4275     return rem_flag ? const0_rtx : op0;
4276 
4277     /* When dividing by -1, we could get an overflow.
4278      negv_optab can handle overflows.  */
4279   if (! unsignedp && op1 == constm1_rtx)
4280     {
4281       if (rem_flag)
4282           return const0_rtx;
4283       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4284                                 ? negv_optab : neg_optab, op0, target, 0);
4285     }
4286 
4287   if (target
4288       /* Don't use the function value register as a target
4289            since we have to read it as well as write it,
4290            and function-inlining gets confused by this.  */
4291       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4292             /* Don't clobber an operand while doing a multi-step calculation.  */
4293             || ((rem_flag || op1_is_constant)
4294                 && (reg_mentioned_p (target, op0)
4295                       || (MEM_P (op0) && MEM_P (target))))
4296             || reg_mentioned_p (target, op1)
4297             || (MEM_P (op1) && MEM_P (target))))
4298     target = 0;
4299 
4300   /* Get the mode in which to perform this computation.  Normally it will
4301      be MODE, but sometimes we can't do the desired operation in MODE.
4302      If so, pick a wider mode in which we can do the operation.  Convert
4303      to that mode at the start to avoid repeated conversions.
4304 
4305      First see what operations we need.  These depend on the expression
4306      we are evaluating.  (We assume that divxx3 insns exist under the
4307      same conditions that modxx3 insns and that these insns don't normally
4308      fail.  If these assumptions are not correct, we may generate less
4309      efficient code in some cases.)
4310 
4311      Then see if we find a mode in which we can open-code that operation
4312      (either a division, modulus, or shift).  Finally, check for the smallest
4313      mode for which we can do the operation with a library call.  */
4314 
4315   /* We might want to refine this now that we have division-by-constant
4316      optimization.  Since expmed_mult_highpart tries so many variants, it is
4317      not straightforward to generalize this.  Maybe we should make an array
4318      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4319 
4320   optab1 = (op1_is_pow2
4321               ? (unsignedp ? lshr_optab : ashr_optab)
4322               : (unsignedp ? udiv_optab : sdiv_optab));
4323   optab2 = (op1_is_pow2 ? optab1
4324               : (unsignedp ? udivmod_optab : sdivmod_optab));
4325 
4326   if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4327     {
4328       FOR_EACH_MODE_FROM (compute_mode, mode)
4329       if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4330             || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4331           break;
4332 
4333       if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4334           FOR_EACH_MODE_FROM (compute_mode, mode)
4335             if (optab_libfunc (optab1, compute_mode)
4336                 || optab_libfunc (optab2, compute_mode))
4337               break;
4338     }
4339   else
4340     compute_mode = mode;
4341 
4342   /* If we still couldn't find a mode, use MODE, but expand_binop will
4343      probably die.  */
4344   if (compute_mode == VOIDmode)
4345     compute_mode = mode;
4346 
4347   if (target && GET_MODE (target) == compute_mode)
4348     tquotient = target;
4349   else
4350     tquotient = gen_reg_rtx (compute_mode);
4351 
4352 #if 0
4353   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4354      (mode), and thereby get better code when OP1 is a constant.  Do that
4355      later.  It will require going over all usages of SIZE below.  */
4356   size = GET_MODE_BITSIZE (mode);
4357 #endif
4358 
4359   /* Only deduct something for a REM if the last divide done was
4360      for a different constant.   Then set the constant of the last
4361      divide.  */
4362   max_cost = (unsignedp
4363                 ? udiv_cost (speed, compute_mode)
4364                 : sdiv_cost (speed, compute_mode));
4365   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4366                          && INTVAL (op1) == last_div_const))
4367     max_cost -= (mul_cost (speed, compute_mode)
4368                      + add_cost (speed, compute_mode));
4369 
4370   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4371 
4372   /* Now convert to the best mode to use.  */
4373   if (compute_mode != mode)
4374     {
4375       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4376       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4377 
4378       /* convert_modes may have placed op1 into a register, so we
4379            must recompute the following.  */
4380       op1_is_constant = CONST_INT_P (op1);
4381       if (op1_is_constant)
4382           {
4383             wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4384             op1_is_pow2 = (wi::popcount (ext_op1) == 1
4385                                || (! unsignedp
4386                                    && wi::popcount (wi::neg (ext_op1)) == 1));
4387           }
4388       else
4389           op1_is_pow2 = 0;
4390     }
4391 
4392   /* If one of the operands is a volatile MEM, copy it into a register.  */
4393 
4394   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4395     op0 = force_reg (compute_mode, op0);
4396   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4397     op1 = force_reg (compute_mode, op1);
4398 
4399   /* If we need the remainder or if OP1 is constant, we need to
4400      put OP0 in a register in case it has any queued subexpressions.  */
4401   if (rem_flag || op1_is_constant)
4402     op0 = force_reg (compute_mode, op0);
4403 
4404   last = get_last_insn ();
4405 
4406   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4407   if (unsignedp)
4408     {
4409       if (code == FLOOR_DIV_EXPR)
4410           code = TRUNC_DIV_EXPR;
4411       if (code == FLOOR_MOD_EXPR)
4412           code = TRUNC_MOD_EXPR;
4413       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4414           code = TRUNC_DIV_EXPR;
4415     }
4416 
4417   if (op1 != const0_rtx)
4418     switch (code)
4419       {
4420       case TRUNC_MOD_EXPR:
4421       case TRUNC_DIV_EXPR:
4422           if (op1_is_constant)
4423             {
4424               scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4425               int size = GET_MODE_BITSIZE (int_mode);
4426               if (unsignedp)
4427                 {
4428                     unsigned HOST_WIDE_INT mh, ml;
4429                     int pre_shift, post_shift;
4430                     int dummy;
4431                     wide_int wd = rtx_mode_t (op1, int_mode);
4432                     unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4433 
4434                     if (wi::popcount (wd) == 1)
4435                       {
4436                         pre_shift = floor_log2 (d);
4437                         if (rem_flag)
4438                           {
4439                               unsigned HOST_WIDE_INT mask
4440                                 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4441                               remainder
4442                                 = expand_binop (int_mode, and_optab, op0,
4443                                                     gen_int_mode (mask, int_mode),
4444                                                     remainder, 1, methods);
4445                               if (remainder)
4446                                 return gen_lowpart (mode, remainder);
4447                           }
4448                         quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4449                                                        pre_shift, tquotient, 1);
4450                       }
4451                     else if (size <= HOST_BITS_PER_WIDE_INT)
4452                       {
4453                         if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4454                           {
4455                               /* Most significant bit of divisor is set; emit an scc
4456                                  insn.  */
4457                               quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4458                                                                         int_mode, 1, 1);
4459                           }
4460                         else
4461                           {
4462                               /* Find a suitable multiplier and right shift count
4463                                  instead of multiplying with D.  */
4464 
4465                               mh = choose_multiplier (d, size, size,
4466                                                             &ml, &post_shift, &dummy);
4467 
4468                               /* If the suggested multiplier is more than SIZE bits,
4469                                  we can do better for even divisors, using an
4470                                  initial right shift.  */
4471                               if (mh != 0 && (d & 1) == 0)
4472                                 {
4473                                   pre_shift = ctz_or_zero (d);
4474                                   mh = choose_multiplier (d >> pre_shift, size,
4475                                                                 size - pre_shift,
4476                                                                 &ml, &post_shift, &dummy);
4477                                   gcc_assert (!mh);
4478                                 }
4479                               else
4480                                 pre_shift = 0;
4481 
4482                               if (mh != 0)
4483                                 {
4484                                   rtx t1, t2, t3, t4;
4485 
4486                                   if (post_shift - 1 >= BITS_PER_WORD)
4487                                     goto fail1;
4488 
4489                                   extra_cost
4490                                     = (shift_cost (speed, int_mode, post_shift - 1)
4491                                          + shift_cost (speed, int_mode, 1)
4492                                          + 2 * add_cost (speed, int_mode));
4493                                   t1 = expmed_mult_highpart
4494                                     (int_mode, op0, gen_int_mode (ml, int_mode),
4495                                      NULL_RTX, 1, max_cost - extra_cost);
4496                                   if (t1 == 0)
4497                                     goto fail1;
4498                                   t2 = force_operand (gen_rtx_MINUS (int_mode,
4499                                                                              op0, t1),
4500                                                             NULL_RTX);
4501                                   t3 = expand_shift (RSHIFT_EXPR, int_mode,
4502                                                          t2, 1, NULL_RTX, 1);
4503                                   t4 = force_operand (gen_rtx_PLUS (int_mode,
4504                                                                             t1, t3),
4505                                                             NULL_RTX);
4506                                   quotient = expand_shift
4507                                     (RSHIFT_EXPR, int_mode, t4,
4508                                      post_shift - 1, tquotient, 1);
4509                                 }
4510                               else
4511                                 {
4512                                   rtx t1, t2;
4513 
4514                                   if (pre_shift >= BITS_PER_WORD
4515                                         || post_shift >= BITS_PER_WORD)
4516                                     goto fail1;
4517 
4518                                   t1 = expand_shift
4519                                     (RSHIFT_EXPR, int_mode, op0,
4520                                      pre_shift, NULL_RTX, 1);
4521                                   extra_cost
4522                                     = (shift_cost (speed, int_mode, pre_shift)
4523                                          + shift_cost (speed, int_mode, post_shift));
4524                                   t2 = expmed_mult_highpart
4525                                     (int_mode, t1,
4526                                      gen_int_mode (ml, int_mode),
4527                                      NULL_RTX, 1, max_cost - extra_cost);
4528                                   if (t2 == 0)
4529                                     goto fail1;
4530                                   quotient = expand_shift
4531                                     (RSHIFT_EXPR, int_mode, t2,
4532                                      post_shift, tquotient, 1);
4533                                 }
4534                           }
4535                       }
4536                     else                /* Too wide mode to use tricky code */
4537                       break;
4538 
4539                     insn = get_last_insn ();
4540                     if (insn != last)
4541                       set_dst_reg_note (insn, REG_EQUAL,
4542                                             gen_rtx_UDIV (int_mode, op0, op1),
4543                                             quotient);
4544                 }
4545               else            /* TRUNC_DIV, signed */
4546                 {
4547                     unsigned HOST_WIDE_INT ml;
4548                     int lgup, post_shift;
4549                     rtx mlr;
4550                     HOST_WIDE_INT d = INTVAL (op1);
4551                     unsigned HOST_WIDE_INT abs_d;
4552 
4553                     /* Not prepared to handle division/remainder by
4554                        0xffffffffffffffff8000000000000000 etc.  */
4555                     if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4556                       break;
4557 
4558                     /* Since d might be INT_MIN, we have to cast to
4559                        unsigned HOST_WIDE_INT before negating to avoid
4560                        undefined signed overflow.  */
4561                     abs_d = (d >= 0
4562                                ? (unsigned HOST_WIDE_INT) d
4563                                : - (unsigned HOST_WIDE_INT) d);
4564 
4565                     /* n rem d = n rem -d */
4566                     if (rem_flag && d < 0)
4567                       {
4568                         d = abs_d;
4569                         op1 = gen_int_mode (abs_d, int_mode);
4570                       }
4571 
4572                     if (d == 1)
4573                       quotient = op0;
4574                     else if (d == -1)
4575                       quotient = expand_unop (int_mode, neg_optab, op0,
4576                                                     tquotient, 0);
4577                     else if (size <= HOST_BITS_PER_WIDE_INT
4578                                && abs_d == HOST_WIDE_INT_1U << (size - 1))
4579                       {
4580                         /* This case is not handled correctly below.  */
4581                         quotient = emit_store_flag (tquotient, EQ, op0, op1,
4582                                                             int_mode, 1, 1);
4583                         if (quotient == 0)
4584                           goto fail1;
4585                       }
4586                     else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4587                                && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4588                                && (rem_flag
4589                                    ? smod_pow2_cheap (speed, int_mode)
4590                                    : sdiv_pow2_cheap (speed, int_mode))
4591                                /* We assume that cheap metric is true if the
4592                                   optab has an expander for this mode.  */
4593                                && ((optab_handler ((rem_flag ? smod_optab
4594                                                         : sdiv_optab),
4595                                                        int_mode)
4596                                     != CODE_FOR_nothing)
4597                                    || (optab_handler (sdivmod_optab, int_mode)
4598                                          != CODE_FOR_nothing)))
4599                       ;
4600                     else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4601                       {
4602                         if (rem_flag)
4603                           {
4604                               remainder = expand_smod_pow2 (int_mode, op0, d);
4605                               if (remainder)
4606                                 return gen_lowpart (mode, remainder);
4607                           }
4608 
4609                         if (sdiv_pow2_cheap (speed, int_mode)
4610                               && ((optab_handler (sdiv_optab, int_mode)
4611                                    != CODE_FOR_nothing)
4612                                   || (optab_handler (sdivmod_optab, int_mode)
4613                                         != CODE_FOR_nothing)))
4614                           quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4615                                                             int_mode, op0,
4616                                                             gen_int_mode (abs_d,
4617                                                                             int_mode),
4618                                                             NULL_RTX, 0);
4619                         else
4620                           quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4621 
4622                         /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4623                            negate the quotient.  */
4624                         if (d < 0)
4625                           {
4626                               insn = get_last_insn ();
4627                               if (insn != last
4628                                   && abs_d < (HOST_WIDE_INT_1U
4629                                                   << (HOST_BITS_PER_WIDE_INT - 1)))
4630                                 set_dst_reg_note (insn, REG_EQUAL,
4631                                                       gen_rtx_DIV (int_mode, op0,
4632                                                                        gen_int_mode
4633                                                                          (abs_d,
4634                                                                           int_mode)),
4635                                                       quotient);
4636 
4637                               quotient = expand_unop (int_mode, neg_optab,
4638                                                             quotient, quotient, 0);
4639                           }
4640                       }
4641                     else if (size <= HOST_BITS_PER_WIDE_INT)
4642                       {
4643                         choose_multiplier (abs_d, size, size - 1,
4644                                                &ml, &post_shift, &lgup);
4645                         if (ml < HOST_WIDE_INT_1U << (size - 1))
4646                           {
4647                               rtx t1, t2, t3;
4648 
4649                               if (post_shift >= BITS_PER_WORD
4650                                   || size - 1 >= BITS_PER_WORD)
4651                                 goto fail1;
4652 
4653                               extra_cost = (shift_cost (speed, int_mode, post_shift)
4654                                               + shift_cost (speed, int_mode, size - 1)
4655                                               + add_cost (speed, int_mode));
4656                               t1 = expmed_mult_highpart
4657                                 (int_mode, op0, gen_int_mode (ml, int_mode),
4658                                  NULL_RTX, 0, max_cost - extra_cost);
4659                               if (t1 == 0)
4660                                 goto fail1;
4661                               t2 = expand_shift
4662                                 (RSHIFT_EXPR, int_mode, t1,
4663                                  post_shift, NULL_RTX, 0);
4664                               t3 = expand_shift
4665                                 (RSHIFT_EXPR, int_mode, op0,
4666                                  size - 1, NULL_RTX, 0);
4667                               if (d < 0)
4668                                 quotient
4669                                   = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4670                                                        tquotient);
4671                               else
4672                                 quotient
4673                                   = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4674                                                        tquotient);
4675                           }
4676                         else
4677                           {
4678                               rtx t1, t2, t3, t4;
4679 
4680                               if (post_shift >= BITS_PER_WORD
4681                                   || size - 1 >= BITS_PER_WORD)
4682                                 goto fail1;
4683 
4684                               ml |= HOST_WIDE_INT_M1U << (size - 1);
4685                               mlr = gen_int_mode (ml, int_mode);
4686                               extra_cost = (shift_cost (speed, int_mode, post_shift)
4687                                               + shift_cost (speed, int_mode, size - 1)
4688                                               + 2 * add_cost (speed, int_mode));
4689                               t1 = expmed_mult_highpart (int_mode, op0, mlr,
4690                                                                NULL_RTX, 0,
4691                                                                max_cost - extra_cost);
4692                               if (t1 == 0)
4693                                 goto fail1;
4694                               t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4695                                                       NULL_RTX);
4696                               t3 = expand_shift
4697                                 (RSHIFT_EXPR, int_mode, t2,
4698                                  post_shift, NULL_RTX, 0);
4699                               t4 = expand_shift
4700                                 (RSHIFT_EXPR, int_mode, op0,
4701                                  size - 1, NULL_RTX, 0);
4702                               if (d < 0)
4703                                 quotient
4704                                   = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4705                                                        tquotient);
4706                               else
4707                                 quotient
4708                                   = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4709                                                        tquotient);
4710                           }
4711                       }
4712                     else                /* Too wide mode to use tricky code */
4713                       break;
4714 
4715                     insn = get_last_insn ();
4716                     if (insn != last)
4717                       set_dst_reg_note (insn, REG_EQUAL,
4718                                             gen_rtx_DIV (int_mode, op0, op1),
4719                                             quotient);
4720                 }
4721               break;
4722             }
4723       fail1:
4724           delete_insns_since (last);
4725           break;
4726 
4727       case FLOOR_DIV_EXPR:
4728       case FLOOR_MOD_EXPR:
4729       /* We will come here only for signed operations.  */
4730           if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4731             {
4732               scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4733               int size = GET_MODE_BITSIZE (int_mode);
4734               unsigned HOST_WIDE_INT mh, ml;
4735               int pre_shift, lgup, post_shift;
4736               HOST_WIDE_INT d = INTVAL (op1);
4737 
4738               if (d > 0)
4739                 {
4740                     /* We could just as easily deal with negative constants here,
4741                        but it does not seem worth the trouble for GCC 2.6.  */
4742                     if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4743                       {
4744                         pre_shift = floor_log2 (d);
4745                         if (rem_flag)
4746                           {
4747                               unsigned HOST_WIDE_INT mask
4748                                 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4749                               remainder = expand_binop
4750                                 (int_mode, and_optab, op0,
4751                                  gen_int_mode (mask, int_mode),
4752                                  remainder, 0, methods);
4753                               if (remainder)
4754                                 return gen_lowpart (mode, remainder);
4755                           }
4756                         quotient = expand_shift
4757                           (RSHIFT_EXPR, int_mode, op0,
4758                            pre_shift, tquotient, 0);
4759                       }
4760                     else
4761                       {
4762                         rtx t1, t2, t3, t4;
4763 
4764                         mh = choose_multiplier (d, size, size - 1,
4765                                                       &ml, &post_shift, &lgup);
4766                         gcc_assert (!mh);
4767 
4768                         if (post_shift < BITS_PER_WORD
4769                               && size - 1 < BITS_PER_WORD)
4770                           {
4771                               t1 = expand_shift
4772                                 (RSHIFT_EXPR, int_mode, op0,
4773                                  size - 1, NULL_RTX, 0);
4774                               t2 = expand_binop (int_mode, xor_optab, op0, t1,
4775                                                      NULL_RTX, 0, OPTAB_WIDEN);
4776                               extra_cost = (shift_cost (speed, int_mode, post_shift)
4777                                               + shift_cost (speed, int_mode, size - 1)
4778                                               + 2 * add_cost (speed, int_mode));
4779                               t3 = expmed_mult_highpart
4780                                 (int_mode, t2, gen_int_mode (ml, int_mode),
4781                                  NULL_RTX, 1, max_cost - extra_cost);
4782                               if (t3 != 0)
4783                                 {
4784                                   t4 = expand_shift
4785                                     (RSHIFT_EXPR, int_mode, t3,
4786                                      post_shift, NULL_RTX, 1);
4787                                   quotient = expand_binop (int_mode, xor_optab,
4788                                                                  t4, t1, tquotient, 0,
4789                                                                  OPTAB_WIDEN);
4790                                 }
4791                           }
4792                       }
4793                 }
4794               else
4795                 {
4796                     rtx nsign, t1, t2, t3, t4;
4797                     t1 = force_operand (gen_rtx_PLUS (int_mode,
4798                                                               op0, constm1_rtx), NULL_RTX);
4799                     t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4800                                            0, OPTAB_WIDEN);
4801                     nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4802                                               size - 1, NULL_RTX, 0);
4803                     t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4804                                             NULL_RTX);
4805                     t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4806                                             NULL_RTX, 0);
4807                     if (t4)
4808                       {
4809                         rtx t5;
4810                         t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4811                                               NULL_RTX, 0);
4812                         quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4813                                                         tquotient);
4814                       }
4815                 }
4816             }
4817 
4818           if (quotient != 0)
4819             break;
4820           delete_insns_since (last);
4821 
4822           /* Try using an instruction that produces both the quotient and
4823              remainder, using truncation.  We can easily compensate the quotient
4824              or remainder to get floor rounding, once we have the remainder.
4825              Notice that we compute also the final remainder value here,
4826              and return the result right away.  */
4827           if (target == 0 || GET_MODE (target) != compute_mode)
4828             target = gen_reg_rtx (compute_mode);
4829 
4830           if (rem_flag)
4831             {
4832               remainder
4833                 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4834               quotient = gen_reg_rtx (compute_mode);
4835             }
4836           else
4837             {
4838               quotient
4839                 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4840               remainder = gen_reg_rtx (compute_mode);
4841             }
4842 
4843           if (expand_twoval_binop (sdivmod_optab, op0, op1,
4844                                          quotient, remainder, 0))
4845             {
4846               /* This could be computed with a branch-less sequence.
4847                  Save that for later.  */
4848               rtx tem;
4849               rtx_code_label *label = gen_label_rtx ();
4850               do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4851               tem = expand_binop (compute_mode, xor_optab, op0, op1,
4852                                         NULL_RTX, 0, OPTAB_WIDEN);
4853               do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4854               expand_dec (quotient, const1_rtx);
4855               expand_inc (remainder, op1);
4856               emit_label (label);
4857               return gen_lowpart (mode, rem_flag ? remainder : quotient);
4858             }
4859 
4860           /* No luck with division elimination or divmod.  Have to do it
4861              by conditionally adjusting op0 *and* the result.  */
4862           {
4863             rtx_code_label *label1, *label2, *label3, *label4, *label5;
4864             rtx adjusted_op0;
4865             rtx tem;
4866 
4867             quotient = gen_reg_rtx (compute_mode);
4868             adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4869             label1 = gen_label_rtx ();
4870             label2 = gen_label_rtx ();
4871             label3 = gen_label_rtx ();
4872             label4 = gen_label_rtx ();
4873             label5 = gen_label_rtx ();
4874             do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4875             do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4876             tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4877                                     quotient, 0, methods);
4878             if (tem != quotient)
4879               emit_move_insn (quotient, tem);
4880             emit_jump_insn (targetm.gen_jump (label5));
4881             emit_barrier ();
4882             emit_label (label1);
4883             expand_inc (adjusted_op0, const1_rtx);
4884             emit_jump_insn (targetm.gen_jump (label4));
4885             emit_barrier ();
4886             emit_label (label2);
4887             do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4888             tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4889                                     quotient, 0, methods);
4890             if (tem != quotient)
4891               emit_move_insn (quotient, tem);
4892             emit_jump_insn (targetm.gen_jump (label5));
4893             emit_barrier ();
4894             emit_label (label3);
4895             expand_dec (adjusted_op0, const1_rtx);
4896             emit_label (label4);
4897             tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4898                                     quotient, 0, methods);
4899             if (tem != quotient)
4900               emit_move_insn (quotient, tem);
4901             expand_dec (quotient, const1_rtx);
4902             emit_label (label5);
4903           }
4904           break;
4905 
4906       case CEIL_DIV_EXPR:
4907       case CEIL_MOD_EXPR:
4908           if (unsignedp)
4909             {
4910               if (op1_is_constant
4911                     && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4912                     && (HWI_COMPUTABLE_MODE_P (compute_mode)
4913                         || INTVAL (op1) >= 0))
4914                 {
4915                     scalar_int_mode int_mode
4916                       = as_a <scalar_int_mode> (compute_mode);
4917                     rtx t1, t2, t3;
4918                     unsigned HOST_WIDE_INT d = INTVAL (op1);
4919                     t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4920                                            floor_log2 (d), tquotient, 1);
4921                     t2 = expand_binop (int_mode, and_optab, op0,
4922                                            gen_int_mode (d - 1, int_mode),
4923                                            NULL_RTX, 1, methods);
4924                     t3 = gen_reg_rtx (int_mode);
4925                     t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4926                     if (t3 == 0)
4927                       {
4928                         rtx_code_label *lab;
4929                         lab = gen_label_rtx ();
4930                         do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4931                         expand_inc (t1, const1_rtx);
4932                         emit_label (lab);
4933                         quotient = t1;
4934                       }
4935                     else
4936                       quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4937                                                       tquotient);
4938                     break;
4939                 }
4940 
4941               /* Try using an instruction that produces both the quotient and
4942                  remainder, using truncation.  We can easily compensate the
4943                  quotient or remainder to get ceiling rounding, once we have the
4944                  remainder.  Notice that we compute also the final remainder
4945                  value here, and return the result right away.  */
4946               if (target == 0 || GET_MODE (target) != compute_mode)
4947                 target = gen_reg_rtx (compute_mode);
4948 
4949               if (rem_flag)
4950                 {
4951                     remainder = (REG_P (target)
4952                                    ? target : gen_reg_rtx (compute_mode));
4953                     quotient = gen_reg_rtx (compute_mode);
4954                 }
4955               else
4956                 {
4957                     quotient = (REG_P (target)
4958                                   ? target : gen_reg_rtx (compute_mode));
4959                     remainder = gen_reg_rtx (compute_mode);
4960                 }
4961 
4962               if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4963                                              remainder, 1))
4964                 {
4965                     /* This could be computed with a branch-less sequence.
4966                        Save that for later.  */
4967                     rtx_code_label *label = gen_label_rtx ();
4968                     do_cmp_and_jump (remainder, const0_rtx, EQ,
4969                                          compute_mode, label);
4970                     expand_inc (quotient, const1_rtx);
4971                     expand_dec (remainder, op1);
4972                     emit_label (label);
4973                     return gen_lowpart (mode, rem_flag ? remainder : quotient);
4974                 }
4975 
4976               /* No luck with division elimination or divmod.  Have to do it
4977                  by conditionally adjusting op0 *and* the result.  */
4978               {
4979                 rtx_code_label *label1, *label2;
4980                 rtx adjusted_op0, tem;
4981 
4982                 quotient = gen_reg_rtx (compute_mode);
4983                 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4984                 label1 = gen_label_rtx ();
4985                 label2 = gen_label_rtx ();
4986                 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4987                                      compute_mode, label1);
4988                 emit_move_insn  (quotient, const0_rtx);
4989                 emit_jump_insn (targetm.gen_jump (label2));
4990                 emit_barrier ();
4991                 emit_label (label1);
4992                 expand_dec (adjusted_op0, const1_rtx);
4993                 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4994                                           quotient, 1, methods);
4995                 if (tem != quotient)
4996                     emit_move_insn (quotient, tem);
4997                 expand_inc (quotient, const1_rtx);
4998                 emit_label (label2);
4999               }
5000             }
5001           else /* signed */
5002             {
5003               if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5004                     && INTVAL (op1) >= 0)
5005                 {
5006                     /* This is extremely similar to the code for the unsigned case
5007                        above.  For 2.7 we should merge these variants, but for
5008                        2.6.1 I don't want to touch the code for unsigned since that
5009                        get used in C.  The signed case will only be used by other
5010                        languages (Ada).  */
5011 
5012                     rtx t1, t2, t3;
5013                     unsigned HOST_WIDE_INT d = INTVAL (op1);
5014                     t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5015                                            floor_log2 (d), tquotient, 0);
5016                     t2 = expand_binop (compute_mode, and_optab, op0,
5017                                            gen_int_mode (d - 1, compute_mode),
5018                                            NULL_RTX, 1, methods);
5019                     t3 = gen_reg_rtx (compute_mode);
5020                     t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5021                                               compute_mode, 1, 1);
5022                     if (t3 == 0)
5023                       {
5024                         rtx_code_label *lab;
5025                         lab = gen_label_rtx ();
5026                         do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5027                         expand_inc (t1, const1_rtx);
5028                         emit_label (lab);
5029                         quotient = t1;
5030                       }
5031                     else
5032                       quotient = force_operand (gen_rtx_PLUS (compute_mode,
5033                                                                         t1, t3),
5034                                                       tquotient);
5035                     break;
5036                 }
5037 
5038               /* Try using an instruction that produces both the quotient and
5039                  remainder, using truncation.  We can easily compensate the
5040                  quotient or remainder to get ceiling rounding, once we have the
5041                  remainder.  Notice that we compute also the final remainder
5042                  value here, and return the result right away.  */
5043               if (target == 0 || GET_MODE (target) != compute_mode)
5044                 target = gen_reg_rtx (compute_mode);
5045               if (rem_flag)
5046                 {
5047                     remainder= (REG_P (target)
5048                                   ? target : gen_reg_rtx (compute_mode));
5049                     quotient = gen_reg_rtx (compute_mode);
5050                 }
5051               else
5052                 {
5053                     quotient = (REG_P (target)
5054                                   ? target : gen_reg_rtx (compute_mode));
5055                     remainder = gen_reg_rtx (compute_mode);
5056                 }
5057 
5058               if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5059                                              remainder, 0))
5060                 {
5061                     /* This could be computed with a branch-less sequence.
5062                        Save that for later.  */
5063                     rtx tem;
5064                     rtx_code_label *label = gen_label_rtx ();
5065                     do_cmp_and_jump (remainder, const0_rtx, EQ,
5066                                          compute_mode, label);
5067                     tem = expand_binop (compute_mode, xor_optab, op0, op1,
5068                                             NULL_RTX, 0, OPTAB_WIDEN);
5069                     do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5070                     expand_inc (quotient, const1_rtx);
5071                     expand_dec (remainder, op1);
5072                     emit_label (label);
5073                     return gen_lowpart (mode, rem_flag ? remainder : quotient);
5074                 }
5075 
5076               /* No luck with division elimination or divmod.  Have to do it
5077                  by conditionally adjusting op0 *and* the result.  */
5078               {
5079                 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5080                 rtx adjusted_op0;
5081                 rtx tem;
5082 
5083                 quotient = gen_reg_rtx (compute_mode);
5084                 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5085                 label1 = gen_label_rtx ();
5086                 label2 = gen_label_rtx ();
5087                 label3 = gen_label_rtx ();
5088                 label4 = gen_label_rtx ();
5089                 label5 = gen_label_rtx ();
5090                 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5091                 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5092                                      compute_mode, label1);
5093                 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5094                                           quotient, 0, methods);
5095                 if (tem != quotient)
5096                     emit_move_insn (quotient, tem);
5097                 emit_jump_insn (targetm.gen_jump (label5));
5098                 emit_barrier ();
5099                 emit_label (label1);
5100                 expand_dec (adjusted_op0, const1_rtx);
5101                 emit_jump_insn (targetm.gen_jump (label4));
5102                 emit_barrier ();
5103                 emit_label (label2);
5104                 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5105                                      compute_mode, label3);
5106                 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5107                                           quotient, 0, methods);
5108                 if (tem != quotient)
5109                     emit_move_insn (quotient, tem);
5110                 emit_jump_insn (targetm.gen_jump (label5));
5111                 emit_barrier ();
5112                 emit_label (label3);
5113                 expand_inc (adjusted_op0, const1_rtx);
5114                 emit_label (label4);
5115                 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5116                                           quotient, 0, methods);
5117                 if (tem != quotient)
5118                     emit_move_insn (quotient, tem);
5119                 expand_inc (quotient, const1_rtx);
5120                 emit_label (label5);
5121               }
5122             }
5123           break;
5124 
5125       case EXACT_DIV_EXPR:
5126           if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5127             {
5128               scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5129               int size = GET_MODE_BITSIZE (int_mode);
5130               HOST_WIDE_INT d = INTVAL (op1);
5131               unsigned HOST_WIDE_INT ml;
5132               int pre_shift;
5133               rtx t1;
5134 
5135               pre_shift = ctz_or_zero (d);
5136               ml = invert_mod2n (d >> pre_shift, size);
5137               t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5138                                      pre_shift, NULL_RTX, unsignedp);
5139               quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5140                                             NULL_RTX, 1);
5141 
5142               insn = get_last_insn ();
5143               set_dst_reg_note (insn, REG_EQUAL,
5144                                     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5145                                                         int_mode, op0, op1),
5146                                     quotient);
5147             }
5148           break;
5149 
5150       case ROUND_DIV_EXPR:
5151       case ROUND_MOD_EXPR:
5152           if (unsignedp)
5153             {
5154               scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5155               rtx tem;
5156               rtx_code_label *label;
5157               label = gen_label_rtx ();
5158               quotient = gen_reg_rtx (int_mode);
5159               remainder = gen_reg_rtx (int_mode);
5160               if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5161                 {
5162                     rtx tem;
5163                     quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5164                                                    quotient, 1, methods);
5165                     tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5166                     remainder = expand_binop (int_mode, sub_optab, op0, tem,
5167                                                     remainder, 1, methods);
5168                 }
5169               tem = plus_constant (int_mode, op1, -1);
5170               tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5171               do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5172               expand_inc (quotient, const1_rtx);
5173               expand_dec (remainder, op1);
5174               emit_label (label);
5175             }
5176           else
5177             {
5178               scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5179               int size = GET_MODE_BITSIZE (int_mode);
5180               rtx abs_rem, abs_op1, tem, mask;
5181               rtx_code_label *label;
5182               label = gen_label_rtx ();
5183               quotient = gen_reg_rtx (int_mode);
5184               remainder = gen_reg_rtx (int_mode);
5185               if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5186                 {
5187                     rtx tem;
5188                     quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5189                                                    quotient, 0, methods);
5190                     tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5191                     remainder = expand_binop (int_mode, sub_optab, op0, tem,
5192                                                     remainder, 0, methods);
5193                 }
5194               abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5195               abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5196               tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5197                                         1, NULL_RTX, 1);
5198               do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5199               tem = expand_binop (int_mode, xor_optab, op0, op1,
5200                                         NULL_RTX, 0, OPTAB_WIDEN);
5201               mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5202                                          size - 1, NULL_RTX, 0);
5203               tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5204                                         NULL_RTX, 0, OPTAB_WIDEN);
5205               tem = expand_binop (int_mode, sub_optab, tem, mask,
5206                                         NULL_RTX, 0, OPTAB_WIDEN);
5207               expand_inc (quotient, tem);
5208               tem = expand_binop (int_mode, xor_optab, mask, op1,
5209                                         NULL_RTX, 0, OPTAB_WIDEN);
5210               tem = expand_binop (int_mode, sub_optab, tem, mask,
5211                                         NULL_RTX, 0, OPTAB_WIDEN);
5212               expand_dec (remainder, tem);
5213               emit_label (label);
5214             }
5215           return gen_lowpart (mode, rem_flag ? remainder : quotient);
5216 
5217       default:
5218           gcc_unreachable ();
5219       }
5220 
5221   if (quotient == 0)
5222     {
5223       if (target && GET_MODE (target) != compute_mode)
5224           target = 0;
5225 
5226       if (rem_flag)
5227           {
5228             /* Try to produce the remainder without producing the quotient.
5229                If we seem to have a divmod pattern that does not require widening,
5230                don't try widening here.  We should really have a WIDEN argument
5231                to expand_twoval_binop, since what we'd really like to do here is
5232                1) try a mod insn in compute_mode
5233                2) try a divmod insn in compute_mode
5234                3) try a div insn in compute_mode and multiply-subtract to get
5235                   remainder
5236                4) try the same things with widening allowed.  */
5237             remainder
5238               = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5239                                          op0, op1, target,
5240                                          unsignedp,
5241                                          ((optab_handler (optab2, compute_mode)
5242                                            != CODE_FOR_nothing)
5243                                           ? OPTAB_DIRECT : OPTAB_WIDEN));
5244             if (remainder == 0)
5245               {
5246                 /* No luck there.  Can we do remainder and divide at once
5247                      without a library call?  */
5248                 remainder = gen_reg_rtx (compute_mode);
5249                 if (! expand_twoval_binop ((unsignedp
5250                                                     ? udivmod_optab
5251                                                     : sdivmod_optab),
5252                                                    op0, op1,
5253                                                    NULL_RTX, remainder, unsignedp))
5254                     remainder = 0;
5255               }
5256 
5257             if (remainder)
5258               return gen_lowpart (mode, remainder);
5259           }
5260 
5261       /* Produce the quotient.  Try a quotient insn, but not a library call.
5262            If we have a divmod in this mode, use it in preference to widening
5263            the div (for this test we assume it will not fail). Note that optab2
5264            is set to the one of the two optabs that the call below will use.  */
5265       quotient
5266           = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5267                                    op0, op1, rem_flag ? NULL_RTX : target,
5268                                    unsignedp,
5269                                    ((optab_handler (optab2, compute_mode)
5270                                      != CODE_FOR_nothing)
5271                                     ? OPTAB_DIRECT : OPTAB_WIDEN));
5272 
5273       if (quotient == 0)
5274           {
5275             /* No luck there.  Try a quotient-and-remainder insn,
5276                keeping the quotient alone.  */
5277             quotient = gen_reg_rtx (compute_mode);
5278             if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5279                                              op0, op1,
5280                                              quotient, NULL_RTX, unsignedp))
5281               {
5282                 quotient = 0;
5283                 if (! rem_flag)
5284                     /* Still no luck.  If we are not computing the remainder,
5285                        use a library call for the quotient.  */
5286                     quotient = sign_expand_binop (compute_mode,
5287                                                         udiv_optab, sdiv_optab,
5288                                                         op0, op1, target,
5289                                                         unsignedp, methods);
5290               }
5291           }
5292     }
5293 
5294   if (rem_flag)
5295     {
5296       if (target && GET_MODE (target) != compute_mode)
5297           target = 0;
5298 
5299       if (quotient == 0)
5300           {
5301             /* No divide instruction either.  Use library for remainder.  */
5302             remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5303                                                    op0, op1, target,
5304                                                    unsignedp, methods);
5305             /* No remainder function.  Try a quotient-and-remainder
5306                function, keeping the remainder.  */
5307             if (!remainder
5308                 && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5309               {
5310                 remainder = gen_reg_rtx (compute_mode);
5311                 if (!expand_twoval_binop_libfunc
5312                       (unsignedp ? udivmod_optab : sdivmod_optab,
5313                        op0, op1,
5314                        NULL_RTX, remainder,
5315                        unsignedp ? UMOD : MOD))
5316                     remainder = NULL_RTX;
5317               }
5318           }
5319       else
5320           {
5321             /* We divided.  Now finish doing X - Y * (X / Y).  */
5322             remainder = expand_mult (compute_mode, quotient, op1,
5323                                            NULL_RTX, unsignedp);
5324             remainder = expand_binop (compute_mode, sub_optab, op0,
5325                                             remainder, target, unsignedp,
5326                                             methods);
5327           }
5328     }
5329 
5330   if (methods != OPTAB_LIB_WIDEN
5331       && (rem_flag ? remainder : quotient) == NULL_RTX)
5332     return NULL_RTX;
5333 
5334   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5335 }
5336 
5337 /* Return a tree node with data type TYPE, describing the value of X.
5338    Usually this is an VAR_DECL, if there is no obvious better choice.
5339    X may be an expression, however we only support those expressions
5340    generated by loop.c.  */
5341 
5342 tree
make_tree(tree type,rtx x)5343 make_tree (tree type, rtx x)
5344 {
5345   tree t;
5346 
5347   switch (GET_CODE (x))
5348     {
5349     case CONST_INT:
5350     case CONST_WIDE_INT:
5351       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5352       return t;
5353 
5354     case CONST_DOUBLE:
5355       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5356       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5357           t = wide_int_to_tree (type,
5358                                     wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5359                                                                 HOST_BITS_PER_WIDE_INT * 2));
5360       else
5361           t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5362 
5363       return t;
5364 
5365     case CONST_VECTOR:
5366       {
5367           unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5368           unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5369           tree itype = TREE_TYPE (type);
5370 
5371           /* Build a tree with vector elements.  */
5372           tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5373           unsigned int count = elts.encoded_nelts ();
5374           for (unsigned int i = 0; i < count; ++i)
5375             {
5376               rtx elt = CONST_VECTOR_ELT (x, i);
5377               elts.quick_push (make_tree (itype, elt));
5378             }
5379 
5380           return elts.build ();
5381       }
5382 
5383     case PLUS:
5384       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5385                                 make_tree (type, XEXP (x, 1)));
5386 
5387     case MINUS:
5388       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5389                                 make_tree (type, XEXP (x, 1)));
5390 
5391     case NEG:
5392       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5393 
5394     case MULT:
5395       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5396                                 make_tree (type, XEXP (x, 1)));
5397 
5398     case ASHIFT:
5399       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5400                                 make_tree (type, XEXP (x, 1)));
5401 
5402     case LSHIFTRT:
5403       t = unsigned_type_for (type);
5404       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5405                                                    make_tree (t, XEXP (x, 0)),
5406                                                    make_tree (type, XEXP (x, 1))));
5407 
5408     case ASHIFTRT:
5409       t = signed_type_for (type);
5410       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5411                                                    make_tree (t, XEXP (x, 0)),
5412                                                    make_tree (type, XEXP (x, 1))));
5413 
5414     case DIV:
5415       if (TREE_CODE (type) != REAL_TYPE)
5416           t = signed_type_for (type);
5417       else
5418           t = type;
5419 
5420       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5421                                                    make_tree (t, XEXP (x, 0)),
5422                                                    make_tree (t, XEXP (x, 1))));
5423     case UDIV:
5424       t = unsigned_type_for (type);
5425       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5426                                                    make_tree (t, XEXP (x, 0)),
5427                                                    make_tree (t, XEXP (x, 1))));
5428 
5429     case SIGN_EXTEND:
5430     case ZERO_EXTEND:
5431       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5432                                                     GET_CODE (x) == ZERO_EXTEND);
5433       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5434 
5435     case CONST:
5436       return make_tree (type, XEXP (x, 0));
5437 
5438     case SYMBOL_REF:
5439       t = SYMBOL_REF_DECL (x);
5440       if (t)
5441           return fold_convert (type, build_fold_addr_expr (t));
5442       /* fall through.  */
5443 
5444     default:
5445       if (CONST_POLY_INT_P (x))
5446           return wide_int_to_tree (t, const_poly_int_value (x));
5447 
5448       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5449 
5450       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5451            address mode to pointer mode.  */
5452       if (POINTER_TYPE_P (type))
5453           x = convert_memory_address_addr_space
5454             (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5455 
5456       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5457            want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5458       t->decl_with_rtl.rtl = x;
5459 
5460       return t;
5461     }
5462 }
5463 
5464 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5465    and returning TARGET.
5466 
5467    If TARGET is 0, a pseudo-register or constant is returned.  */
5468 
5469 rtx
expand_and(machine_mode mode,rtx op0,rtx op1,rtx target)5470 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5471 {
5472   rtx tem = 0;
5473 
5474   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5475     tem = simplify_binary_operation (AND, mode, op0, op1);
5476   if (tem == 0)
5477     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5478 
5479   if (target == 0)
5480     target = tem;
5481   else if (tem != target)
5482     emit_move_insn (target, tem);
5483   return target;
5484 }
5485 
5486 /* Helper function for emit_store_flag.  */
5487 rtx
emit_cstore(rtx target,enum insn_code icode,enum rtx_code code,machine_mode mode,machine_mode compare_mode,int unsignedp,rtx x,rtx y,int normalizep,machine_mode target_mode)5488 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5489                machine_mode mode, machine_mode compare_mode,
5490                int unsignedp, rtx x, rtx y, int normalizep,
5491                machine_mode target_mode)
5492 {
5493   class expand_operand ops[4];
5494   rtx op0, comparison, subtarget;
5495   rtx_insn *last;
5496   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5497   scalar_int_mode int_target_mode;
5498 
5499   last = get_last_insn ();
5500   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5501   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5502   if (!x || !y)
5503     {
5504       delete_insns_since (last);
5505       return NULL_RTX;
5506     }
5507 
5508   if (target_mode == VOIDmode)
5509     int_target_mode = result_mode;
5510   else
5511     int_target_mode = as_a <scalar_int_mode> (target_mode);
5512   if (!target)
5513     target = gen_reg_rtx (int_target_mode);
5514 
5515   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5516 
5517   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5518   create_fixed_operand (&ops[1], comparison);
5519   create_fixed_operand (&ops[2], x);
5520   create_fixed_operand (&ops[3], y);
5521   if (!maybe_expand_insn (icode, 4, ops))
5522     {
5523       delete_insns_since (last);
5524       return NULL_RTX;
5525     }
5526   subtarget = ops[0].value;
5527 
5528   /* If we are converting to a wider mode, first convert to
5529      INT_TARGET_MODE, then normalize.  This produces better combining
5530      opportunities on machines that have a SIGN_EXTRACT when we are
5531      testing a single bit.  This mostly benefits the 68k.
5532 
5533      If STORE_FLAG_VALUE does not have the sign bit set when
5534      interpreted in MODE, we can do this conversion as unsigned, which
5535      is usually more efficient.  */
5536   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5537     {
5538       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5539                       || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5540 
5541       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5542       convert_move (target, subtarget, unsignedp);
5543 
5544       op0 = target;
5545       result_mode = int_target_mode;
5546     }
5547   else
5548     op0 = subtarget;
5549 
5550   /* If we want to keep subexpressions around, don't reuse our last
5551      target.  */
5552   if (optimize)
5553     subtarget = 0;
5554 
5555   /* Now normalize to the proper value in MODE.  Sometimes we don't
5556      have to do anything.  */
5557   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5558     ;
5559   /* STORE_FLAG_VALUE might be the most negative number, so write
5560      the comparison this way to avoid a compiler-time warning.  */
5561   else if (- normalizep == STORE_FLAG_VALUE)
5562     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5563 
5564   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5565      it hard to use a value of just the sign bit due to ANSI integer
5566      constant typing rules.  */
5567   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5568     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5569                               GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5570                               normalizep == 1);
5571   else
5572     {
5573       gcc_assert (STORE_FLAG_VALUE & 1);
5574 
5575       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5576       if (normalizep == -1)
5577           op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5578     }
5579 
5580   /* If we were converting to a smaller mode, do the conversion now.  */
5581   if (int_target_mode != result_mode)
5582     {
5583       convert_move (target, op0, 0);
5584       return target;
5585     }
5586   else
5587     return op0;
5588 }
5589 
5590 
5591 /* A subroutine of emit_store_flag only including "tricks" that do not
5592    need a recursive call.  These are kept separate to avoid infinite
5593    loops.  */
5594 
5595 static rtx
emit_store_flag_1(rtx target,enum rtx_code code,rtx op0,rtx op1,machine_mode mode,int unsignedp,int normalizep,machine_mode target_mode)5596 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5597                        machine_mode mode, int unsignedp, int normalizep,
5598                        machine_mode target_mode)
5599 {
5600   rtx subtarget;
5601   enum insn_code icode;
5602   machine_mode compare_mode;
5603   enum mode_class mclass;
5604   enum rtx_code scode;
5605 
5606   if (unsignedp)
5607     code = unsigned_condition (code);
5608   scode = swap_condition (code);
5609 
5610   /* If one operand is constant, make it the second one.  Only do this
5611      if the other operand is not constant as well.  */
5612 
5613   if (swap_commutative_operands_p (op0, op1))
5614     {
5615       std::swap (op0, op1);
5616       code = swap_condition (code);
5617     }
5618 
5619   if (mode == VOIDmode)
5620     mode = GET_MODE (op0);
5621 
5622   if (CONST_SCALAR_INT_P (op1))
5623     canonicalize_comparison (mode, &code, &op1);
5624 
5625   /* For some comparisons with 1 and -1, we can convert this to
5626      comparisons with zero.  This will often produce more opportunities for
5627      store-flag insns.  */
5628 
5629   switch (code)
5630     {
5631     case LT:
5632       if (op1 == const1_rtx)
5633           op1 = const0_rtx, code = LE;
5634       break;
5635     case LE:
5636       if (op1 == constm1_rtx)
5637           op1 = const0_rtx, code = LT;
5638       break;
5639     case GE:
5640       if (op1 == const1_rtx)
5641           op1 = const0_rtx, code = GT;
5642       break;
5643     case GT:
5644       if (op1 == constm1_rtx)
5645           op1 = const0_rtx, code = GE;
5646       break;
5647     case GEU:
5648       if (op1 == const1_rtx)
5649           op1 = const0_rtx, code = NE;
5650       break;
5651     case LTU:
5652       if (op1 == const1_rtx)
5653           op1 = const0_rtx, code = EQ;
5654       break;
5655     default:
5656       break;
5657     }
5658 
5659   /* If we are comparing a double-word integer with zero or -1, we can
5660      convert the comparison into one involving a single word.  */
5661   scalar_int_mode int_mode;
5662   if (is_int_mode (mode, &int_mode)
5663       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5664       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5665     {
5666       rtx tem;
5667       if ((code == EQ || code == NE)
5668             && (op1 == const0_rtx || op1 == constm1_rtx))
5669           {
5670             rtx op00, op01;
5671 
5672             /* Do a logical OR or AND of the two words and compare the
5673                result.  */
5674             op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5675             op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5676             tem = expand_binop (word_mode,
5677                                     op1 == const0_rtx ? ior_optab : and_optab,
5678                                     op00, op01, NULL_RTX, unsignedp,
5679                                     OPTAB_DIRECT);
5680 
5681             if (tem != 0)
5682               tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5683                                            unsignedp, normalizep);
5684           }
5685       else if ((code == LT || code == GE) && op1 == const0_rtx)
5686           {
5687             rtx op0h;
5688 
5689             /* If testing the sign bit, can just test on high word.  */
5690             op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5691                                               subreg_highpart_offset (word_mode,
5692                                                                             int_mode));
5693             tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5694                                          unsignedp, normalizep);
5695           }
5696       else
5697           tem = NULL_RTX;
5698 
5699       if (tem)
5700           {
5701             if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5702               return tem;
5703             if (!target)
5704               target = gen_reg_rtx (target_mode);
5705 
5706             convert_move (target, tem,
5707                               !val_signbit_known_set_p (word_mode,
5708                                                               (normalizep ? normalizep
5709                                                                : STORE_FLAG_VALUE)));
5710             return target;
5711           }
5712     }
5713 
5714   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5715      complement of A (for GE) and shifting the sign bit to the low bit.  */
5716   if (op1 == const0_rtx && (code == LT || code == GE)
5717       && is_int_mode (mode, &int_mode)
5718       && (normalizep || STORE_FLAG_VALUE == 1
5719             || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5720     {
5721       scalar_int_mode int_target_mode;
5722       subtarget = target;
5723 
5724       if (!target)
5725           int_target_mode = int_mode;
5726       else
5727           {
5728             /* If the result is to be wider than OP0, it is best to convert it
5729                first.  If it is to be narrower, it is *incorrect* to convert it
5730                first.  */
5731             int_target_mode = as_a <scalar_int_mode> (target_mode);
5732             if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5733               {
5734                 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5735                 int_mode = int_target_mode;
5736               }
5737           }
5738 
5739       if (int_target_mode != int_mode)
5740           subtarget = 0;
5741 
5742       if (code == GE)
5743           op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5744                                  ((STORE_FLAG_VALUE == 1 || normalizep)
5745                                   ? 0 : subtarget), 0);
5746 
5747       if (STORE_FLAG_VALUE == 1 || normalizep)
5748           /* If we are supposed to produce a 0/1 value, we want to do
5749              a logical shift from the sign bit to the low-order bit; for
5750              a -1/0 value, we do an arithmetic shift.  */
5751           op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5752                                   GET_MODE_BITSIZE (int_mode) - 1,
5753                                   subtarget, normalizep != -1);
5754 
5755       if (int_mode != int_target_mode)
5756           op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5757 
5758       return op0;
5759     }
5760 
5761   mclass = GET_MODE_CLASS (mode);
5762   FOR_EACH_MODE_FROM (compare_mode, mode)
5763     {
5764      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5765      icode = optab_handler (cstore_optab, optab_mode);
5766      if (icode != CODE_FOR_nothing)
5767           {
5768             do_pending_stack_adjust ();
5769             rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5770                                          unsignedp, op0, op1, normalizep, target_mode);
5771             if (tem)
5772               return tem;
5773 
5774             if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5775               {
5776                 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5777                                          unsignedp, op1, op0, normalizep, target_mode);
5778                 if (tem)
5779                   return tem;
5780               }
5781             break;
5782           }
5783     }
5784 
5785   return 0;
5786 }
5787 
5788 /* Subroutine of emit_store_flag that handles cases in which the operands
5789    are scalar integers.  SUBTARGET is the target to use for temporary
5790    operations and TRUEVAL is the value to store when the condition is
5791    true.  All other arguments are as for emit_store_flag.  */
5792 
5793 rtx
emit_store_flag_int(rtx target,rtx subtarget,enum rtx_code code,rtx op0,rtx op1,scalar_int_mode mode,int unsignedp,int normalizep,rtx trueval)5794 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5795                          rtx op1, scalar_int_mode mode, int unsignedp,
5796                          int normalizep, rtx trueval)
5797 {
5798   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5799   rtx_insn *last = get_last_insn ();
5800 
5801   /* If this is an equality comparison of integers, we can try to exclusive-or
5802      (or subtract) the two operands and use a recursive call to try the
5803      comparison with zero.  Don't do any of these cases if branches are
5804      very cheap.  */
5805 
5806   if ((code == EQ || code == NE) && op1 != const0_rtx)
5807     {
5808       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5809                                     OPTAB_WIDEN);
5810 
5811       if (tem == 0)
5812           tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5813                                   OPTAB_WIDEN);
5814       if (tem != 0)
5815           tem = emit_store_flag (target, code, tem, const0_rtx,
5816                                      mode, unsignedp, normalizep);
5817       if (tem != 0)
5818           return tem;
5819 
5820       delete_insns_since (last);
5821     }
5822 
5823   /* For integer comparisons, try the reverse comparison.  However, for
5824      small X and if we'd have anyway to extend, implementing "X != 0"
5825      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5826   rtx_code rcode = reverse_condition (code);
5827   if (can_compare_p (rcode, mode, ccp_store_flag)
5828       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5829               && code == NE
5830               && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5831               && op1 == const0_rtx))
5832     {
5833       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5834                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5835 
5836       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5837       if (want_add
5838             && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5839                            optimize_insn_for_speed_p ()) == 0)
5840           {
5841             rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5842                                                STORE_FLAG_VALUE, target_mode);
5843             if (tem != 0)
5844               tem = expand_binop (target_mode, add_optab, tem,
5845                                         gen_int_mode (normalizep, target_mode),
5846                                         target, 0, OPTAB_WIDEN);
5847             if (tem != 0)
5848               return tem;
5849           }
5850       else if (!want_add
5851                  && rtx_cost (trueval, mode, XOR, 1,
5852                                   optimize_insn_for_speed_p ()) == 0)
5853           {
5854             rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5855                                                normalizep, target_mode);
5856             if (tem != 0)
5857               tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5858                                         INTVAL (trueval) >= 0, OPTAB_WIDEN);
5859             if (tem != 0)
5860               return tem;
5861           }
5862 
5863       delete_insns_since (last);
5864     }
5865 
5866   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5867      the constant zero.  Reject all other comparisons at this point.  Only
5868      do LE and GT if branches are expensive since they are expensive on
5869      2-operand machines.  */
5870 
5871   if (op1 != const0_rtx
5872       || (code != EQ && code != NE
5873             && (BRANCH_COST (optimize_insn_for_speed_p (),
5874                                  false) <= 1 || (code != LE && code != GT))))
5875     return 0;
5876 
5877   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5878      do the necessary operation below.  */
5879 
5880   rtx tem = 0;
5881 
5882   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5883      the sign bit set.  */
5884 
5885   if (code == LE)
5886     {
5887       /* This is destructive, so SUBTARGET can't be OP0.  */
5888       if (rtx_equal_p (subtarget, op0))
5889           subtarget = 0;
5890 
5891       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5892                                 OPTAB_WIDEN);
5893       if (tem)
5894           tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5895                                   OPTAB_WIDEN);
5896     }
5897 
5898   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5899      number of bits in the mode of OP0, minus one.  */
5900 
5901   if (code == GT)
5902     {
5903       if (rtx_equal_p (subtarget, op0))
5904           subtarget = 0;
5905 
5906       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5907                                         GET_MODE_BITSIZE (mode) - 1,
5908                                         subtarget, 0);
5909       if (tem)
5910           tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5911                                   OPTAB_WIDEN);
5912     }
5913 
5914   if (code == EQ || code == NE)
5915     {
5916       /* For EQ or NE, one way to do the comparison is to apply an operation
5917            that converts the operand into a positive number if it is nonzero
5918            or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5919            for NE we negate.  This puts the result in the sign bit.  Then we
5920            normalize with a shift, if needed.
5921 
5922            Two operations that can do the above actions are ABS and FFS, so try
5923            them.  If that doesn't work, and MODE is smaller than a full word,
5924            we can use zero-extension to the wider mode (an unsigned conversion)
5925            as the operation.  */
5926 
5927       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5928            that is compensated by the subsequent overflow when subtracting
5929            one / negating.  */
5930 
5931       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5932           tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5933       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5934           tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5935       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5936           {
5937             tem = convert_modes (word_mode, mode, op0, 1);
5938             mode = word_mode;
5939           }
5940 
5941       if (tem != 0)
5942           {
5943             if (code == EQ)
5944               tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5945                                         0, OPTAB_WIDEN);
5946             else
5947               tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5948           }
5949 
5950       /* If we couldn't do it that way, for NE we can "or" the two's complement
5951            of the value with itself.  For EQ, we take the one's complement of
5952            that "or", which is an extra insn, so we only handle EQ if branches
5953            are expensive.  */
5954 
5955       if (tem == 0
5956             && (code == NE
5957                 || BRANCH_COST (optimize_insn_for_speed_p (),
5958                                     false) > 1))
5959           {
5960             if (rtx_equal_p (subtarget, op0))
5961               subtarget = 0;
5962 
5963             tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5964             tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5965                                     OPTAB_WIDEN);
5966 
5967             if (tem && code == EQ)
5968               tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5969           }
5970     }
5971 
5972   if (tem && normalizep)
5973     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5974                                     GET_MODE_BITSIZE (mode) - 1,
5975                                     subtarget, normalizep == 1);
5976 
5977   if (tem)
5978     {
5979       if (!target)
5980           ;
5981       else if (GET_MODE (tem) != target_mode)
5982           {
5983             convert_move (target, tem, 0);
5984             tem = target;
5985           }
5986       else if (!subtarget)
5987           {
5988             emit_move_insn (target, tem);
5989             tem = target;
5990           }
5991     }
5992   else
5993     delete_insns_since (last);
5994 
5995   return tem;
5996 }
5997 
5998 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5999    and storing in TARGET.  Normally return TARGET.
6000    Return 0 if that cannot be done.
6001 
6002    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
6003    it is VOIDmode, they cannot both be CONST_INT.
6004 
6005    UNSIGNEDP is for the case where we have to widen the operands
6006    to perform the operation.  It says to use zero-extension.
6007 
6008    NORMALIZEP is 1 if we should convert the result to be either zero
6009    or one.  Normalize is -1 if we should convert the result to be
6010    either zero or -1.  If NORMALIZEP is zero, the result will be left
6011    "raw" out of the scc insn.  */
6012 
6013 rtx
emit_store_flag(rtx target,enum rtx_code code,rtx op0,rtx op1,machine_mode mode,int unsignedp,int normalizep)6014 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6015                      machine_mode mode, int unsignedp, int normalizep)
6016 {
6017   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6018   enum rtx_code rcode;
6019   rtx subtarget;
6020   rtx tem, trueval;
6021   rtx_insn *last;
6022 
6023   /* If we compare constants, we shouldn't use a store-flag operation,
6024      but a constant load.  We can get there via the vanilla route that
6025      usually generates a compare-branch sequence, but will in this case
6026      fold the comparison to a constant, and thus elide the branch.  */
6027   if (CONSTANT_P (op0) && CONSTANT_P (op1))
6028     return NULL_RTX;
6029 
6030   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6031                                  target_mode);
6032   if (tem)
6033     return tem;
6034 
6035   /* If we reached here, we can't do this with a scc insn, however there
6036      are some comparisons that can be done in other ways.  Don't do any
6037      of these cases if branches are very cheap.  */
6038   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6039     return 0;
6040 
6041   /* See what we need to return.  We can only return a 1, -1, or the
6042      sign bit.  */
6043 
6044   if (normalizep == 0)
6045     {
6046       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6047           normalizep = STORE_FLAG_VALUE;
6048 
6049       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6050           ;
6051       else
6052           return 0;
6053     }
6054 
6055   last = get_last_insn ();
6056 
6057   /* If optimizing, use different pseudo registers for each insn, instead
6058      of reusing the same pseudo.  This leads to better CSE, but slows
6059      down the compiler, since there are more pseudos.  */
6060   subtarget = (!optimize
6061                  && (target_mode == mode)) ? target : NULL_RTX;
6062   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6063 
6064   /* For floating-point comparisons, try the reverse comparison or try
6065      changing the "orderedness" of the comparison.  */
6066   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6067     {
6068       enum rtx_code first_code;
6069       bool and_them;
6070 
6071       rcode = reverse_condition_maybe_unordered (code);
6072       if (can_compare_p (rcode, mode, ccp_store_flag)
6073             && (code == ORDERED || code == UNORDERED
6074                 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6075                 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6076           {
6077             int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6078                                 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6079 
6080             /* For the reverse comparison, use either an addition or a XOR.  */
6081             if (want_add
6082                 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6083                                  optimize_insn_for_speed_p ()) == 0)
6084               {
6085                 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6086                                                STORE_FLAG_VALUE, target_mode);
6087                 if (tem)
6088                     return expand_binop (target_mode, add_optab, tem,
6089                                              gen_int_mode (normalizep, target_mode),
6090                                              target, 0, OPTAB_WIDEN);
6091               }
6092             else if (!want_add
6093                        && rtx_cost (trueval, mode, XOR, 1,
6094                                         optimize_insn_for_speed_p ()) == 0)
6095               {
6096                 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6097                                                normalizep, target_mode);
6098                 if (tem)
6099                     return expand_binop (target_mode, xor_optab, tem, trueval,
6100                                              target, INTVAL (trueval) >= 0,
6101                                              OPTAB_WIDEN);
6102               }
6103           }
6104 
6105       delete_insns_since (last);
6106 
6107       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6108       if (code == ORDERED || code == UNORDERED)
6109           return 0;
6110 
6111       and_them = split_comparison (code, mode, &first_code, &code);
6112 
6113       /* If there are no NaNs, the first comparison should always fall through.
6114            Effectively change the comparison to the other one.  */
6115       if (!HONOR_NANS (mode))
6116           {
6117             gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6118             return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6119                                             target_mode);
6120           }
6121 
6122       if (!HAVE_conditional_move)
6123           return 0;
6124 
6125       /* Do not turn a trapping comparison into a non-trapping one.  */
6126       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6127             && flag_trapping_math)
6128           return 0;
6129 
6130       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6131            conditional move.  */
6132       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6133                                      normalizep, target_mode);
6134       if (tem == 0)
6135           return 0;
6136 
6137       if (and_them)
6138           tem = emit_conditional_move (target, { code, op0, op1, mode },
6139                                              tem, const0_rtx, GET_MODE (tem), 0);
6140       else
6141           tem = emit_conditional_move (target, { code, op0, op1, mode },
6142                                              trueval, tem, GET_MODE (tem), 0);
6143 
6144       if (tem == 0)
6145           delete_insns_since (last);
6146       return tem;
6147     }
6148 
6149   /* The remaining tricks only apply to integer comparisons.  */
6150 
6151   scalar_int_mode int_mode;
6152   if (is_int_mode (mode, &int_mode))
6153     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6154                                         unsignedp, normalizep, trueval);
6155 
6156   return 0;
6157 }
6158 
6159 /* Like emit_store_flag, but always succeeds.  */
6160 
6161 rtx
emit_store_flag_force(rtx target,enum rtx_code code,rtx op0,rtx op1,machine_mode mode,int unsignedp,int normalizep)6162 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6163                            machine_mode mode, int unsignedp, int normalizep)
6164 {
6165   rtx tem;
6166   rtx_code_label *label;
6167   rtx trueval, falseval;
6168 
6169   /* First see if emit_store_flag can do the job.  */
6170   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6171   if (tem != 0)
6172     return tem;
6173 
6174   /* If one operand is constant, make it the second one.  Only do this
6175      if the other operand is not constant as well.  */
6176   if (swap_commutative_operands_p (op0, op1))
6177     {
6178       std::swap (op0, op1);
6179       code = swap_condition (code);
6180     }
6181 
6182   if (mode == VOIDmode)
6183     mode = GET_MODE (op0);
6184 
6185   if (!target)
6186     target = gen_reg_rtx (word_mode);
6187 
6188   /* If this failed, we have to do this with set/compare/jump/set code.
6189      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6190   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6191   if (code == NE
6192       && GET_MODE_CLASS (mode) == MODE_INT
6193       && REG_P (target)
6194       && op0 == target
6195       && op1 == const0_rtx)
6196     {
6197       label = gen_label_rtx ();
6198       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6199                                      NULL_RTX, NULL, label,
6200                                      profile_probability::uninitialized ());
6201       emit_move_insn (target, trueval);
6202       emit_label (label);
6203       return target;
6204     }
6205 
6206   if (!REG_P (target)
6207       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6208     target = gen_reg_rtx (GET_MODE (target));
6209 
6210   /* Jump in the right direction if the target cannot implement CODE
6211      but can jump on its reverse condition.  */
6212   falseval = const0_rtx;
6213   if (! can_compare_p (code, mode, ccp_jump)
6214       && (! FLOAT_MODE_P (mode)
6215           || code == ORDERED || code == UNORDERED
6216           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6217           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6218     {
6219       enum rtx_code rcode;
6220       if (FLOAT_MODE_P (mode))
6221         rcode = reverse_condition_maybe_unordered (code);
6222       else
6223         rcode = reverse_condition (code);
6224 
6225       /* Canonicalize to UNORDERED for the libcall.  */
6226       if (can_compare_p (rcode, mode, ccp_jump)
6227           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6228           {
6229             falseval = trueval;
6230             trueval = const0_rtx;
6231             code = rcode;
6232           }
6233     }
6234 
6235   emit_move_insn (target, trueval);
6236   label = gen_label_rtx ();
6237   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6238                                  label, profile_probability::uninitialized ());
6239 
6240   emit_move_insn (target, falseval);
6241   emit_label (label);
6242 
6243   return target;
6244 }
6245 
6246 /* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
6247    and exclusive ranges in order to create an equivalent comparison.  See
6248    canonicalize_cmp_for_target for the possible cases.  */
6249 
6250 static enum rtx_code
equivalent_cmp_code(enum rtx_code code)6251 equivalent_cmp_code (enum rtx_code code)
6252 {
6253   switch (code)
6254     {
6255     case GT:
6256       return GE;
6257     case GE:
6258       return GT;
6259     case LT:
6260       return LE;
6261     case LE:
6262       return LT;
6263     case GTU:
6264       return GEU;
6265     case GEU:
6266       return GTU;
6267     case LTU:
6268       return LEU;
6269     case LEU:
6270       return LTU;
6271 
6272     default:
6273       return code;
6274     }
6275 }
6276 
6277 /* Choose the more appropiate immediate in scalar integer comparisons.  The
6278    purpose of this is to end up with an immediate which can be loaded into a
6279    register in fewer moves, if possible.
6280 
6281    For each integer comparison there exists an equivalent choice:
6282      i)   a >  b or a >= b + 1
6283      ii)  a <= b or a <  b + 1
6284      iii) a >= b or a >  b - 1
6285      iv)  a <  b or a <= b - 1
6286 
6287    MODE is the mode of the first operand.
6288    CODE points to the comparison code.
6289    IMM points to the rtx containing the immediate.  *IMM must satisfy
6290    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6291    on exit.  */
6292 
6293 void
canonicalize_comparison(machine_mode mode,enum rtx_code * code,rtx * imm)6294 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6295 {
6296   if (!SCALAR_INT_MODE_P (mode))
6297     return;
6298 
6299   int to_add = 0;
6300   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6301 
6302   /* Extract the immediate value from the rtx.  */
6303   wide_int imm_val = rtx_mode_t (*imm, mode);
6304 
6305   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6306     to_add = 1;
6307   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6308     to_add = -1;
6309   else
6310     return;
6311 
6312   /* Check for overflow/underflow in the case of signed values and
6313      wrapping around in the case of unsigned values.  If any occur
6314      cancel the optimization.  */
6315   wi::overflow_type overflow = wi::OVF_NONE;
6316   wide_int imm_modif;
6317 
6318   if (to_add == 1)
6319     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6320   else
6321     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6322 
6323   if (overflow)
6324     return;
6325 
6326   /* The following creates a pseudo; if we cannot do that, bail out.  */
6327   if (!can_create_pseudo_p ())
6328     return;
6329 
6330   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6331   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6332 
6333   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6334   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6335 
6336   /* Update the immediate and the code.  */
6337   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6338     {
6339       *code = equivalent_cmp_code (*code);
6340       *imm = new_imm;
6341     }
6342 }
6343 
6344 
6345 
6346 /* Perform possibly multi-word comparison and conditional jump to LABEL
6347    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6348    now a thin wrapper around do_compare_rtx_and_jump.  */
6349 
6350 static void
do_cmp_and_jump(rtx arg1,rtx arg2,enum rtx_code op,machine_mode mode,rtx_code_label * label)6351 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6352                      rtx_code_label *label)
6353 {
6354   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6355   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6356                                  NULL, label, profile_probability::uninitialized ());
6357 }
6358