1 /* Subroutines for insn-output.cc for HPPA.
2    Copyright (C) 1992-2022 Free Software Foundation, Inc.
3    Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.cc
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #define IN_TARGET_CODE 1
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "attribs.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "varasm.h"
45 #include "calls.h"
46 #include "output.h"
47 #include "except.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "reload.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "cfgrtl.h"
54 #include "opts.h"
55 #include "builtins.h"
56 
57 /* This file should be included last.  */
58 #include "target-def.h"
59 
60 /* Return nonzero if there is a bypass for the output of
61    OUT_INSN and the fp store IN_INSN.  */
62 int
pa_fpstore_bypass_p(rtx_insn * out_insn,rtx_insn * in_insn)63 pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
64 {
65   machine_mode store_mode;
66   machine_mode other_mode;
67   rtx set;
68 
69   if (recog_memoized (in_insn) < 0
70       || (get_attr_type (in_insn) != TYPE_FPSTORE
71             && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
72       || recog_memoized (out_insn) < 0)
73     return 0;
74 
75   store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
76 
77   set = single_set (out_insn);
78   if (!set)
79     return 0;
80 
81   other_mode = GET_MODE (SET_SRC (set));
82 
83   return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
84 }
85 
86 
/* Provide a default for DO_FRAME_NOTES when nothing has defined it yet:
   1 if INCOMING_RETURN_ADDR_RTX is defined, 0 otherwise.  */
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
94 
/* Forward declarations of the file-local (static) helpers and target
   hook implementations, so they can be named in the TARGET_* macro
   table and called before their definitions.  Declarations carrying
   ATTRIBUTE_UNUSED are presumably referenced only under some target
   configurations -- confirm against the configuration headers.  */
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
/* The constructor/destructor emitters are only declared when collect2
   is not in use.  */
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
                                  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
                                     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
                                        reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;
207 
/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

/* Forward declaration of a file-local helper; its definition is not
   part of this section of the file.  */
static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  /* NOTE(review): presumably the internal label that names the plabel
     in the output -- confirm at the use sites.  */
  rtx internal_label;
  /* NOTE(review): presumably the symbol the plabel refers to --
     confirm at the use sites.  */
  rtx symbol;
};
/* Garbage-collected array of deferred plabels; the GTY length
   annotation ties its element count to n_deferred_plabels.  */
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
243 
/* Initialize the GCC target structure.  Each hook macro is first
   undefined and then redefined to this port's implementation; the
   resulting set is consumed by TARGET_INITIALIZER at the end of the
   table.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

/* Assembler directives for emitting integers.  The unaligned forms
   reuse the aligned directives.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

/* Function and libcall return values.  */
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

/* Instruction scheduling.  */
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

/* Constructor/destructor output hooks are only installed when collect2
   is not in use.  */
#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

/* Machine-specific builtins.  */
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

/* Cost model.  */
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

/* Argument passing and value return conventions.  */
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

/* Variable-argument support.  */
#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

/* Reload support.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

/* TARGET_LRA_P is wired to the generic hook_bool_void_false.  */
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

/* Hard register / machine mode relationships.  */
#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

/* The target hook vector itself, built from the TARGET_* macros
   above.  pa_option_override later clears some of its asm_out
   members.  */
struct gcc_target targetm = TARGET_INITIALIZER;
429 
430 /* Parse the -mfixed-range= option string.  */
431 
432 static void
fix_range(const char * const_str)433 fix_range (const char *const_str)
434 {
435   int i, first, last;
436   char *str, *dash, *comma;
437 
438   /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
439      REG2 are either register names or register numbers.  The effect
440      of this option is to mark the registers in the range from REG1 to
441      REG2 as ``fixed'' so they won't be used by the compiler.  This is
442      used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
443 
444   i = strlen (const_str);
445   str = (char *) alloca (i + 1);
446   memcpy (str, const_str, i + 1);
447 
448   while (1)
449     {
450       dash = strchr (str, '-');
451       if (!dash)
452           {
453             warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
454             return;
455           }
456       *dash = '\0';
457 
458       comma = strchr (dash + 1, ',');
459       if (comma)
460           *comma = '\0';
461 
462       first = decode_reg_name (str);
463       if (first < 0)
464           {
465             warning (0, "unknown register name: %s", str);
466             return;
467           }
468 
469       last = decode_reg_name (dash + 1);
470       if (last < 0)
471           {
472             warning (0, "unknown register name: %s", dash + 1);
473             return;
474           }
475 
476       *dash = '-';
477 
478       if (first > last)
479           {
480             warning (0, "%s-%s is an empty range", str, dash + 1);
481             return;
482           }
483 
484       for (i = first; i <= last; ++i)
485           fixed_regs[i] = call_used_regs[i] = 1;
486 
487       if (!comma)
488           break;
489 
490       *comma = ',';
491       str = comma + 1;
492     }
493 
494   /* Check if all floating point registers have been fixed.  */
495   for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
496     if (!fixed_regs[i])
497       break;
498 
499   if (i > FP_REG_LAST)
500     target_flags |= MASK_SOFT_FLOAT;
501 }
502 
503 /* Implement the TARGET_OPTION_OVERRIDE hook.  */
504 
505 static void
pa_option_override(void)506 pa_option_override (void)
507 {
508   unsigned int i;
509   cl_deferred_option *opt;
510   vec<cl_deferred_option> *v
511     = (vec<cl_deferred_option> *) pa_deferred_options;
512 
513   if (v)
514     FOR_EACH_VEC_ELT (*v, i, opt)
515       {
516           switch (opt->opt_index)
517             {
518             case OPT_mfixed_range_:
519               fix_range (opt->arg);
520               break;
521 
522             default:
523               gcc_unreachable ();
524             }
525       }
526 
527   if (flag_pic && TARGET_PORTABLE_RUNTIME)
528     {
529       warning (0, "PIC code generation is not supported in the portable runtime model");
530     }
531 
532   if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
533    {
534       warning (0, "PIC code generation is not compatible with fast indirect calls");
535    }
536 
537   if (! TARGET_GAS && write_symbols != NO_DEBUG)
538     {
539       warning (0, "%<-g%> is only supported when using GAS on this processor");
540       warning (0, "%<-g%> option disabled");
541       write_symbols = NO_DEBUG;
542     }
543 
544   if (TARGET_64BIT && TARGET_HPUX)
545     {
546       /* DWARF5 is not supported by gdb.  Don't emit DWARF5 unless
547            specifically selected.  */
548       if (!OPTION_SET_P (dwarf_strict))
549           dwarf_strict = 1;
550       if (!OPTION_SET_P (dwarf_version))
551           dwarf_version = 4;
552     }
553 
554   /* We only support the "big PIC" model now.  And we always generate PIC
555      code when in 64bit mode.  */
556   if (flag_pic == 1 || TARGET_64BIT)
557     flag_pic = 2;
558 
559   /* Disable -freorder-blocks-and-partition as we don't support hot and
560      cold partitioning.  */
561   if (flag_reorder_blocks_and_partition)
562     {
563       inform (input_location,
564                 "%<-freorder-blocks-and-partition%> does not work "
565                 "on this architecture");
566       flag_reorder_blocks_and_partition = 0;
567       flag_reorder_blocks = 1;
568     }
569 
570   /* Disable -fstack-protector to suppress warning.  */
571   flag_stack_protect = 0;
572 
573   /* We can't guarantee that .dword is available for 32-bit targets.  */
574   if (UNITS_PER_WORD == 4)
575     targetm.asm_out.aligned_op.di = NULL;
576 
577   /* The unaligned ops are only available when using GAS.  */
578   if (!TARGET_GAS)
579     {
580       targetm.asm_out.unaligned_op.hi = NULL;
581       targetm.asm_out.unaligned_op.si = NULL;
582       targetm.asm_out.unaligned_op.di = NULL;
583     }
584 
585   init_machine_status = pa_init_machine_status;
586 }
587 
/* Function codes of the machine-specific builtins.  Each code is handed
   to add_builtin_function in pa_init_builtins, dispatched on in
   pa_expand_builtin, and used as an index into pa_builtins[] below.  */
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  /* Number of builtins; sizes pa_builtins[].  */
  PA_BUILTIN_max
};

/* Decls of the registered builtins, indexed by enum pa_builtins.  */
static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
598 
599 static void
pa_init_builtins(void)600 pa_init_builtins (void)
601 {
602 #ifdef DONT_HAVE_FPUTC_UNLOCKED
603   {
604     tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
605     set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
606                           builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
607   }
608 #endif
609 #if TARGET_HPUX_11
610   {
611     tree decl;
612 
613     if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
614       set_user_assembler_name (decl, "_Isfinite");
615     if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
616       set_user_assembler_name (decl, "_Isfinitef");
617   }
618 #endif
619 
620   if (HPUX_LONG_DOUBLE_LIBRARY)
621     {
622       tree decl, ftype;
623 
624       /* Under HPUX, the __float128 type is a synonym for "long double".  */
625       (*lang_hooks.types.register_builtin_type) (long_double_type_node,
626                                                              "__float128");
627 
628       /* TFmode support builtins.  */
629       ftype = build_function_type_list (long_double_type_node,
630                                                   long_double_type_node,
631                                                   NULL_TREE);
632       decl = add_builtin_function ("__builtin_fabsq", ftype,
633                                            PA_BUILTIN_FABSQ, BUILT_IN_MD,
634                                            "_U_Qfabs", NULL_TREE);
635       TREE_READONLY (decl) = 1;
636       pa_builtins[PA_BUILTIN_FABSQ] = decl;
637 
638       ftype = build_function_type_list (long_double_type_node,
639                                                   long_double_type_node,
640                                                   long_double_type_node,
641                                                   NULL_TREE);
642       decl = add_builtin_function ("__builtin_copysignq", ftype,
643                                            PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
644                                            "_U_Qfcopysign", NULL_TREE);
645       TREE_READONLY (decl) = 1;
646       pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
647 
648       ftype = build_function_type_list (long_double_type_node, NULL_TREE);
649       decl = add_builtin_function ("__builtin_infq", ftype,
650                                            PA_BUILTIN_INFQ, BUILT_IN_MD,
651                                            NULL, NULL_TREE);
652       pa_builtins[PA_BUILTIN_INFQ] = decl;
653 
654       decl = add_builtin_function ("__builtin_huge_valq", ftype,
655                                    PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
656                                    NULL, NULL_TREE);
657       pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
658     }
659 }
660 
661 static rtx
pa_expand_builtin(tree exp,rtx target,rtx subtarget ATTRIBUTE_UNUSED,machine_mode mode ATTRIBUTE_UNUSED,int ignore ATTRIBUTE_UNUSED)662 pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
663                        machine_mode mode ATTRIBUTE_UNUSED,
664                        int ignore ATTRIBUTE_UNUSED)
665 {
666   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
667   unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
668 
669   switch (fcode)
670     {
671     case PA_BUILTIN_FABSQ:
672     case PA_BUILTIN_COPYSIGNQ:
673       return expand_call (exp, target, ignore);
674 
675     case PA_BUILTIN_INFQ:
676     case PA_BUILTIN_HUGE_VALQ:
677       {
678           machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
679           REAL_VALUE_TYPE inf;
680           rtx tmp;
681 
682           real_inf (&inf);
683           tmp = const_double_from_real_value (inf, target_mode);
684 
685           tmp = validize_mem (force_const_mem (target_mode, tmp));
686 
687           if (target == 0)
688             target = gen_reg_rtx (target_mode);
689 
690           emit_move_insn (target, tmp);
691           return target;
692       }
693 
694     default:
695       gcc_unreachable ();
696     }
697 
698   return NULL_RTX;
699 }
700 
701 /* Function to init struct machine_function.
702    This will be called, via a pointer variable,
703    from push_function_context.  */
704 
705 static struct machine_function *
pa_init_machine_status(void)706 pa_init_machine_status (void)
707 {
708   return ggc_cleared_alloc<machine_function> ();
709 }
710 
711 /* If FROM is a probable pointer register, mark TO as a probable
712    pointer register with the same pointer alignment as FROM.  */
713 
714 static void
copy_reg_pointer(rtx to,rtx from)715 copy_reg_pointer (rtx to, rtx from)
716 {
717   if (REG_POINTER (from))
718     mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
719 }
720 
721 /* Return 1 if X contains a symbolic expression.  We know these
722    expressions will have one of a few well defined forms, so
723    we need only check those forms.  */
724 int
pa_symbolic_expression_p(rtx x)725 pa_symbolic_expression_p (rtx x)
726 {
727 
728   /* Strip off any HIGH.  */
729   if (GET_CODE (x) == HIGH)
730     x = XEXP (x, 0);
731 
732   return symbolic_operand (x, VOIDmode);
733 }
734 
735 /* Accept any constant that can be moved in one instruction into a
736    general register.  */
737 int
pa_cint_ok_for_move(unsigned HOST_WIDE_INT ival)738 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
739 {
740   /* OK if ldo, ldil, or zdepi, can be used.  */
741   return (VAL_14_BITS_P (ival)
742             || pa_ldil_cint_p (ival)
743             || pa_zdepi_cint_p (ival));
744 }
745 
/* True iff ldil can be used to load the constant IVAL.  The least
   significant 11 bits must be zero, and bits 31 and up must be either
   all clear or all set, so that the value does not change sign when
   extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  /* Bit 31 and everything above it.  */
  const unsigned HOST_WIDE_INT upper = (unsigned HOST_WIDE_INT) -1 << 31;
  /* The 11 low bits.  */
  const unsigned HOST_WIDE_INT lower = 0x7ff;

  const unsigned HOST_WIDE_INT probe = ival & (upper | lower);

  if (probe == 0)
    return 1;

  return probe == upper;
}
757 
/* True iff zdepi can be used to generate the constant X.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.

   The test is deliberately branch- and loop-free because this
   function is performance critical.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  /* Lowest set bit of X (zero when X is zero).  */
  const unsigned HOST_WIDE_INT lowest = x & (~x + 1);
  /* Mask of that bit and every bit above it.  */
  const unsigned HOST_WIDE_INT from_lowest_up = ~(lowest - 1);
  const unsigned HOST_WIDE_INT t = ((x >> 4) + lowest) & from_lowest_up;

  /* X qualifies iff T has at most one bit set.  */
  return (t & (t - 1)) == 0;
}
773 
/* True iff depi or extru can be used to compute (reg & MASK).
   Accepted bit patterns look like these:
   0....01....1
   1....10....0
   1..10..01..1
   In other words, the zero bits of MASK must form at most one
   contiguous run.  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  /* Work on the zero bits of MASK.  */
  const unsigned HOST_WIDE_INT zeros = ~mask;
  /* Adding the lowest set bit carries through the lowest run of ones
     and clears it.  */
  const unsigned HOST_WIDE_INT carried = zeros + (zeros & -zeros);

  /* At most one bit is left iff that run was the only one.  */
  return (carried & (carried - 1)) == 0;
}
786 
/* True iff depi can be used to compute (reg | MASK), i.e. iff the set
   bits of MASK form at most one contiguous run.  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  /* Adding the lowest set bit carries through the lowest run of ones
     and clears it.  */
  const unsigned HOST_WIDE_INT lowest = mask & -mask;
  const unsigned HOST_WIDE_INT carried = mask + lowest;

  /* At most one bit is left iff that run was the only one.  */
  return (carried & (carried - 1)) == 0;
}
794 
795 /* Legitimize PIC addresses.  If the address is already
796    position-independent, we return ORIG.  Newly generated
797    position-independent addresses go to REG.  If we need more
798    than one register, we lose.  */
799 
800 static rtx
legitimize_pic_address(rtx orig,machine_mode mode,rtx reg)801 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
802 {
803   rtx pic_ref = orig;
804 
805   gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
806 
807   /* Labels need special handling.  */
808   if (pic_label_operand (orig, mode))
809     {
810       rtx_insn *insn;
811 
812       /* We do not want to go through the movXX expanders here since that
813            would create recursion.
814 
815            Nor do we really want to call a generator for a named pattern
816            since that requires multiple patterns if we want to support
817            multiple word sizes.
818 
819            So instead we just emit the raw set, which avoids the movXX
820            expanders completely.  */
821       mark_reg_pointer (reg, BITS_PER_UNIT);
822       insn = emit_insn (gen_rtx_SET (reg, orig));
823 
824       /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
825       add_reg_note (insn, REG_EQUAL, orig);
826 
827       /* During and after reload, we need to generate a REG_LABEL_OPERAND note
828            and update LABEL_NUSES because this is not done automatically.  */
829       if (reload_in_progress || reload_completed)
830           {
831             /* Extract LABEL_REF.  */
832             if (GET_CODE (orig) == CONST)
833               orig = XEXP (XEXP (orig, 0), 0);
834             /* Extract CODE_LABEL.  */
835             orig = XEXP (orig, 0);
836             add_reg_note (insn, REG_LABEL_OPERAND, orig);
837             /* Make sure we have label and not a note.  */
838             if (LABEL_P (orig))
839               LABEL_NUSES (orig)++;
840           }
841       crtl->uses_pic_offset_table = 1;
842       return reg;
843     }
844   if (GET_CODE (orig) == SYMBOL_REF)
845     {
846       rtx_insn *insn;
847       rtx tmp_reg;
848 
849       gcc_assert (reg);
850 
851       /* Before reload, allocate a temporary register for the intermediate
852            result.  This allows the sequence to be deleted when the final
853            result is unused and the insns are trivially dead.  */
854       tmp_reg = ((reload_in_progress || reload_completed)
855                      ? reg : gen_reg_rtx (Pmode));
856 
857       if (function_label_operand (orig, VOIDmode))
858           {
859             /* Force function label into memory in word mode.  */
860             orig = XEXP (force_const_mem (word_mode, orig), 0);
861             /* Load plabel address from DLT.  */
862             emit_move_insn (tmp_reg,
863                                 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
864                                                   gen_rtx_HIGH (word_mode, orig)));
865             pic_ref
866               = gen_const_mem (Pmode,
867                                    gen_rtx_LO_SUM (Pmode, tmp_reg,
868                                                        gen_rtx_UNSPEC (Pmode,
869                                                                      gen_rtvec (1, orig),
870                                                                      UNSPEC_DLTIND14R)));
871             emit_move_insn (reg, pic_ref);
872             /* Now load address of function descriptor.  */
873             pic_ref = gen_rtx_MEM (Pmode, reg);
874           }
875       else
876           {
877             /* Load symbol reference from DLT.  */
878             emit_move_insn (tmp_reg,
879                                 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
880                                                   gen_rtx_HIGH (word_mode, orig)));
881             pic_ref
882               = gen_const_mem (Pmode,
883                                    gen_rtx_LO_SUM (Pmode, tmp_reg,
884                                                        gen_rtx_UNSPEC (Pmode,
885                                                                      gen_rtvec (1, orig),
886                                                                      UNSPEC_DLTIND14R)));
887           }
888 
889       crtl->uses_pic_offset_table = 1;
890       mark_reg_pointer (reg, BITS_PER_UNIT);
891       insn = emit_move_insn (reg, pic_ref);
892 
893       /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
894       set_unique_reg_note (insn, REG_EQUAL, orig);
895 
896       return reg;
897     }
898   else if (GET_CODE (orig) == CONST)
899     {
900       rtx base;
901 
902       if (GET_CODE (XEXP (orig, 0)) == PLUS
903             && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
904           return orig;
905 
906       gcc_assert (reg);
907       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
908 
909       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
910       orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
911                                              base == reg ? 0 : reg);
912 
913       if (GET_CODE (orig) == CONST_INT)
914           {
915             if (INT_14_BITS (orig))
916               return plus_constant (Pmode, base, INTVAL (orig));
917             orig = force_reg (Pmode, orig);
918           }
919       pic_ref = gen_rtx_PLUS (Pmode, base, orig);
920       /* Likewise, should we set special REG_NOTEs here?  */
921     }
922 
923   return pic_ref;
924 }
925 
926 static GTY(()) rtx gen_tls_tga;
927 
928 static rtx
gen_tls_get_addr(void)929 gen_tls_get_addr (void)
930 {
931   if (!gen_tls_tga)
932     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
933   return gen_tls_tga;
934 }
935 
936 static rtx
hppa_tls_call(rtx arg)937 hppa_tls_call (rtx arg)
938 {
939   rtx ret;
940 
941   ret = gen_reg_rtx (Pmode);
942   emit_library_call_value (gen_tls_get_addr (), ret,
943                                  LCT_CONST, Pmode, arg, Pmode);
944 
945   return ret;
946 }
947 
948 static rtx
legitimize_tls_address(rtx addr)949 legitimize_tls_address (rtx addr)
950 {
951   rtx ret, tmp, t1, t2, tp;
952   rtx_insn *insn;
953 
954   /* Currently, we can't handle anything but a SYMBOL_REF.  */
955   if (GET_CODE (addr) != SYMBOL_REF)
956     return addr;
957 
958   switch (SYMBOL_REF_TLS_MODEL (addr))
959     {
960       case TLS_MODEL_GLOBAL_DYNAMIC:
961           tmp = gen_reg_rtx (Pmode);
962           if (flag_pic)
963             emit_insn (gen_tgd_load_pic (tmp, addr));
964           else
965             emit_insn (gen_tgd_load (tmp, addr));
966           ret = hppa_tls_call (tmp);
967           break;
968 
969       case TLS_MODEL_LOCAL_DYNAMIC:
970           ret = gen_reg_rtx (Pmode);
971           tmp = gen_reg_rtx (Pmode);
972           start_sequence ();
973           if (flag_pic)
974             emit_insn (gen_tld_load_pic (tmp, addr));
975           else
976             emit_insn (gen_tld_load (tmp, addr));
977           t1 = hppa_tls_call (tmp);
978           insn = get_insns ();
979           end_sequence ();
980           t2 = gen_reg_rtx (Pmode);
981           emit_libcall_block (insn, t2, t1,
982                                   gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
983                                                     UNSPEC_TLSLDBASE));
984           emit_insn (gen_tld_offset_load (ret, addr, t2));
985           break;
986 
987       case TLS_MODEL_INITIAL_EXEC:
988           tp = gen_reg_rtx (Pmode);
989           tmp = gen_reg_rtx (Pmode);
990           ret = gen_reg_rtx (Pmode);
991           emit_insn (gen_tp_load (tp));
992           if (flag_pic)
993             emit_insn (gen_tie_load_pic (tmp, addr));
994           else
995             emit_insn (gen_tie_load (tmp, addr));
996           emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
997           break;
998 
999       case TLS_MODEL_LOCAL_EXEC:
1000           tp = gen_reg_rtx (Pmode);
1001           ret = gen_reg_rtx (Pmode);
1002           emit_insn (gen_tp_load (tp));
1003           emit_insn (gen_tle_load (ret, addr, tp));
1004           break;
1005 
1006       default:
1007           gcc_unreachable ();
1008     }
1009 
1010   return ret;
1011 }
1012 
1013 /* Helper for hppa_legitimize_address.  Given X, return true if it
1014    is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
1015 
1016    This respectively represent canonical shift-add rtxs or scaled
1017    memory addresses.  */
1018 static bool
mem_shadd_or_shadd_rtx_p(rtx x)1019 mem_shadd_or_shadd_rtx_p (rtx x)
1020 {
1021   return ((GET_CODE (x) == ASHIFT
1022              || GET_CODE (x) == MULT)
1023             && GET_CODE (XEXP (x, 1)) == CONST_INT
1024             && ((GET_CODE (x) == ASHIFT
1025                  && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
1026                 || (GET_CODE (x) == MULT
1027                       && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
1028 }
1029 
1030 /* Try machine-dependent ways of modifying an illegitimate address
1031    to be legitimate.  If we find one, return the new, valid address.
1032    This macro is used in only one place: `memory_address' in explow.cc.
1033 
1034    OLDX is the address as it was before break_out_memory_refs was called.
1035    In some cases it is useful to look at this to decide what needs to be done.
1036 
1037    It is always safe for this macro to do nothing.  It exists to recognize
1038    opportunities to optimize the output.
1039 
1040    For the PA, transform:
1041 
1042           memory(X + <large int>)
1043 
1044    into:
1045 
1046           if (<large int> & mask) >= 16
1047             Y = (<large int> & ~mask) + mask + 1  Round up.
1048           else
1049             Y = (<large int> & ~mask)             Round down.
1050           Z = X + Y
1051           memory (Z + (<large int> - Y));
1052 
1053    This is for CSE to find several similar references, and only use one Z.
1054 
1055    X can either be a SYMBOL_REF or REG, but because combine cannot
1056    perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1057    D will not fit in 14 bits.
1058 
1059    MODE_FLOAT references allow displacements which fit in 5 bits, so use
1060    0x1f as the mask.
1061 
1062    MODE_INT references allow displacements which fit in 14 bits, so use
1063    0x3fff as the mask.
1064 
1065    This relies on the fact that most mode MODE_FLOAT references will use FP
1066    registers and most mode MODE_INT references will use integer registers.
1067    (In the rare case of an FP register used in an integer MODE, we depend
1068    on secondary reloads to clean things up.)
1069 
1070 
1071    It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1072    manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
1073    addressing modes to be used).
1074 
1075    Note that the addresses passed into hppa_legitimize_address always
1076    come from a MEM, so we only have to match the MULT form on incoming
1077    addresses.  But to be future proof we also match the ASHIFT form.
1078 
1079    However, this routine always places those shift-add sequences into
1080    registers, so we have to generate the ASHIFT form as our output.
1081 
1082    Put X and Z into registers.  Then put the entire expression into
1083    a register.  */
1084 
1085 rtx
hppa_legitimize_address(rtx x,rtx oldx ATTRIBUTE_UNUSED,machine_mode mode)1086 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1087                                machine_mode mode)
1088 {
1089   rtx orig = x;
1090 
1091   /* We need to canonicalize the order of operands in unscaled indexed
1092      addresses since the code that checks if an address is valid doesn't
1093      always try both orders.  */
1094   if (!TARGET_NO_SPACE_REGS
1095       && GET_CODE (x) == PLUS
1096       && GET_MODE (x) == Pmode
1097       && REG_P (XEXP (x, 0))
1098       && REG_P (XEXP (x, 1))
1099       && REG_POINTER (XEXP (x, 0))
1100       && !REG_POINTER (XEXP (x, 1)))
1101     return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1102 
1103   if (tls_referenced_p (x))
1104     return legitimize_tls_address (x);
1105   else if (flag_pic)
1106     return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1107 
1108   /* Strip off CONST.  */
1109   if (GET_CODE (x) == CONST)
1110     x = XEXP (x, 0);
1111 
1112   /* Special case.  Get the SYMBOL_REF into a register and use indexing.
1113      That should always be safe.  */
1114   if (GET_CODE (x) == PLUS
1115       && GET_CODE (XEXP (x, 0)) == REG
1116       && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1117     {
1118       rtx reg = force_reg (Pmode, XEXP (x, 1));
1119       return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
1120     }
1121 
1122   /* Note we must reject symbols which represent function addresses
1123      since the assembler/linker can't handle arithmetic on plabels.  */
1124   if (GET_CODE (x) == PLUS
1125       && GET_CODE (XEXP (x, 1)) == CONST_INT
1126       && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1127              && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
1128             || GET_CODE (XEXP (x, 0)) == REG))
1129     {
1130       rtx int_part, ptr_reg;
1131       HOST_WIDE_INT newoffset;
1132       HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
1133       HOST_WIDE_INT mask;
1134 
1135       mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1136                 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
1137 
1138       /* Choose which way to round the offset.  Round up if we
1139            are >= halfway to the next boundary.  */
1140       if ((offset & mask) >= ((mask + 1) / 2))
1141           newoffset = (offset & ~ mask) + mask + 1;
1142       else
1143           newoffset = (offset & ~ mask);
1144 
1145       /* If the newoffset will not fit in 14 bits (ldo), then
1146            handling this would take 4 or 5 instructions (2 to load
1147            the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1148            add the new offset and the SYMBOL_REF.)  Combine cannot
1149            handle 4->2 or 5->2 combinations, so do not create
1150            them.  */
1151       if (! VAL_14_BITS_P (newoffset)
1152             && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1153           {
1154             rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
1155             rtx tmp_reg
1156               = force_reg (Pmode,
1157                                gen_rtx_HIGH (Pmode, const_part));
1158             ptr_reg
1159               = force_reg (Pmode,
1160                                gen_rtx_LO_SUM (Pmode,
1161                                                    tmp_reg, const_part));
1162           }
1163       else
1164           {
1165             if (! VAL_14_BITS_P (newoffset))
1166               int_part = force_reg (Pmode, GEN_INT (newoffset));
1167             else
1168               int_part = GEN_INT (newoffset);
1169 
1170             ptr_reg = force_reg (Pmode,
1171                                      gen_rtx_PLUS (Pmode,
1172                                                        force_reg (Pmode, XEXP (x, 0)),
1173                                                        int_part));
1174           }
1175       return plus_constant (Pmode, ptr_reg, offset - newoffset);
1176     }
1177 
1178   /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */
1179 
1180   if (GET_CODE (x) == PLUS
1181       && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1182       && (OBJECT_P (XEXP (x, 1))
1183             || GET_CODE (XEXP (x, 1)) == SUBREG)
1184       && GET_CODE (XEXP (x, 1)) != CONST)
1185     {
1186       /* If we were given a MULT, we must fix the constant
1187            as we're going to create the ASHIFT form.  */
1188       HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1189       if (GET_CODE (XEXP (x, 0)) == MULT)
1190           shift_val = exact_log2 (shift_val);
1191 
1192       rtx reg1, reg2;
1193       reg1 = XEXP (x, 1);
1194       if (GET_CODE (reg1) != REG)
1195           reg1 = force_reg (Pmode, force_operand (reg1, 0));
1196 
1197       reg2 = XEXP (XEXP (x, 0), 0);
1198       if (GET_CODE (reg2) != REG)
1199         reg2 = force_reg (Pmode, force_operand (reg2, 0));
1200 
1201       return force_reg (Pmode,
1202                               gen_rtx_PLUS (Pmode,
1203                                               gen_rtx_ASHIFT (Pmode, reg2,
1204                                                                   GEN_INT (shift_val)),
1205                                               reg1));
1206     }
1207 
1208   /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1209 
1210      Only do so for floating point modes since this is more speculative
1211      and we lose if it's an integer store.  */
1212   if (GET_CODE (x) == PLUS
1213       && GET_CODE (XEXP (x, 0)) == PLUS
1214       && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
1215       && (mode == SFmode || mode == DFmode))
1216     {
1217       int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
1218 
1219       /* If we were given a MULT, we must fix the constant
1220            as we're going to create the ASHIFT form.  */
1221       if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
1222           shift_val = exact_log2 (shift_val);
1223 
1224       /* Try and figure out what to use as a base register.  */
1225       rtx reg1, reg2, base, idx;
1226 
1227       reg1 = XEXP (XEXP (x, 0), 1);
1228       reg2 = XEXP (x, 1);
1229       base = NULL_RTX;
1230       idx = NULL_RTX;
1231 
1232       /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
1233            then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1234            it's a base register below.  */
1235       if (GET_CODE (reg1) != REG)
1236           reg1 = force_reg (Pmode, force_operand (reg1, 0));
1237 
1238       if (GET_CODE (reg2) != REG)
1239           reg2 = force_reg (Pmode, force_operand (reg2, 0));
1240 
1241       /* Figure out what the base and index are.  */
1242 
1243       if (GET_CODE (reg1) == REG
1244             && REG_POINTER (reg1))
1245           {
1246             base = reg1;
1247             idx = gen_rtx_PLUS (Pmode,
1248                                     gen_rtx_ASHIFT (Pmode,
1249                                                         XEXP (XEXP (XEXP (x, 0), 0), 0),
1250                                                         GEN_INT (shift_val)),
1251                                     XEXP (x, 1));
1252           }
1253       else if (GET_CODE (reg2) == REG
1254                  && REG_POINTER (reg2))
1255           {
1256             base = reg2;
1257             idx = XEXP (x, 0);
1258           }
1259 
1260       if (base == 0)
1261           return orig;
1262 
1263       /* If the index adds a large constant, try to scale the
1264            constant so that it can be loaded with only one insn.  */
1265       if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1266             && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1267                                   / INTVAL (XEXP (XEXP (idx, 0), 1)))
1268             && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1269           {
1270             /* Divide the CONST_INT by the scale factor, then add it to A.  */
1271             HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));
1272             val /= (1 << shift_val);
1273 
1274             reg1 = XEXP (XEXP (idx, 0), 0);
1275             if (GET_CODE (reg1) != REG)
1276               reg1 = force_reg (Pmode, force_operand (reg1, 0));
1277 
1278             reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1279 
1280             /* We can now generate a simple scaled indexed address.  */
1281             return
1282               force_reg
1283                 (Pmode, gen_rtx_PLUS (Pmode,
1284                                             gen_rtx_ASHIFT (Pmode, reg1,
1285                                                                 GEN_INT (shift_val)),
1286                                             base));
1287           }
1288 
1289       /* If B + C is still a valid base register, then add them.  */
1290       if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1291             && INTVAL (XEXP (idx, 1)) <= 4096
1292             && INTVAL (XEXP (idx, 1)) >= -4096)
1293           {
1294             rtx reg1, reg2;
1295 
1296             reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1297 
1298             reg2 = XEXP (XEXP (idx, 0), 0);
1299             if (GET_CODE (reg2) != CONST_INT)
1300               reg2 = force_reg (Pmode, force_operand (reg2, 0));
1301 
1302             return force_reg (Pmode,
1303                                   gen_rtx_PLUS (Pmode,
1304                                                     gen_rtx_ASHIFT (Pmode, reg2,
1305                                                                         GEN_INT (shift_val)),
1306                                                     reg1));
1307           }
1308 
1309       /* Get the index into a register, then add the base + index and
1310            return a register holding the result.  */
1311 
1312       /* First get A into a register.  */
1313       reg1 = XEXP (XEXP (idx, 0), 0);
1314       if (GET_CODE (reg1) != REG)
1315           reg1 = force_reg (Pmode, force_operand (reg1, 0));
1316 
1317       /* And get B into a register.  */
1318       reg2 = XEXP (idx, 1);
1319       if (GET_CODE (reg2) != REG)
1320           reg2 = force_reg (Pmode, force_operand (reg2, 0));
1321 
1322       reg1 = force_reg (Pmode,
1323                               gen_rtx_PLUS (Pmode,
1324                                               gen_rtx_ASHIFT (Pmode, reg1,
1325                                                                   GEN_INT (shift_val)),
1326                                               reg2));
1327 
1328       /* Add the result to our base register and return.  */
1329       return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1330 
1331     }
1332 
1333   /* Uh-oh.  We might have an address for x[n-100000].  This needs
1334      special handling to avoid creating an indexed memory address
1335      with x-100000 as the base.
1336 
1337      If the constant part is small enough, then it's still safe because
1338      there is a guard page at the beginning and end of the data segment.
1339 
1340      Scaled references are common enough that we want to try and rearrange the
1341      terms so that we can use indexing for these addresses too.  Only
1342      do the optimization for floatint point modes.  */
1343 
1344   if (GET_CODE (x) == PLUS
1345       && pa_symbolic_expression_p (XEXP (x, 1)))
1346     {
1347       /* Ugly.  We modify things here so that the address offset specified
1348            by the index expression is computed first, then added to x to form
1349            the entire address.  */
1350 
1351       rtx regx1, regx2, regy1, regy2, y;
1352 
1353       /* Strip off any CONST.  */
1354       y = XEXP (x, 1);
1355       if (GET_CODE (y) == CONST)
1356           y = XEXP (y, 0);
1357 
1358       if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1359           {
1360             /* See if this looks like
1361                     (plus (mult (reg) (mem_shadd_const))
1362                           (const (plus (symbol_ref) (const_int))))
1363 
1364                Where const_int is small.  In that case the const
1365                expression is a valid pointer for indexing.
1366 
1367                If const_int is big, but can be divided evenly by shadd_const
1368                and added to (reg).  This allows more scaled indexed addresses.  */
1369             if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1370                 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1371                 && GET_CODE (XEXP (y, 1)) == CONST_INT
1372                 && INTVAL (XEXP (y, 1)) >= -4096
1373                 && INTVAL (XEXP (y, 1)) <= 4095)
1374               {
1375                 HOST_WIDE_INT shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1376 
1377                 /* If we were given a MULT, we must fix the constant
1378                      as we're going to create the ASHIFT form.  */
1379                 if (GET_CODE (XEXP (x, 0)) == MULT)
1380                     shift_val = exact_log2 (shift_val);
1381 
1382                 rtx reg1, reg2;
1383 
1384                 reg1 = XEXP (x, 1);
1385                 if (GET_CODE (reg1) != REG)
1386                     reg1 = force_reg (Pmode, force_operand (reg1, 0));
1387 
1388                 reg2 = XEXP (XEXP (x, 0), 0);
1389                 if (GET_CODE (reg2) != REG)
1390                   reg2 = force_reg (Pmode, force_operand (reg2, 0));
1391 
1392                 return
1393                     force_reg (Pmode,
1394                                  gen_rtx_PLUS (Pmode,
1395                                                    gen_rtx_ASHIFT (Pmode,
1396                                                                        reg2,
1397                                                                        GEN_INT (shift_val)),
1398                                                    reg1));
1399               }
1400             else if ((mode == DFmode || mode == SFmode)
1401                        && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1402                        && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
1403                        && GET_CODE (XEXP (y, 1)) == CONST_INT
1404                        && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
1405               {
1406                 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1407 
1408                 /* If we were given a MULT, we must fix the constant
1409                      as we're going to create the ASHIFT form.  */
1410                 if (GET_CODE (XEXP (x, 0)) == MULT)
1411                     shift_val = exact_log2 (shift_val);
1412 
1413                 regx1
1414                     = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1415                                                        / INTVAL (XEXP (XEXP (x, 0), 1))));
1416                 regx2 = XEXP (XEXP (x, 0), 0);
1417                 if (GET_CODE (regx2) != REG)
1418                     regx2 = force_reg (Pmode, force_operand (regx2, 0));
1419                 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1420                                                                       regx2, regx1));
1421                 return
1422                     force_reg (Pmode,
1423                                  gen_rtx_PLUS (Pmode,
1424                                                    gen_rtx_ASHIFT (Pmode, regx2,
1425                                                                      GEN_INT (shift_val)),
1426                                                    force_reg (Pmode, XEXP (y, 0))));
1427               }
1428             else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1429                        && INTVAL (XEXP (y, 1)) >= -4096
1430                        && INTVAL (XEXP (y, 1)) <= 4095)
1431               {
1432                 /* This is safe because of the guard page at the
1433                      beginning and end of the data space.  Just
1434                      return the original address.  */
1435                 return orig;
1436               }
1437             else
1438               {
1439                 /* Doesn't look like one we can optimize.  */
1440                 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1441                 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1442                 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1443                 regx1 = force_reg (Pmode,
1444                                          gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1445                                                              regx1, regy2));
1446                 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1447               }
1448           }
1449     }
1450 
1451   return orig;
1452 }
1453 
1454 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1455 
1456    Compute extra cost of moving data between one register class
1457    and another.
1458 
1459    Make moves from SAR so expensive they should never happen.  We used to
1460    have 0xffff here, but that generates overflow in rare cases.
1461 
1462    Copies involving a FP register and a non-FP register are relatively
1463    expensive because they must go through memory.
1464 
1465    Other copies are reasonably cheap.  */
1466 
1467 static int
hppa_register_move_cost(machine_mode mode ATTRIBUTE_UNUSED,reg_class_t from,reg_class_t to)1468 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
1469                                reg_class_t from, reg_class_t to)
1470 {
1471   if (from == SHIFT_REGS)
1472     return 0x100;
1473   else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1474     return 18;
1475   else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1476            || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1477     return 16;
1478   else
1479     return 2;
1480 }
1481 
1482 /* For the HPPA, REG and REG+CONST is cost 0
1483    and addresses involving symbolic constants are cost 2.
1484 
1485    PIC addresses are very expensive.
1486 
1487    It is no coincidence that this has the same structure
1488    as pa_legitimate_address_p.  */
1489 
1490 static int
hppa_address_cost(rtx X,machine_mode mode ATTRIBUTE_UNUSED,addr_space_t as ATTRIBUTE_UNUSED,bool speed ATTRIBUTE_UNUSED)1491 hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
1492                        addr_space_t as ATTRIBUTE_UNUSED,
1493                        bool speed ATTRIBUTE_UNUSED)
1494 {
1495   switch (GET_CODE (X))
1496     {
1497     case REG:
1498     case PLUS:
1499     case LO_SUM:
1500       return 1;
1501     case HIGH:
1502       return 2;
1503     default:
1504       return 4;
1505     }
1506 }
1507 
1508 /* Return true if X represents a (possibly non-canonical) shNadd pattern.
1509    The machine mode of X is known to be SImode or DImode.  */
1510 
1511 static bool
hppa_rtx_costs_shadd_p(rtx x)1512 hppa_rtx_costs_shadd_p (rtx x)
1513 {
1514   if (GET_CODE (x) != PLUS
1515       || !REG_P (XEXP (x, 1)))
1516     return false;
1517   rtx op0 = XEXP (x, 0);
1518   if (GET_CODE (op0) == ASHIFT
1519       && CONST_INT_P (XEXP (op0, 1))
1520       && REG_P (XEXP (op0, 0)))
1521     {
1522       unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
1523       return x == 1 || x == 2 || x == 3;
1524     }
1525   if (GET_CODE (op0) == MULT
1526       && CONST_INT_P (XEXP (op0, 1))
1527       && REG_P (XEXP (op0, 0)))
1528     {
1529       unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
1530       return x == 2 || x == 4 || x == 8;
1531     }
1532   return false;
1533 }
1534 
1535 /* Compute a (partial) cost for rtx X.  Return true if the complete
1536    cost has been computed, and false if subexpressions should be
1537    scanned.  In either case, *TOTAL contains the cost result.  */
1538 
1539 static bool
hppa_rtx_costs(rtx x,machine_mode mode,int outer_code,int opno ATTRIBUTE_UNUSED,int * total,bool speed)1540 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1541                     int opno ATTRIBUTE_UNUSED,
1542                     int *total, bool speed)
1543 {
1544   int code = GET_CODE (x);
1545 
1546   switch (code)
1547     {
1548     case CONST_INT:
1549       if (outer_code == SET)
1550           *total = COSTS_N_INSNS (1);
1551       else if (INTVAL (x) == 0)
1552           *total = 0;
1553       else if (INT_14_BITS (x))
1554           *total = 1;
1555       else
1556           *total = 2;
1557       return true;
1558 
1559     case HIGH:
1560       *total = 2;
1561       return true;
1562 
1563     case CONST:
1564     case LABEL_REF:
1565     case SYMBOL_REF:
1566       *total = 4;
1567       return true;
1568 
1569     case CONST_DOUBLE:
1570       if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1571             && outer_code != SET)
1572           *total = 0;
1573       else
1574           *total = 8;
1575       return true;
1576 
1577     case MULT:
1578       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1579           {
1580             *total = COSTS_N_INSNS (3);
1581           }
1582       else if (mode == DImode)
1583           {
1584             if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
1585               *total = COSTS_N_INSNS (25);
1586             else
1587               *total = COSTS_N_INSNS (80);
1588           }
1589       else
1590           {
1591             if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
1592               *total = COSTS_N_INSNS (8);
1593             else
1594               *total = COSTS_N_INSNS (20);
1595           }
1596       return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1597 
1598     case DIV:
1599       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1600           {
1601             *total = COSTS_N_INSNS (14);
1602             return false;
1603           }
1604       /* FALLTHRU */
1605 
1606     case UDIV:
1607     case MOD:
1608     case UMOD:
1609       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
1610       if (mode == DImode)
1611           *total = COSTS_N_INSNS (240);
1612       else
1613           *total = COSTS_N_INSNS (60);
1614       return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1615 
1616     case PLUS: /* this includes shNadd insns */
1617     case MINUS:
1618       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1619           *total = COSTS_N_INSNS (3);
1620       else if (mode == DImode)
1621           {
1622             if (TARGET_64BIT)
1623               {
1624                 *total = COSTS_N_INSNS (1);
1625                 /* Handle shladd,l instructions.  */
1626                 if (hppa_rtx_costs_shadd_p (x))
1627                     return true;
1628               }
1629             else
1630               *total = COSTS_N_INSNS (2);
1631           }
1632       else
1633           {
1634             *total = COSTS_N_INSNS (1);
1635             /* Handle shNadd instructions.  */
1636             if (hppa_rtx_costs_shadd_p (x))
1637               return true;
1638           }
1639       return REG_P (XEXP (x, 0))
1640                && (REG_P (XEXP (x, 1))
1641                      || CONST_INT_P (XEXP (x, 1)));
1642 
1643     case ASHIFT:
1644       if (mode == DImode)
1645           {
1646             if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1647               {
1648                 if (TARGET_64BIT)
1649                     *total = COSTS_N_INSNS (1);
1650                 else
1651                     *total = COSTS_N_INSNS (2);
1652                 return true;
1653               }
1654             else if (TARGET_64BIT)
1655               *total = COSTS_N_INSNS (3);
1656             else if (speed)
1657               *total = COSTS_N_INSNS (13);
1658             else
1659               *total = COSTS_N_INSNS (18);
1660           }
1661       else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1662           {
1663             if (TARGET_64BIT)
1664               *total = COSTS_N_INSNS (2);
1665             else
1666               *total = COSTS_N_INSNS (1);
1667             return true;
1668           }
1669       else if (TARGET_64BIT)
1670           *total = COSTS_N_INSNS (4);
1671       else
1672           *total = COSTS_N_INSNS (2);
1673       return REG_P (XEXP (x, 0))
1674                && (REG_P (XEXP (x, 1))
1675                      || CONST_INT_P (XEXP (x, 1)));
1676 
1677     case ASHIFTRT:
1678       if (mode == DImode)
1679           {
1680             if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1681               {
1682                 if (TARGET_64BIT)
1683                     *total = COSTS_N_INSNS (1);
1684                 else
1685                     *total = COSTS_N_INSNS (2);
1686                 return true;
1687               }
1688             else if (TARGET_64BIT)
1689               *total = COSTS_N_INSNS (3);
1690             else if (speed)
1691               *total = COSTS_N_INSNS (14);
1692             else
1693               *total = COSTS_N_INSNS (19);
1694           }
1695       else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1696           {
1697             if (TARGET_64BIT)
1698               *total = COSTS_N_INSNS (2);
1699             else
1700               *total = COSTS_N_INSNS (1);
1701             return true;
1702           }
1703       else if (TARGET_64BIT)
1704           *total = COSTS_N_INSNS (4);
1705       else
1706           *total = COSTS_N_INSNS (2);
1707       return REG_P (XEXP (x, 0))
1708                && (REG_P (XEXP (x, 1))
1709                      || CONST_INT_P (XEXP (x, 1)));
1710 
1711     case LSHIFTRT:
1712       if (mode == DImode)
1713           {
1714             if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1715               {
1716                 if (TARGET_64BIT)
1717                     *total = COSTS_N_INSNS (1);
1718                 else
1719                     *total = COSTS_N_INSNS (2);
1720                 return true;
1721               }
1722             else if (TARGET_64BIT)
1723               *total = COSTS_N_INSNS (2);
1724             else if (speed)
1725               *total = COSTS_N_INSNS (12);
1726             else
1727               *total = COSTS_N_INSNS (15);
1728           }
1729       else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1730           {
1731             *total = COSTS_N_INSNS (1);
1732             return true;
1733           }
1734       else if (TARGET_64BIT)
1735           *total = COSTS_N_INSNS (3);
1736       else
1737           *total = COSTS_N_INSNS (2);
1738       return REG_P (XEXP (x, 0))
1739                && (REG_P (XEXP (x, 1))
1740                      || CONST_INT_P (XEXP (x, 1)));
1741 
1742     default:
1743       return false;
1744     }
1745 }
1746 
1747 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1748    new rtx with the correct mode.  */
1749 static inline rtx
force_mode(machine_mode mode,rtx orig)1750 force_mode (machine_mode mode, rtx orig)
1751 {
1752   if (mode == GET_MODE (orig))
1753     return orig;
1754 
1755   gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1756 
1757   return gen_rtx_REG (mode, REGNO (orig));
1758 }
1759 
1760 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
1761 
1762 static bool
pa_cannot_force_const_mem(machine_mode mode ATTRIBUTE_UNUSED,rtx x)1763 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1764 {
1765   return tls_referenced_p (x);
1766 }
1767 
1768 /* Emit insns to move operands[1] into operands[0].
1769 
1770    Return 1 if we have written out everything that needs to be done to
1771    do the move.  Otherwise, return 0 and the caller will emit the move
1772    normally.
1773 
1774    Note SCRATCH_REG may not be in the proper mode depending on how it
1775    will be used.  This routine is responsible for creating a new copy
1776    of SCRATCH_REG in the proper mode.  */
1777 
1778 int
pa_emit_move_sequence(rtx * operands,machine_mode mode,rtx scratch_reg)1779 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1780 {
1781   rtx operand0 = operands[0];
1782   rtx operand1 = operands[1];
1783   rtx tem;
1784 
1785   /* We can only handle indexed addresses in the destination operand
1786      of floating point stores.  Thus, we need to break out indexed
1787      addresses from the destination operand.  */
1788   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1789     {
1790       gcc_assert (can_create_pseudo_p ());
1791 
1792       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1793       operand0 = replace_equiv_address (operand0, tem);
1794     }
1795 
1796   /* On targets with non-equivalent space registers, break out unscaled
1797      indexed addresses from the source operand before the final CSE.
1798      We have to do this because the REG_POINTER flag is not correctly
1799      carried through various optimization passes and CSE may substitute
1800      a pseudo without the pointer set for one with the pointer set.  As
1801      a result, we lose various opportunities to create insns with
1802      unscaled indexed addresses.  */
1803   if (!TARGET_NO_SPACE_REGS
1804       && !cse_not_expected
1805       && GET_CODE (operand1) == MEM
1806       && GET_CODE (XEXP (operand1, 0)) == PLUS
1807       && REG_P (XEXP (XEXP (operand1, 0), 0))
1808       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1809     operand1
1810       = replace_equiv_address (operand1,
1811                                      copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1812 
1813   if (scratch_reg
1814       && reload_in_progress && GET_CODE (operand0) == REG
1815       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1816     operand0 = reg_equiv_mem (REGNO (operand0));
1817   else if (scratch_reg
1818              && reload_in_progress && GET_CODE (operand0) == SUBREG
1819              && GET_CODE (SUBREG_REG (operand0)) == REG
1820              && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1821     {
1822      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1823           the code which tracks sets/uses for delete_output_reload.  */
1824       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1825                                          reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1826                                          SUBREG_BYTE (operand0));
1827       operand0 = alter_subreg (&temp, true);
1828     }
1829 
1830   if (scratch_reg
1831       && reload_in_progress && GET_CODE (operand1) == REG
1832       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1833     operand1 = reg_equiv_mem (REGNO (operand1));
1834   else if (scratch_reg
1835              && reload_in_progress && GET_CODE (operand1) == SUBREG
1836              && GET_CODE (SUBREG_REG (operand1)) == REG
1837              && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1838     {
1839      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1840           the code which tracks sets/uses for delete_output_reload.  */
1841       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1842                                          reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1843                                          SUBREG_BYTE (operand1));
1844       operand1 = alter_subreg (&temp, true);
1845     }
1846 
1847   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1848       && ((tem = find_replacement (&XEXP (operand0, 0)))
1849             != XEXP (operand0, 0)))
1850     operand0 = replace_equiv_address (operand0, tem);
1851 
1852   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1853       && ((tem = find_replacement (&XEXP (operand1, 0)))
1854             != XEXP (operand1, 0)))
1855     operand1 = replace_equiv_address (operand1, tem);
1856 
1857   /* Handle secondary reloads for loads/stores of FP registers from
1858      REG+D addresses where D does not fit in 5 or 14 bits, including
1859      (subreg (mem (addr))) cases, and reloads for other unsupported
1860      memory operands.  */
1861   if (scratch_reg
1862       && FP_REG_P (operand0)
1863       && (MEM_P (operand1)
1864             || (GET_CODE (operand1) == SUBREG
1865                 && MEM_P (XEXP (operand1, 0)))))
1866     {
1867       rtx op1 = operand1;
1868 
1869       if (GET_CODE (op1) == SUBREG)
1870           op1 = XEXP (op1, 0);
1871 
1872       if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1873           {
1874             if (!(TARGET_PA_20
1875                     && !TARGET_ELF32
1876                     && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1877                 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1878               {
1879                 /* SCRATCH_REG will hold an address and maybe the actual data.
1880                      We want it in WORD_MODE regardless of what mode it was
1881                      originally given to us.  */
1882                 scratch_reg = force_mode (word_mode, scratch_reg);
1883 
1884                 /* D might not fit in 14 bits either; for such cases load D
1885                      into scratch reg.  */
1886                 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1887                     {
1888                       emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1889                       emit_move_insn (scratch_reg,
1890                                           gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1891                                                               Pmode,
1892                                                               XEXP (XEXP (op1, 0), 0),
1893                                                               scratch_reg));
1894                     }
1895                 else
1896                     emit_move_insn (scratch_reg, XEXP (op1, 0));
1897                 op1 = replace_equiv_address (op1, scratch_reg);
1898               }
1899           }
1900       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1901                  || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1902                  || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1903           {
1904             /* Load memory address into SCRATCH_REG.  */
1905             scratch_reg = force_mode (word_mode, scratch_reg);
1906             emit_move_insn (scratch_reg, XEXP (op1, 0));
1907             op1 = replace_equiv_address (op1, scratch_reg);
1908           }
1909       emit_insn (gen_rtx_SET (operand0, op1));
1910       return 1;
1911     }
1912   else if (scratch_reg
1913              && FP_REG_P (operand1)
1914              && (MEM_P (operand0)
1915                  || (GET_CODE (operand0) == SUBREG
1916                        && MEM_P (XEXP (operand0, 0)))))
1917     {
1918       rtx op0 = operand0;
1919 
1920       if (GET_CODE (op0) == SUBREG)
1921           op0 = XEXP (op0, 0);
1922 
1923       if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1924           {
1925             if (!(TARGET_PA_20
1926                     && !TARGET_ELF32
1927                     && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1928                 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1929               {
1930                 /* SCRATCH_REG will hold an address and maybe the actual data.
1931                      We want it in WORD_MODE regardless of what mode it was
1932                      originally given to us.  */
1933                 scratch_reg = force_mode (word_mode, scratch_reg);
1934 
1935                 /* D might not fit in 14 bits either; for such cases load D
1936                      into scratch reg.  */
1937                 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1938                     {
1939                       emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1940                       emit_move_insn (scratch_reg,
1941                                           gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1942                                                               Pmode,
1943                                                               XEXP (XEXP (op0, 0), 0),
1944                                                               scratch_reg));
1945                     }
1946                 else
1947                     emit_move_insn (scratch_reg, XEXP (op0, 0));
1948                 op0 = replace_equiv_address (op0, scratch_reg);
1949               }
1950           }
1951       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1952                  || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1953                  || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1954           {
1955             /* Load memory address into SCRATCH_REG.  */
1956             scratch_reg = force_mode (word_mode, scratch_reg);
1957             emit_move_insn (scratch_reg, XEXP (op0, 0));
1958             op0 = replace_equiv_address (op0, scratch_reg);
1959           }
1960       emit_insn (gen_rtx_SET (op0, operand1));
1961       return 1;
1962     }
1963   /* Handle secondary reloads for loads of FP registers from constant
1964      expressions by forcing the constant into memory.  For the most part,
1965      this is only necessary for SImode and DImode.
1966 
1967      Use scratch_reg to hold the address of the memory location.  */
1968   else if (scratch_reg
1969              && CONSTANT_P (operand1)
1970              && FP_REG_P (operand0))
1971     {
1972       rtx const_mem, xoperands[2];
1973 
1974       if (operand1 == CONST0_RTX (mode))
1975           {
1976             emit_insn (gen_rtx_SET (operand0, operand1));
1977             return 1;
1978           }
1979 
1980       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1981            it in WORD_MODE regardless of what mode it was originally given
1982            to us.  */
1983       scratch_reg = force_mode (word_mode, scratch_reg);
1984 
1985       /* Force the constant into memory and put the address of the
1986            memory location into scratch_reg.  */
1987       const_mem = force_const_mem (mode, operand1);
1988       xoperands[0] = scratch_reg;
1989       xoperands[1] = XEXP (const_mem, 0);
1990       pa_emit_move_sequence (xoperands, Pmode, 0);
1991 
1992       /* Now load the destination register.  */
1993       emit_insn (gen_rtx_SET (operand0,
1994                                     replace_equiv_address (const_mem, scratch_reg)));
1995       return 1;
1996     }
1997   /* Handle secondary reloads for SAR.  These occur when trying to load
1998      the SAR from memory or a constant.  */
1999   else if (scratch_reg
2000              && GET_CODE (operand0) == REG
2001              && REGNO (operand0) < FIRST_PSEUDO_REGISTER
2002              && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
2003              && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
2004     {
2005       /* D might not fit in 14 bits either; for such cases load D into
2006            scratch reg.  */
2007       if (GET_CODE (operand1) == MEM
2008             && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
2009           {
2010             /* We are reloading the address into the scratch register, so we
2011                want to make sure the scratch register is a full register.  */
2012             scratch_reg = force_mode (word_mode, scratch_reg);
2013 
2014             emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2015             emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
2016                                                                                         0)),
2017                                                                    Pmode,
2018                                                                    XEXP (XEXP (operand1, 0),
2019                                                                    0),
2020                                                                    scratch_reg));
2021 
2022             /* Now we are going to load the scratch register from memory,
2023                we want to load it in the same width as the original MEM,
2024                which must be the same as the width of the ultimate destination,
2025                OPERAND0.  */
2026             scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2027 
2028             emit_move_insn (scratch_reg,
2029                                 replace_equiv_address (operand1, scratch_reg));
2030           }
2031       else
2032           {
2033             /* We want to load the scratch register using the same mode as
2034                the ultimate destination.  */
2035             scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2036 
2037             emit_move_insn (scratch_reg, operand1);
2038           }
2039 
2040       /* And emit the insn to set the ultimate destination.  We know that
2041            the scratch register has the same mode as the destination at this
2042            point.  */
2043       emit_move_insn (operand0, scratch_reg);
2044       return 1;
2045     }
2046 
2047   /* Handle the most common case: storing into a register.  */
2048   if (register_operand (operand0, mode))
2049     {
2050       /* Legitimize TLS symbol references.  This happens for references
2051            that aren't a legitimate constant.  */
2052       if (PA_SYMBOL_REF_TLS_P (operand1))
2053           operand1 = legitimize_tls_address (operand1);
2054 
2055       if (register_operand (operand1, mode)
2056             || (GET_CODE (operand1) == CONST_INT
2057                 && pa_cint_ok_for_move (UINTVAL (operand1)))
2058             || (operand1 == CONST0_RTX (mode))
2059             || (GET_CODE (operand1) == HIGH
2060                 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2061             /* Only `general_operands' can come here, so MEM is ok.  */
2062             || GET_CODE (operand1) == MEM)
2063           {
2064             /* Various sets are created during RTL generation which don't
2065                have the REG_POINTER flag correctly set.  After the CSE pass,
2066                instruction recognition can fail if we don't consistently
2067                set this flag when performing register copies.  This should
2068                also improve the opportunities for creating insns that use
2069                unscaled indexing.  */
2070             if (REG_P (operand0) && REG_P (operand1))
2071               {
2072                 if (REG_POINTER (operand1)
2073                       && !REG_POINTER (operand0)
2074                       && !HARD_REGISTER_P (operand0))
2075                     copy_reg_pointer (operand0, operand1);
2076               }
2077 
2078             /* When MEMs are broken out, the REG_POINTER flag doesn't
2079                get set.  In some cases, we can set the REG_POINTER flag
2080                from the declaration for the MEM.  */
2081             if (REG_P (operand0)
2082                 && GET_CODE (operand1) == MEM
2083                 && !REG_POINTER (operand0))
2084               {
2085                 tree decl = MEM_EXPR (operand1);
2086 
2087                 /* Set the register pointer flag and register alignment
2088                      if the declaration for this memory reference is a
2089                      pointer type.  */
2090                 if (decl)
2091                     {
2092                       tree type;
2093 
2094                       /* If this is a COMPONENT_REF, use the FIELD_DECL from
2095                          tree operand 1.  */
2096                       if (TREE_CODE (decl) == COMPONENT_REF)
2097                         decl = TREE_OPERAND (decl, 1);
2098 
2099                       type = TREE_TYPE (decl);
2100                       type = strip_array_types (type);
2101 
2102                       if (POINTER_TYPE_P (type))
2103                         mark_reg_pointer (operand0, BITS_PER_UNIT);
2104                     }
2105               }
2106 
2107             emit_insn (gen_rtx_SET (operand0, operand1));
2108             return 1;
2109           }
2110     }
2111   else if (GET_CODE (operand0) == MEM)
2112     {
2113       if (mode == DFmode && operand1 == CONST0_RTX (mode)
2114             && !(reload_in_progress || reload_completed))
2115           {
2116             rtx temp = gen_reg_rtx (DFmode);
2117 
2118             emit_insn (gen_rtx_SET (temp, operand1));
2119             emit_insn (gen_rtx_SET (operand0, temp));
2120             return 1;
2121           }
2122       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2123           {
2124             /* Run this case quickly.  */
2125             emit_insn (gen_rtx_SET (operand0, operand1));
2126             return 1;
2127           }
2128       if (! (reload_in_progress || reload_completed))
2129           {
2130             operands[0] = validize_mem (operand0);
2131             operands[1] = operand1 = force_reg (mode, operand1);
2132           }
2133     }
2134 
2135   /* Simplify the source if we need to.
2136      Note we do have to handle function labels here, even though we do
2137      not consider them legitimate constants.  Loop optimizations can
2138      call the emit_move_xxx with one as a source.  */
2139   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2140       || (GET_CODE (operand1) == HIGH
2141             && symbolic_operand (XEXP (operand1, 0), mode))
2142       || function_label_operand (operand1, VOIDmode)
2143       || tls_referenced_p (operand1))
2144     {
2145       int ishighonly = 0;
2146 
2147       if (GET_CODE (operand1) == HIGH)
2148           {
2149             ishighonly = 1;
2150             operand1 = XEXP (operand1, 0);
2151           }
2152       if (symbolic_operand (operand1, mode))
2153           {
2154             /* Argh.  The assembler and linker can't handle arithmetic
2155                involving plabels.
2156 
2157                So we force the plabel into memory, load operand0 from
2158                the memory location, then add in the constant part.  */
2159             if ((GET_CODE (operand1) == CONST
2160                  && GET_CODE (XEXP (operand1, 0)) == PLUS
2161                  && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2162                                                     VOIDmode))
2163                 || function_label_operand (operand1, VOIDmode))
2164               {
2165                 rtx temp, const_part;
2166 
2167                 /* Figure out what (if any) scratch register to use.  */
2168                 if (reload_in_progress || reload_completed)
2169                     {
2170                       scratch_reg = scratch_reg ? scratch_reg : operand0;
2171                       /* SCRATCH_REG will hold an address and maybe the actual
2172                          data.  We want it in WORD_MODE regardless of what mode it
2173                          was originally given to us.  */
2174                       scratch_reg = force_mode (word_mode, scratch_reg);
2175                     }
2176                 else if (flag_pic)
2177                     scratch_reg = gen_reg_rtx (Pmode);
2178 
2179                 if (GET_CODE (operand1) == CONST)
2180                     {
2181                       /* Save away the constant part of the expression.  */
2182                       const_part = XEXP (XEXP (operand1, 0), 1);
2183                       gcc_assert (GET_CODE (const_part) == CONST_INT);
2184 
2185                       /* Force the function label into memory.  */
2186                       temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2187                     }
2188                 else
2189                     {
2190                       /* No constant part.  */
2191                       const_part = NULL_RTX;
2192 
2193                       /* Force the function label into memory.  */
2194                       temp = force_const_mem (mode, operand1);
2195                     }
2196 
2197 
2198                 /* Get the address of the memory location.  PIC-ify it if
2199                      necessary.  */
2200                 temp = XEXP (temp, 0);
2201                 if (flag_pic)
2202                     temp = legitimize_pic_address (temp, mode, scratch_reg);
2203 
2204                 /* Put the address of the memory location into our destination
2205                      register.  */
2206                 operands[1] = temp;
2207                 pa_emit_move_sequence (operands, mode, scratch_reg);
2208 
2209                 /* Now load from the memory location into our destination
2210                      register.  */
2211                 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2212                 pa_emit_move_sequence (operands, mode, scratch_reg);
2213 
2214                 /* And add back in the constant part.  */
2215                 if (const_part != NULL_RTX)
2216                     expand_inc (operand0, const_part);
2217 
2218                 return 1;
2219               }
2220 
2221             if (flag_pic)
2222               {
2223                 rtx_insn *insn;
2224                 rtx temp;
2225 
2226                 if (reload_in_progress || reload_completed)
2227                     {
2228                       temp = scratch_reg ? scratch_reg : operand0;
2229                       /* TEMP will hold an address and maybe the actual
2230                          data.  We want it in WORD_MODE regardless of what mode it
2231                          was originally given to us.  */
2232                       temp = force_mode (word_mode, temp);
2233                     }
2234                 else
2235                     temp = gen_reg_rtx (Pmode);
2236 
2237                 /* Force (const (plus (symbol) (const_int))) to memory
2238                    if the const_int will not fit in 14 bits.  Although
2239                      this requires a relocation, the instruction sequence
2240                      needed to load the value is shorter.  */
2241                 if (GET_CODE (operand1) == CONST
2242                            && GET_CODE (XEXP (operand1, 0)) == PLUS
2243                            && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2244                            && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2245                     {
2246                       rtx x, m = force_const_mem (mode, operand1);
2247 
2248                       x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2249                       x = replace_equiv_address (m, x);
2250                       insn = emit_move_insn (operand0, x);
2251                     }
2252                 else
2253                     {
2254                       operands[1] = legitimize_pic_address (operand1, mode, temp);
2255                       if (REG_P (operand0) && REG_P (operands[1]))
2256                         copy_reg_pointer (operand0, operands[1]);
2257                       insn = emit_move_insn (operand0, operands[1]);
2258                     }
2259 
2260                 /* Put a REG_EQUAL note on this insn.  */
2261                 set_unique_reg_note (insn, REG_EQUAL, operand1);
2262               }
2263             /* On the HPPA, references to data space are supposed to use dp,
2264                register 27, but showing it in the RTL inhibits various cse
2265                and loop optimizations.  */
2266             else
2267               {
2268                 rtx temp, set;
2269 
2270                 if (reload_in_progress || reload_completed)
2271                     {
2272                       temp = scratch_reg ? scratch_reg : operand0;
2273                       /* TEMP will hold an address and maybe the actual
2274                          data.  We want it in WORD_MODE regardless of what mode it
2275                          was originally given to us.  */
2276                       temp = force_mode (word_mode, temp);
2277                     }
2278                 else
2279                     temp = gen_reg_rtx (mode);
2280 
2281                 /* Loading a SYMBOL_REF into a register makes that register
2282                      safe to be used as the base in an indexed address.
2283 
2284                      Don't mark hard registers though.  That loses.  */
2285                 if (GET_CODE (operand0) == REG
2286                       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2287                     mark_reg_pointer (operand0, BITS_PER_UNIT);
2288                 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2289                     mark_reg_pointer (temp, BITS_PER_UNIT);
2290 
2291                 if (ishighonly)
2292                     set = gen_rtx_SET (operand0, temp);
2293                 else
2294                     set = gen_rtx_SET (operand0,
2295                                            gen_rtx_LO_SUM (mode, temp, operand1));
2296 
2297                 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2298                 emit_insn (set);
2299 
2300               }
2301             return 1;
2302           }
2303       else if (tls_referenced_p (operand1))
2304           {
2305             rtx tmp = operand1;
2306             rtx addend = NULL;
2307 
2308             if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2309               {
2310                 addend = XEXP (XEXP (tmp, 0), 1);
2311                 tmp = XEXP (XEXP (tmp, 0), 0);
2312               }
2313 
2314             gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2315             tmp = legitimize_tls_address (tmp);
2316             if (addend)
2317               {
2318                 tmp = gen_rtx_PLUS (mode, tmp, addend);
2319                 tmp = force_operand (tmp, operands[0]);
2320               }
2321             operands[1] = tmp;
2322           }
2323       else if (GET_CODE (operand1) != CONST_INT
2324                  || !pa_cint_ok_for_move (UINTVAL (operand1)))
2325           {
2326             rtx temp;
2327             rtx_insn *insn;
2328             rtx op1 = operand1;
2329             HOST_WIDE_INT value = 0;
2330             HOST_WIDE_INT insv = 0;
2331             int insert = 0;
2332 
2333             if (GET_CODE (operand1) == CONST_INT)
2334               value = INTVAL (operand1);
2335 
2336             if (TARGET_64BIT
2337                 && GET_CODE (operand1) == CONST_INT
2338                 && HOST_BITS_PER_WIDE_INT > 32
2339                 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2340               {
2341                 HOST_WIDE_INT nval;
2342 
2343                 /* Extract the low order 32 bits of the value and sign extend.
                     If the new value is the same as the original value, we
                     can use the original value as-is.  If the new value is
2346                      different, we use it and insert the most-significant 32-bits
2347                      of the original value into the final result.  */
2348                 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2349                           ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2350                 if (value != nval)
2351                     {
2352 #if HOST_BITS_PER_WIDE_INT > 32
2353                       insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2354 #endif
2355                       insert = 1;
2356                       value = nval;
2357                       operand1 = GEN_INT (nval);
2358                     }
2359               }
2360 
2361             if (reload_in_progress || reload_completed)
2362               temp = scratch_reg ? scratch_reg : operand0;
2363             else
2364               temp = gen_reg_rtx (mode);
2365 
2366             /* We don't directly split DImode constants on 32-bit targets
2367                because PLUS uses an 11-bit immediate and the insn sequence
2368                generated is not as efficient as the one using HIGH/LO_SUM.  */
2369             if (GET_CODE (operand1) == CONST_INT
2370                 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2371                 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2372                 && !insert)
2373               {
2374                 /* Directly break constant into high and low parts.  This
2375                      provides better optimization opportunities because various
2376                      passes recognize constants split with PLUS but not LO_SUM.
2377                      We use a 14-bit signed low part except when the addition
2378                      of 0x4000 to the high part might change the sign of the
2379                      high part.  */
2380                 HOST_WIDE_INT low = value & 0x3fff;
2381                 HOST_WIDE_INT high = value & ~ 0x3fff;
2382 
2383                 if (low >= 0x2000)
2384                     {
2385                       if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2386                         high += 0x2000;
2387                       else
2388                         high += 0x4000;
2389                     }
2390 
2391                 low = value - high;
2392 
2393                 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2394                 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2395               }
2396             else
2397               {
2398                 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2399                 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2400               }
2401 
2402             insn = emit_move_insn (operands[0], operands[1]);
2403 
2404             /* Now insert the most significant 32 bits of the value
2405                into the register.  When we don't have a second register
2406                available, it could take up to nine instructions to load
2407                a 64-bit integer constant.  Prior to reload, we force
2408                constants that would take more than three instructions
2409                to load to the constant pool.  During and after reload,
2410                we have to handle all possible values.  */
2411             if (insert)
2412               {
2413                 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2414                      register and the value to be inserted is outside the
2415                      range that can be loaded with three depdi instructions.  */
2416                 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2417                     {
2418                       operand1 = GEN_INT (insv);
2419 
2420                       emit_insn (gen_rtx_SET (temp,
2421                                                     gen_rtx_HIGH (mode, operand1)));
2422                       emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2423                       if (mode == DImode)
2424                         insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2425                                                               const0_rtx, temp));
2426                       else
2427                         insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2428                                                               const0_rtx, temp));
2429                     }
2430                 else
2431                     {
2432                       int len = 5, pos = 27;
2433 
2434                       /* Insert the bits using the depdi instruction.  */
2435                       while (pos >= 0)
2436                         {
2437                           HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2438                           HOST_WIDE_INT sign = v5 < 0;
2439 
2440                           /* Left extend the insertion.  */
2441                           insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2442                           while (pos > 0 && (insv & 1) == sign)
2443                               {
2444                                 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2445                                 len += 1;
2446                                 pos -= 1;
2447                               }
2448 
2449                           if (mode == DImode)
2450                               insn = emit_insn (gen_insvdi (operand0,
2451                                                                   GEN_INT (len),
2452                                                                   GEN_INT (pos),
2453                                                                   GEN_INT (v5)));
2454                           else
2455                               insn = emit_insn (gen_insvsi (operand0,
2456                                                                   GEN_INT (len),
2457                                                                   GEN_INT (pos),
2458                                                                   GEN_INT (v5)));
2459 
2460                           len = pos > 0 && pos < 5 ? pos : 5;
2461                           pos -= len;
2462                         }
2463                     }
2464               }
2465 
2466             set_unique_reg_note (insn, REG_EQUAL, op1);
2467 
2468             return 1;
2469           }
2470     }
2471   /* Now have insn-emit do whatever it normally does.  */
2472   return 0;
2473 }
2474 
2475 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2476    it will need a link/runtime reloc).  */
2477 
2478 int
pa_reloc_needed(tree exp)2479 pa_reloc_needed (tree exp)
2480 {
2481   int reloc = 0;
2482 
2483   switch (TREE_CODE (exp))
2484     {
2485     case ADDR_EXPR:
2486       return 1;
2487 
2488     case POINTER_PLUS_EXPR:
2489     case PLUS_EXPR:
2490     case MINUS_EXPR:
2491       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2492       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2493       break;
2494 
2495     CASE_CONVERT:
2496     case NON_LVALUE_EXPR:
2497       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2498       break;
2499 
2500     case CONSTRUCTOR:
2501       {
2502           tree value;
2503           unsigned HOST_WIDE_INT ix;
2504 
2505           FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2506             if (value)
2507               reloc |= pa_reloc_needed (value);
2508       }
2509       break;
2510 
2511     case ERROR_MARK:
2512       break;
2513 
2514     default:
2515       break;
2516     }
2517   return reloc;
2518 }
2519 
2520 
2521 /* Return the best assembler insn template
2522    for moving operands[1] into operands[0] as a fullword.  */
2523 const char *
pa_singlemove_string(rtx * operands)2524 pa_singlemove_string (rtx *operands)
2525 {
2526   HOST_WIDE_INT intval;
2527 
2528   if (GET_CODE (operands[0]) == MEM)
2529     return "stw %r1,%0";
2530   if (GET_CODE (operands[1]) == MEM)
2531     return "ldw %1,%0";
2532   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2533     {
2534       long i;
2535 
2536       gcc_assert (GET_MODE (operands[1]) == SFmode);
2537 
2538       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2539            bit pattern.  */
2540       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2541 
2542       operands[1] = GEN_INT (i);
2543       /* Fall through to CONST_INT case.  */
2544     }
2545   if (GET_CODE (operands[1]) == CONST_INT)
2546     {
2547       intval = INTVAL (operands[1]);
2548 
2549       if (VAL_14_BITS_P (intval))
2550           return "ldi %1,%0";
2551       else if ((intval & 0x7ff) == 0)
2552           return "ldil L'%1,%0";
2553       else if (pa_zdepi_cint_p (intval))
2554           return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2555       else
2556           return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2557     }
2558   return "copy %1,%0";
2559 }
2560 
2561 
2562 /* Compute position (in OP[1]) and width (in OP[2])
2563    useful for copying IMM to a register using the zdepi
2564    instructions.  Store the immediate value to insert in OP[0].  */
2565 static void
compute_zdepwi_operands(unsigned HOST_WIDE_INT imm,unsigned * op)2566 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2567 {
2568   int lsb, len;
2569 
2570   /* Find the least significant set bit in IMM.  */
2571   for (lsb = 0; lsb < 32; lsb++)
2572     {
2573       if ((imm & 1) != 0)
2574         break;
2575       imm >>= 1;
2576     }
2577 
2578   /* Choose variants based on *sign* of the 5-bit field.  */
2579   if ((imm & 0x10) == 0)
2580     len = (lsb <= 28) ? 4 : 32 - lsb;
2581   else
2582     {
2583       /* Find the width of the bitstring in IMM.  */
2584       for (len = 5; len < 32 - lsb; len++)
2585           {
2586             if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2587               break;
2588           }
2589 
2590       /* Sign extend IMM as a 5-bit value.  */
2591       imm = (imm & 0xf) - 0x10;
2592     }
2593 
2594   op[0] = imm;
2595   op[1] = 31 - lsb;
2596   op[2] = len;
2597 }
2598 
2599 /* Compute position (in OP[1]) and width (in OP[2])
2600    useful for copying IMM to a register using the depdi,z
2601    instructions.  Store the immediate value to insert in OP[0].  */
2602 
2603 static void
compute_zdepdi_operands(unsigned HOST_WIDE_INT imm,unsigned * op)2604 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2605 {
2606   int lsb, len, maxlen;
2607 
2608   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2609 
2610   /* Find the least significant set bit in IMM.  */
2611   for (lsb = 0; lsb < maxlen; lsb++)
2612     {
2613       if ((imm & 1) != 0)
2614         break;
2615       imm >>= 1;
2616     }
2617 
2618   /* Choose variants based on *sign* of the 5-bit field.  */
2619   if ((imm & 0x10) == 0)
2620     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2621   else
2622     {
2623       /* Find the width of the bitstring in IMM.  */
2624       for (len = 5; len < maxlen - lsb; len++)
2625           {
2626             if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2627               break;
2628           }
2629 
2630       /* Extend length if host is narrow and IMM is negative.  */
2631       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2632           len += 32;
2633 
2634       /* Sign extend IMM as a 5-bit value.  */
2635       imm = (imm & 0xf) - 0x10;
2636     }
2637 
2638   op[0] = imm;
2639   op[1] = 63 - lsb;
2640   op[2] = len;
2641 }
2642 
/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.

   OPERANDS[0] is the destination and OPERANDS[1] the source.  Each is
   classified as a register, an offsettable memory reference, some
   other memory reference, a constant, or "random"; at least one of
   the two must be a register (asserted below).

   Several special cases return the final insn template directly after
   possibly emitting leading insns with output_asm_insn.  The general
   path emits both word moves itself and returns the empty string.
   Entries of OPERANDS may be overwritten in the process.  */

const char *
pa_output_move_double (rtx *operands)
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  /* Operands describing the second word of the destination ([0])
     and of the source ([1]).  */
  rtx latehalf[2];
  /* Address registers to bump temporarily for unoffsettable memory
     operands; zero when not needed.  */
  rtx addreg0 = 0, addreg1 = 0;
  /* Set when the constant source was wrapped in HIGH.  */
  int highonly = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  */
  gcc_assert (optype0 == REGOP || optype1 == REGOP);

  /* Handle copies between general and floating registers.  The value
     is bounced through the two words at -16(%sp) and -12(%sp).  */

  if (optype0 == REGOP && optype1 == REGOP
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
    {
      if (FP_REG_P (operands[0]))
          {
            output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
            output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
            return "{fldds|fldd} -16(%%sp),%0";
          }
      else
          {
            output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
            output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
            return "{ldws|ldw} -12(%%sp),%R0";
          }
    }

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we learn
     this port about the general inc/dec addressing of PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
         doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
          {
            rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

            /* From here on %0 names the address register itself.  */
            operands[0] = XEXP (addr, 0);
            gcc_assert (GET_CODE (operands[1]) == REG
                          && GET_CODE (operands[0]) == REG);

            gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

            /* No overlap between high target register and address
               register.  (We do this in a non-obvious way to
               save a register file writeback)  */
            if (GET_CODE (addr) == POST_INC)
              return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
            return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
          }
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
          {
            rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

            /* From here on %0 names the address register itself.  */
            operands[0] = XEXP (addr, 0);
            gcc_assert (GET_CODE (operands[1]) == REG
                          && GET_CODE (operands[0]) == REG);

            gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
            /* No overlap between high target register and address
               register.  (We do this in a non-obvious way to save a
               register file writeback)  */
            if (GET_CODE (addr) == PRE_INC)
              return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
            return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
          }
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
         doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
          {
            rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

            /* From here on %1 names the address register itself.  */
            operands[1] = XEXP (addr, 0);
            gcc_assert (GET_CODE (operands[0]) == REG
                          && GET_CODE (operands[1]) == REG);

            if (!reg_overlap_mentioned_p (high_reg, addr))
              {
                /* No overlap between high target register and address
                   register.  (We do this in a non-obvious way to
                   save a register file writeback)  */
                if (GET_CODE (addr) == POST_INC)
                    return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
                return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
              }
            else
              {
                /* This is an undefined situation.  We should load into the
                   address register *and* update that register.  Probably
                   we don't need to handle this at all.  */
                if (GET_CODE (addr) == POST_INC)
                    return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
                return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
              }
          }
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
          {
            rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

            /* From here on %1 names the address register itself.  */
            operands[1] = XEXP (addr, 0);
            gcc_assert (GET_CODE (operands[0]) == REG
                          && GET_CODE (operands[1]) == REG);

            if (!reg_overlap_mentioned_p (high_reg, addr))
              {
                /* No overlap between high target register and address
                   register.  (We do this in a non-obvious way to
                   save a register file writeback)  */
                if (GET_CODE (addr) == PRE_INC)
                    return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
                return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
              }
            else
              {
                /* This is an undefined situation.  We should load into the
                   address register *and* update that register.  Probably
                   we don't need to handle this at all.  */
                if (GET_CODE (addr) == PRE_INC)
                    return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
                return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
              }
          }
      else if (GET_CODE (addr) == PLUS
                 && GET_CODE (XEXP (addr, 0)) == MULT)
          {
            rtx xoperands[4];

            /* Load address into left half of destination register.
               The second word is then loaded before the first one,
               because the first load overwrites the register holding
               the address.  */
            xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
            xoperands[1] = XEXP (addr, 1);
            xoperands[2] = XEXP (XEXP (addr, 0), 0);
            xoperands[3] = XEXP (XEXP (addr, 0), 1);
            output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
                                 xoperands);
            return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
          }
      else if (GET_CODE (addr) == PLUS
                 && REG_P (XEXP (addr, 0))
                 && REG_P (XEXP (addr, 1)))
          {
            rtx xoperands[3];

            /* Load address into left half of destination register.
               As above, the second word is loaded first.  */
            xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
            xoperands[1] = XEXP (addr, 0);
            xoperands[2] = XEXP (addr, 1);
            output_asm_insn ("{addl|add,l} %1,%2,%0",
                                 xoperands);
            return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
          }
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
  else if (optype1 == CNSTOP)
    {
      /* Strip a HIGH wrapper and remember it, so that the second word
         is loaded with ldil alone further down.  */
      if (GET_CODE (operands[1]) == HIGH)
          {
            operands[1] = XEXP (operands[1], 0);
            highonly = 1;
          }
      split_double (operands[1], &operands[1], &latehalf[1]);
    }
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

          mem -> register where the first half of the destination register
          is the same register used in the memory's address.  Reload
          can create such insns.

          mem in this case will be either register indirect or register
          indirect plus a valid offset.

          register -> register move where REGNO(dst) == REGNO(src + 1)
          someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), operands[1]))
    {
      /* Do the late half first.  */
      if (addreg1)
          output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (pa_singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
          output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return pa_singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (pa_singlemove_string (latehalf), latehalf);
      return pa_singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (pa_singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do high-numbered word.  */
  if (highonly)
    output_asm_insn ("ldil L'%1,%0", latehalf);
  else
    output_asm_insn (pa_singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  /* Everything has already been emitted.  */
  return "";
}
2937 
2938 const char *
pa_output_fp_move_double(rtx * operands)2939 pa_output_fp_move_double (rtx *operands)
2940 {
2941   if (FP_REG_P (operands[0]))
2942     {
2943       if (FP_REG_P (operands[1])
2944             || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2945           output_asm_insn ("fcpy,dbl %f1,%0", operands);
2946       else
2947           output_asm_insn ("fldd%F1 %1,%0", operands);
2948     }
2949   else if (FP_REG_P (operands[1]))
2950     {
2951       output_asm_insn ("fstd%F0 %1,%0", operands);
2952     }
2953   else
2954     {
2955       rtx xoperands[2];
2956 
2957       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2958 
2959       /* This is a pain.  You have to be prepared to deal with an
2960            arbitrary address here including pre/post increment/decrement.
2961 
2962            so avoid this in the MD.  */
2963       gcc_assert (GET_CODE (operands[0]) == REG);
2964 
2965       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2966       xoperands[0] = operands[0];
2967       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2968     }
2969   return "";
2970 }
2971 
2972 /* Return a REG that occurs in ADDR with coefficient 1.
2973    ADDR can be effectively incremented by incrementing REG.  */
2974 
2975 static rtx
find_addr_reg(rtx addr)2976 find_addr_reg (rtx addr)
2977 {
2978   while (GET_CODE (addr) == PLUS)
2979     {
2980       if (GET_CODE (XEXP (addr, 0)) == REG)
2981           addr = XEXP (addr, 0);
2982       else if (GET_CODE (XEXP (addr, 1)) == REG)
2983           addr = XEXP (addr, 1);
2984       else if (CONSTANT_P (XEXP (addr, 0)))
2985           addr = XEXP (addr, 1);
2986       else if (CONSTANT_P (XEXP (addr, 1)))
2987           addr = XEXP (addr, 0);
2988       else
2989           gcc_unreachable ();
2990     }
2991   gcc_assert (GET_CODE (addr) == REG);
2992   return addr;
2993 }
2994 
2995 /* Emit code to perform a block move.
2996 
2997    OPERANDS[0] is the destination pointer as a REG, clobbered.
2998    OPERANDS[1] is the source pointer as a REG, clobbered.
2999    OPERANDS[2] is a register for temporary storage.
3000    OPERANDS[3] is a register for temporary storage.
3001    OPERANDS[4] is the size as a CONST_INT
3002    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
3003    OPERANDS[6] is another temporary register.  */
3004 
3005 const char *
pa_output_block_move(rtx * operands,int size_is_constant ATTRIBUTE_UNUSED)3006 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3007 {
3008   HOST_WIDE_INT align = INTVAL (operands[5]);
3009   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]);
3010 
3011   /* We can't move more than a word at a time because the PA
3012      has no longer integer move insns.  (Could use fp mem ops?)  */
3013   if (align > (TARGET_64BIT ? 8 : 4))
3014     align = (TARGET_64BIT ? 8 : 4);
3015 
3016   /* Note that we know each loop below will execute at least twice
3017      (else we would have open-coded the copy).  */
3018   switch (align)
3019     {
3020       case 8:
3021           /* Pre-adjust the loop counter.  */
3022           operands[4] = GEN_INT (n_bytes - 16);
3023           output_asm_insn ("ldi %4,%2", operands);
3024 
3025           /* Copying loop.  */
3026           output_asm_insn ("ldd,ma 8(%1),%3", operands);
3027           output_asm_insn ("ldd,ma 8(%1),%6", operands);
3028           output_asm_insn ("std,ma %3,8(%0)", operands);
3029           output_asm_insn ("addib,>= -16,%2,.-12", operands);
3030           output_asm_insn ("std,ma %6,8(%0)", operands);
3031 
3032           /* Handle the residual.  There could be up to 7 bytes of
3033              residual to copy!  */
3034           if (n_bytes % 16 != 0)
3035             {
3036               operands[4] = GEN_INT (n_bytes % 8);
3037               if (n_bytes % 16 >= 8)
3038                 output_asm_insn ("ldd,ma 8(%1),%3", operands);
3039               if (n_bytes % 8 != 0)
3040                 output_asm_insn ("ldd 0(%1),%6", operands);
3041               if (n_bytes % 16 >= 8)
3042                 output_asm_insn ("std,ma %3,8(%0)", operands);
3043               if (n_bytes % 8 != 0)
3044                 output_asm_insn ("stdby,e %6,%4(%0)", operands);
3045             }
3046           return "";
3047 
3048       case 4:
3049           /* Pre-adjust the loop counter.  */
3050           operands[4] = GEN_INT (n_bytes - 8);
3051           output_asm_insn ("ldi %4,%2", operands);
3052 
3053           /* Copying loop.  */
3054           output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3055           output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3056           output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3057           output_asm_insn ("addib,>= -8,%2,.-12", operands);
3058           output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3059 
3060           /* Handle the residual.  There could be up to 7 bytes of
3061              residual to copy!  */
3062           if (n_bytes % 8 != 0)
3063             {
3064               operands[4] = GEN_INT (n_bytes % 4);
3065               if (n_bytes % 8 >= 4)
3066                 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3067               if (n_bytes % 4 != 0)
3068                 output_asm_insn ("ldw 0(%1),%6", operands);
3069               if (n_bytes % 8 >= 4)
3070                 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3071               if (n_bytes % 4 != 0)
3072                 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3073             }
3074           return "";
3075 
3076       case 2:
3077           /* Pre-adjust the loop counter.  */
3078           operands[4] = GEN_INT (n_bytes - 4);
3079           output_asm_insn ("ldi %4,%2", operands);
3080 
3081           /* Copying loop.  */
3082           output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3083           output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3084           output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3085           output_asm_insn ("addib,>= -4,%2,.-12", operands);
3086           output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3087 
3088           /* Handle the residual.  */
3089           if (n_bytes % 4 != 0)
3090             {
3091               if (n_bytes % 4 >= 2)
3092                 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3093               if (n_bytes % 2 != 0)
3094                 output_asm_insn ("ldb 0(%1),%6", operands);
3095               if (n_bytes % 4 >= 2)
3096                 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3097               if (n_bytes % 2 != 0)
3098                 output_asm_insn ("stb %6,0(%0)", operands);
3099             }
3100           return "";
3101 
3102       case 1:
3103           /* Pre-adjust the loop counter.  */
3104           operands[4] = GEN_INT (n_bytes - 2);
3105           output_asm_insn ("ldi %4,%2", operands);
3106 
3107           /* Copying loop.  */
3108           output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3109           output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3110           output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3111           output_asm_insn ("addib,>= -2,%2,.-12", operands);
3112           output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3113 
3114           /* Handle the residual.  */
3115           if (n_bytes % 2 != 0)
3116             {
3117               output_asm_insn ("ldb 0(%1),%3", operands);
3118               output_asm_insn ("stb %3,0(%0)", operands);
3119             }
3120           return "";
3121 
3122       default:
3123           gcc_unreachable ();
3124     }
3125 }
3126 
3127 /* Count the number of insns necessary to handle this block move.
3128 
3129    Basic structure is the same as emit_block_move, except that we
3130    count insns rather than emit them.  */
3131 
3132 static int
compute_cpymem_length(rtx_insn * insn)3133 compute_cpymem_length (rtx_insn *insn)
3134 {
3135   rtx pat = PATTERN (insn);
3136   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3137   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3138   unsigned int n_insns = 0;
3139 
3140   /* We can't move more than four bytes at a time because the PA
3141      has no longer integer move insns.  (Could use fp mem ops?)  */
3142   if (align > (TARGET_64BIT ? 8 : 4))
3143     align = (TARGET_64BIT ? 8 : 4);
3144 
3145   /* The basic copying loop.  */
3146   n_insns = 6;
3147 
3148   /* Residuals.  */
3149   if (n_bytes % (2 * align) != 0)
3150     {
3151       if ((n_bytes % (2 * align)) >= align)
3152           n_insns += 2;
3153 
3154       if ((n_bytes % align) != 0)
3155           n_insns += 2;
3156     }
3157 
3158   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3159   return n_insns * 4;
3160 }
3161 
3162 /* Emit code to perform a block clear.
3163 
3164    OPERANDS[0] is the destination pointer as a REG, clobbered.
3165    OPERANDS[1] is a register for temporary storage.
3166    OPERANDS[2] is the size as a CONST_INT
3167    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
3168 
3169 const char *
pa_output_block_clear(rtx * operands,int size_is_constant ATTRIBUTE_UNUSED)3170 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3171 {
3172   HOST_WIDE_INT align = INTVAL (operands[3]);
3173   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]);
3174 
3175   /* We can't clear more than a word at a time because the PA
3176      has no longer integer move insns.  */
3177   if (align > (TARGET_64BIT ? 8 : 4))
3178     align = (TARGET_64BIT ? 8 : 4);
3179 
3180   /* Note that we know each loop below will execute at least twice
3181      (else we would have open-coded the copy).  */
3182   switch (align)
3183     {
3184       case 8:
3185           /* Pre-adjust the loop counter.  */
3186           operands[2] = GEN_INT (n_bytes - 16);
3187           output_asm_insn ("ldi %2,%1", operands);
3188 
3189           /* Loop.  */
3190           output_asm_insn ("std,ma %%r0,8(%0)", operands);
3191           output_asm_insn ("addib,>= -16,%1,.-4", operands);
3192           output_asm_insn ("std,ma %%r0,8(%0)", operands);
3193 
3194           /* Handle the residual.  There could be up to 7 bytes of
3195              residual to copy!  */
3196           if (n_bytes % 16 != 0)
3197             {
3198               operands[2] = GEN_INT (n_bytes % 8);
3199               if (n_bytes % 16 >= 8)
3200                 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3201               if (n_bytes % 8 != 0)
3202                 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3203             }
3204           return "";
3205 
3206       case 4:
3207           /* Pre-adjust the loop counter.  */
3208           operands[2] = GEN_INT (n_bytes - 8);
3209           output_asm_insn ("ldi %2,%1", operands);
3210 
3211           /* Loop.  */
3212           output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3213           output_asm_insn ("addib,>= -8,%1,.-4", operands);
3214           output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3215 
3216           /* Handle the residual.  There could be up to 7 bytes of
3217              residual to copy!  */
3218           if (n_bytes % 8 != 0)
3219             {
3220               operands[2] = GEN_INT (n_bytes % 4);
3221               if (n_bytes % 8 >= 4)
3222                 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3223               if (n_bytes % 4 != 0)
3224                 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3225             }
3226           return "";
3227 
3228       case 2:
3229           /* Pre-adjust the loop counter.  */
3230           operands[2] = GEN_INT (n_bytes - 4);
3231           output_asm_insn ("ldi %2,%1", operands);
3232 
3233           /* Loop.  */
3234           output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3235           output_asm_insn ("addib,>= -4,%1,.-4", operands);
3236           output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3237 
3238           /* Handle the residual.  */
3239           if (n_bytes % 4 != 0)
3240             {
3241               if (n_bytes % 4 >= 2)
3242                 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3243               if (n_bytes % 2 != 0)
3244                 output_asm_insn ("stb %%r0,0(%0)", operands);
3245             }
3246           return "";
3247 
3248       case 1:
3249           /* Pre-adjust the loop counter.  */
3250           operands[2] = GEN_INT (n_bytes - 2);
3251           output_asm_insn ("ldi %2,%1", operands);
3252 
3253           /* Loop.  */
3254           output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3255           output_asm_insn ("addib,>= -2,%1,.-4", operands);
3256           output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3257 
3258           /* Handle the residual.  */
3259           if (n_bytes % 2 != 0)
3260             output_asm_insn ("stb %%r0,0(%0)", operands);
3261 
3262           return "";
3263 
3264       default:
3265           gcc_unreachable ();
3266     }
3267 }
3268 
3269 /* Count the number of insns necessary to handle this block move.
3270 
3271    Basic structure is the same as emit_block_move, except that we
3272    count insns rather than emit them.  */
3273 
3274 static int
compute_clrmem_length(rtx_insn * insn)3275 compute_clrmem_length (rtx_insn *insn)
3276 {
3277   rtx pat = PATTERN (insn);
3278   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3279   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3280   unsigned int n_insns = 0;
3281 
3282   /* We can't clear more than a word at a time because the PA
3283      has no longer integer move insns.  */
3284   if (align > (TARGET_64BIT ? 8 : 4))
3285     align = (TARGET_64BIT ? 8 : 4);
3286 
3287   /* The basic loop.  */
3288   n_insns = 4;
3289 
3290   /* Residuals.  */
3291   if (n_bytes % (2 * align) != 0)
3292     {
3293       if ((n_bytes % (2 * align)) >= align)
3294           n_insns++;
3295 
3296       if ((n_bytes % align) != 0)
3297           n_insns++;
3298     }
3299 
3300   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3301   return n_insns * 4;
3302 }
3303 
3304 
3305 const char *
pa_output_and(rtx * operands)3306 pa_output_and (rtx *operands)
3307 {
3308   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3309     {
3310       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3311       int ls0, ls1, ms0, p, len;
3312 
3313       for (ls0 = 0; ls0 < 32; ls0++)
3314           if ((mask & (1 << ls0)) == 0)
3315             break;
3316 
3317       for (ls1 = ls0; ls1 < 32; ls1++)
3318           if ((mask & (1 << ls1)) != 0)
3319             break;
3320 
3321       for (ms0 = ls1; ms0 < 32; ms0++)
3322           if ((mask & (1 << ms0)) == 0)
3323             break;
3324 
3325       gcc_assert (ms0 == 32);
3326 
3327       if (ls1 == 32)
3328           {
3329             len = ls0;
3330 
3331             gcc_assert (len);
3332 
3333             operands[2] = GEN_INT (len);
3334             return "{extru|extrw,u} %1,31,%2,%0";
3335           }
3336       else
3337           {
3338             /* We could use this `depi' for the case above as well, but `depi'
3339                requires one more register file access than an `extru'.  */
3340 
3341             p = 31 - ls0;
3342             len = ls1 - ls0;
3343 
3344             operands[2] = GEN_INT (p);
3345             operands[3] = GEN_INT (len);
3346             return "{depi|depwi} 0,%2,%3,%0";
3347           }
3348     }
3349   else
3350     return "and %1,%2,%0";
3351 }
3352 
3353 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3354    storing the result in operands[0].  */
3355 const char *
pa_output_64bit_and(rtx * operands)3356 pa_output_64bit_and (rtx *operands)
3357 {
3358   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3359     {
3360       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3361       int ls0, ls1, ms0, p, len;
3362 
3363       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3364           if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3365             break;
3366 
3367       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3368           if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3369             break;
3370 
3371       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3372           if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3373             break;
3374 
3375       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3376 
3377       if (ls1 == HOST_BITS_PER_WIDE_INT)
3378           {
3379             len = ls0;
3380 
3381             gcc_assert (len);
3382 
3383             operands[2] = GEN_INT (len);
3384             return "extrd,u %1,63,%2,%0";
3385           }
3386       else
3387           {
3388             /* We could use this `depi' for the case above as well, but `depi'
3389                requires one more register file access than an `extru'.  */
3390 
3391             p = 63 - ls0;
3392             len = ls1 - ls0;
3393 
3394             operands[2] = GEN_INT (p);
3395             operands[3] = GEN_INT (len);
3396             return "depdi 0,%2,%3,%0";
3397           }
3398     }
3399   else
3400     return "and %1,%2,%0";
3401 }
3402 
3403 const char *
pa_output_ior(rtx * operands)3404 pa_output_ior (rtx *operands)
3405 {
3406   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3407   int bs0, bs1, p, len;
3408 
3409   if (INTVAL (operands[2]) == 0)
3410     return "copy %1,%0";
3411 
3412   for (bs0 = 0; bs0 < 32; bs0++)
3413     if ((mask & (1 << bs0)) != 0)
3414       break;
3415 
3416   for (bs1 = bs0; bs1 < 32; bs1++)
3417     if ((mask & (1 << bs1)) == 0)
3418       break;
3419 
3420   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3421 
3422   p = 31 - bs0;
3423   len = bs1 - bs0;
3424 
3425   operands[2] = GEN_INT (p);
3426   operands[3] = GEN_INT (len);
3427   return "{depi|depwi} -1,%2,%3,%0";
3428 }
3429 
3430 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3431    storing the result in operands[0].  */
3432 const char *
pa_output_64bit_ior(rtx * operands)3433 pa_output_64bit_ior (rtx *operands)
3434 {
3435   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3436   int bs0, bs1, p, len;
3437 
3438   if (INTVAL (operands[2]) == 0)
3439     return "copy %1,%0";
3440 
3441   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3442     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3443       break;
3444 
3445   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3446     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3447       break;
3448 
3449   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3450                 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3451 
3452   p = 63 - bs0;
3453   len = bs1 - bs0;
3454 
3455   operands[2] = GEN_INT (p);
3456   operands[3] = GEN_INT (len);
3457   return "depdi -1,%2,%3,%0";
3458 }
3459 
3460 /* Target hook for assembling integer objects.  This code handles
3461    aligned SI and DI integers specially since function references
3462    must be preceded by P%.  */
3463 
3464 static bool
pa_assemble_integer(rtx x,unsigned int size,int aligned_p)3465 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3466 {
3467   bool result;
3468   tree decl = NULL;
3469 
3470   /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3471      call assemble_external and set the SYMBOL_REF_DECL to NULL before
3472      calling output_addr_const.  Otherwise, it may call assemble_external
3473      in the midst of outputing the assembler code for the SYMBOL_REF.
3474      We restore the SYMBOL_REF_DECL after the output is done.  */
3475   if (GET_CODE (x) == SYMBOL_REF)
3476     {
3477       decl = SYMBOL_REF_DECL (x);
3478       if (decl)
3479           {
3480             assemble_external (decl);
3481             SET_SYMBOL_REF_DECL (x, NULL);
3482           }
3483     }
3484 
3485   if (size == UNITS_PER_WORD
3486       && aligned_p
3487       && function_label_operand (x, VOIDmode))
3488     {
3489       fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3490 
3491       /* We don't want an OPD when generating fast indirect calls.  */
3492       if (!TARGET_FAST_INDIRECT_CALLS)
3493           fputs ("P%", asm_out_file);
3494 
3495       output_addr_const (asm_out_file, x);
3496       fputc ('\n', asm_out_file);
3497       result = true;
3498     }
3499   else
3500     result = default_assemble_integer (x, size, aligned_p);
3501 
3502   if (decl)
3503     SET_SYMBOL_REF_DECL (x, decl);
3504 
3505   return result;
3506 }
3507 
/* Output the SIZE bytes starting at P to FILE as one or more .STRING
   directives.

   Double quotes and backslashes are preceded by a backslash, and bytes
   outside the printable ASCII range are written as \xNN with lowercase
   hex digits.  */
void
pa_output_ascii (FILE *file, const char *p, int size)
{
  static const char hex_digits[] = "0123456789abcdef";
  int chars_output = 0;

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  for (int i = 0; i < size; i += 4)
    {
      /* Escaped form of up to four input bytes; each byte expands to
	 at most four output characters.  */
      unsigned char partial_output[16];
      int group = size - i < 4 ? size - i : 4;
      int co = 0;

      for (int io = 0; io < group; io++)
	{
	  unsigned int c = (unsigned char) p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      partial_output[co++] = hex_digits[c / 16];
	      partial_output[co++] = hex_digits[c % 16];
	    }
	}

      /* Start a new directive rather than let the payload of the
	 current one grow past 243 characters.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
    }
  fputs ("\"\n", file);
}
3563 
3564 /* Try to rewrite floating point comparisons & branches to avoid
3565    useless add,tr insns.
3566 
3567    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3568    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3569    first attempt to remove useless add,tr insns.  It is zero
3570    for the second pass as reorg sometimes leaves bogus REG_DEAD
3571    notes lying around.
3572 
3573    When CHECK_NOTES is zero we can only eliminate add,tr insns
3574    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3575    instructions.  */
3576 static void
remove_useless_addtr_insns(int check_notes)3577 remove_useless_addtr_insns (int check_notes)
3578 {
3579   rtx_insn *insn;
3580   static int pass = 0;
3581 
3582   /* This is fairly cheap, so always run it when optimizing.  */
3583   if (optimize > 0)
3584     {
3585       int fcmp_count = 0;
3586       int fbranch_count = 0;
3587 
3588       /* Walk all the insns in this function looking for fcmp & fbranch
3589            instructions.  Keep track of how many of each we find.  */
3590       for (insn = get_insns (); insn; insn = next_insn (insn))
3591           {
3592             rtx tmp;
3593 
3594             /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3595             if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3596               continue;
3597 
3598             tmp = PATTERN (insn);
3599 
3600             /* It must be a set.  */
3601             if (GET_CODE (tmp) != SET)
3602               continue;
3603 
3604             /* If the destination is CCFP, then we've found an fcmp insn.  */
3605             tmp = SET_DEST (tmp);
3606             if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3607               {
3608                 fcmp_count++;
3609                 continue;
3610               }
3611 
3612             tmp = PATTERN (insn);
3613             /* If this is an fbranch instruction, bump the fbranch counter.  */
3614             if (GET_CODE (tmp) == SET
3615                 && SET_DEST (tmp) == pc_rtx
3616                 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3617                 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3618                 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3619                 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3620               {
3621                 fbranch_count++;
3622                 continue;
3623               }
3624           }
3625 
3626 
3627       /* Find all floating point compare + branch insns.  If possible,
3628            reverse the comparison & the branch to avoid add,tr insns.  */
3629       for (insn = get_insns (); insn; insn = next_insn (insn))
3630           {
3631             rtx tmp;
3632             rtx_insn *next;
3633 
3634             /* Ignore anything that isn't an INSN.  */
3635             if (! NONJUMP_INSN_P (insn))
3636               continue;
3637 
3638             tmp = PATTERN (insn);
3639 
3640             /* It must be a set.  */
3641             if (GET_CODE (tmp) != SET)
3642               continue;
3643 
3644             /* The destination must be CCFP, which is register zero.  */
3645             tmp = SET_DEST (tmp);
3646             if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3647               continue;
3648 
3649             /* INSN should be a set of CCFP.
3650 
3651                See if the result of this insn is used in a reversed FP
3652                conditional branch.  If so, reverse our condition and
3653                the branch.  Doing so avoids useless add,tr insns.  */
3654             next = next_insn (insn);
3655             while (next)
3656               {
3657                 /* Jumps, calls and labels stop our search.  */
3658                 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3659                     break;
3660 
3661                 /* As does another fcmp insn.  */
3662                 if (NONJUMP_INSN_P (next)
3663                       && GET_CODE (PATTERN (next)) == SET
3664                       && GET_CODE (SET_DEST (PATTERN (next))) == REG
3665                       && REGNO (SET_DEST (PATTERN (next))) == 0)
3666                     break;
3667 
3668                 next = next_insn (next);
3669               }
3670 
3671             /* Is NEXT_INSN a branch?  */
3672             if (next && JUMP_P (next))
3673               {
3674                 rtx pattern = PATTERN (next);
3675 
3676                 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3677                      and CCFP dies, then reverse our conditional and the branch
3678                      to avoid the add,tr.  */
3679                 if (GET_CODE (pattern) == SET
3680                       && SET_DEST (pattern) == pc_rtx
3681                       && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3682                       && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3683                       && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3684                       && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3685                       && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3686                       && (fcmp_count == fbranch_count
3687                           || (check_notes
3688                                 && find_regno_note (next, REG_DEAD, 0))))
3689                     {
3690                       /* Reverse the branch.  */
3691                       tmp = XEXP (SET_SRC (pattern), 1);
3692                       XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3693                       XEXP (SET_SRC (pattern), 2) = tmp;
3694                       INSN_CODE (next) = -1;
3695 
3696                       /* Reverse our condition.  */
3697                       tmp = PATTERN (insn);
3698                       PUT_CODE (XEXP (tmp, 1),
3699                                   (reverse_condition_maybe_unordered
3700                                    (GET_CODE (XEXP (tmp, 1)))));
3701                     }
3702               }
3703           }
3704     }
3705 
3706   pass = !pass;
3707 
3708 }
3709 
3710 /* You may have trouble believing this, but this is the 32 bit HP-PA
3711    stack layout.  Wow.
3712 
3713    Offset           Contents
3714 
3715    Variable arguments         (optional; any number may be allocated)
3716 
3717    SP-(4*(N+9))               arg word N
3718           :                       :
3719       SP-56                   arg word 5
3720       SP-52                   arg word 4
3721 
3722    Fixed arguments  (must be allocated; may remain unused)
3723 
3724       SP-48                   arg word 3
3725       SP-44                   arg word 2
3726       SP-40                   arg word 1
3727       SP-36                   arg word 0
3728 
3729    Frame Marker
3730 
3731       SP-32                   External Data Pointer (DP)
3732       SP-28                   External sr4
3733       SP-24                   External/stub RP (RP')
3734       SP-20                   Current RP
3735       SP-16                   Static Link
3736       SP-12                   Clean up
3737       SP-8                    Calling Stub RP (RP'')
3738       SP-4                    Previous SP
3739 
3740    Top of Frame
3741 
3742       SP-0                    Stack Pointer (points to next available address)
3743 
3744 */
3745 
3746 /* This function saves registers as follows.  Registers marked with ' are
3747    this function's registers (as opposed to the previous function's).
3748    If a frame_pointer isn't needed, r4 is saved as a general register;
3749    the space for the frame pointer is still allocated, though, to keep
3750    things simple.
3751 
3752 
3753    Top of Frame
3754 
3755        SP (FP')               Previous FP
3756        SP + 4                 Alignment filler (sigh)
3757        SP + 8                 Space for locals reserved here.
3758        .
3759        .
3760        .
3761        SP + n                 All call saved register used.
3762        .
3763        .
3764        .
3765        SP + o                 All call saved fp registers used.
3766        .
3767        .
3768        .
3769        SP + p (SP') points to next available address.
3770 
3771 */
3772 
3773 /* Global variables set by output_function_prologue().  */
3774 /* Size of frame.  Need to know this to emit return insns from
3775    leaf procedures.  */
3776 static HOST_WIDE_INT actual_fsize, local_fsize;
3777 static int save_fregs;
3778 
3779 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3780    Handle case where DISP > 8k by using the add_high_const patterns.
3781 
3782    Note in DISP > 8k case, we will leave the high part of the address
3783    in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3784 
3785 static void
store_reg(int reg,HOST_WIDE_INT disp,int base)3786 store_reg (int reg, HOST_WIDE_INT disp, int base)
3787 {
3788   rtx dest, src, basereg;
3789   rtx_insn *insn;
3790 
3791   src = gen_rtx_REG (word_mode, reg);
3792   basereg = gen_rtx_REG (Pmode, base);
3793   if (VAL_14_BITS_P (disp))
3794     {
3795       dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3796       insn = emit_move_insn (dest, src);
3797     }
3798   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3799     {
3800       rtx delta = GEN_INT (disp);
3801       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3802 
3803       emit_move_insn (tmpreg, delta);
3804       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3805       if (DO_FRAME_NOTES)
3806           {
3807             add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3808                               gen_rtx_SET (tmpreg,
3809                                              gen_rtx_PLUS (Pmode, basereg, delta)));
3810             RTX_FRAME_RELATED_P (insn) = 1;
3811           }
3812       dest = gen_rtx_MEM (word_mode, tmpreg);
3813       insn = emit_move_insn (dest, src);
3814     }
3815   else
3816     {
3817       rtx delta = GEN_INT (disp);
3818       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3819       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3820 
3821       emit_move_insn (tmpreg, high);
3822       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3823       insn = emit_move_insn (dest, src);
3824       if (DO_FRAME_NOTES)
3825           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3826                           gen_rtx_SET (gen_rtx_MEM (word_mode,
3827                                                             gen_rtx_PLUS (word_mode,
3828                                                                             basereg,
3829                                                                             delta)),
3830                                            src));
3831     }
3832 
3833   if (DO_FRAME_NOTES)
3834     RTX_FRAME_RELATED_P (insn) = 1;
3835 }
3836 
3837 /* Emit RTL to store REG at the memory location specified by BASE and then
3838    add MOD to BASE.  MOD must be <= 8k.  */
3839 
3840 static void
store_reg_modify(int base,int reg,HOST_WIDE_INT mod)3841 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3842 {
3843   rtx basereg, srcreg, delta;
3844   rtx_insn *insn;
3845 
3846   gcc_assert (VAL_14_BITS_P (mod));
3847 
3848   basereg = gen_rtx_REG (Pmode, base);
3849   srcreg = gen_rtx_REG (word_mode, reg);
3850   delta = GEN_INT (mod);
3851 
3852   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3853   if (DO_FRAME_NOTES)
3854     {
3855       RTX_FRAME_RELATED_P (insn) = 1;
3856 
3857       /* RTX_FRAME_RELATED_P must be set on each frame related set
3858            in a parallel with more than one element.  */
3859       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3860       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3861     }
3862 }
3863 
3864 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3865    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3866    whether to add a frame note or not.
3867 
3868    In the DISP > 8k case, we leave the high part of the address in %r1.
3869    There is code in expand_hppa_{prologue,epilogue} that knows about this.  */
3870 
3871 static void
set_reg_plus_d(int reg,int base,HOST_WIDE_INT disp,int note)3872 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3873 {
3874   rtx_insn *insn;
3875 
3876   if (VAL_14_BITS_P (disp))
3877     {
3878       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3879                                    plus_constant (Pmode,
3880                                                       gen_rtx_REG (Pmode, base), disp));
3881     }
3882   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3883     {
3884       rtx basereg = gen_rtx_REG (Pmode, base);
3885       rtx delta = GEN_INT (disp);
3886       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3887 
3888       emit_move_insn (tmpreg, delta);
3889       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3890                                    gen_rtx_PLUS (Pmode, tmpreg, basereg));
3891       if (DO_FRAME_NOTES)
3892           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3893                           gen_rtx_SET (tmpreg,
3894                                            gen_rtx_PLUS (Pmode, basereg, delta)));
3895     }
3896   else
3897     {
3898       rtx basereg = gen_rtx_REG (Pmode, base);
3899       rtx delta = GEN_INT (disp);
3900       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3901 
3902       emit_move_insn (tmpreg,
3903                           gen_rtx_PLUS (Pmode, basereg,
3904                                             gen_rtx_HIGH (Pmode, delta)));
3905       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3906                                    gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3907     }
3908 
3909   if (DO_FRAME_NOTES && note)
3910     RTX_FRAME_RELATED_P (insn) = 1;
3911 }
3912 
3913 HOST_WIDE_INT
pa_compute_frame_size(poly_int64 size,int * fregs_live)3914 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3915 {
3916   int freg_saved = 0;
3917   int i, j;
3918 
3919   /* The code in pa_expand_prologue and pa_expand_epilogue must
3920      be consistent with the rounding and size calculation done here.
3921      Change them at the same time.  */
3922 
3923   /* We do our own stack alignment.  First, round the size of the
3924      stack locals up to a word boundary.  */
3925   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3926 
3927   /* Space for previous frame pointer + filler.  If any frame is
3928      allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET.  We
3929      waste some space here for the sake of HP compatibility.  The
3930      first slot is only used when the frame pointer is needed.  */
3931   if (size || frame_pointer_needed)
3932     size += pa_starting_frame_offset ();
3933 
3934   /* If the current function calls __builtin_eh_return, then we need
3935      to allocate stack space for registers that will hold data for
3936      the exception handler.  */
3937   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3938     {
3939       unsigned int i;
3940 
3941       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3942           continue;
3943       size += i * UNITS_PER_WORD;
3944     }
3945 
3946   /* Account for space used by the callee general register saves.  */
3947   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3948     if (df_regs_ever_live_p (i))
3949       size += UNITS_PER_WORD;
3950 
3951   /* Account for space used by the callee floating point register saves.  */
3952   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3953     if (df_regs_ever_live_p (i)
3954           || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3955       {
3956           freg_saved = 1;
3957 
3958           /* We always save both halves of the FP register, so always
3959              increment the frame size by 8 bytes.  */
3960           size += 8;
3961       }
3962 
3963   /* If any of the floating registers are saved, account for the
3964      alignment needed for the floating point register save block.  */
3965   if (freg_saved)
3966     {
3967       size = (size + 7) & ~7;
3968       if (fregs_live)
3969           *fregs_live = 1;
3970     }
3971 
3972   /* The various ABIs include space for the outgoing parameters in the
3973      size of the current function's stack frame.  We don't need to align
3974      for the outgoing arguments as their alignment is set by the final
3975      rounding for the frame as a whole.  */
3976   size += crtl->outgoing_args_size;
3977 
3978   /* Allocate space for the fixed frame marker.  This space must be
3979      allocated for any function that makes calls or allocates
3980      stack space.  */
3981   if (!crtl->is_leaf || size)
3982     size += TARGET_64BIT ? 48 : 32;
3983 
3984   /* Finally, round to the preferred stack boundary.  */
3985   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3986             & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3987 }
3988 
3989 /* Output function label, and associated .PROC and .CALLINFO statements.  */
3990 
3991 void
pa_output_function_label(FILE * file)3992 pa_output_function_label (FILE *file)
3993 {
3994   /* The function's label and associated .PROC must never be
3995      separated and must be output *after* any profiling declarations
3996      to avoid changing spaces/subspaces within a procedure.  */
3997   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3998   fputs ("\t.PROC\n", file);
3999 
4000   /* pa_expand_prologue does the dirty work now.  We just need
4001      to output the assembler directives which denote the start
4002      of a function.  */
4003   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
4004   if (crtl->is_leaf)
4005     fputs (",NO_CALLS", file);
4006   else
4007     fputs (",CALLS", file);
4008   if (rp_saved)
4009     fputs (",SAVE_RP", file);
4010 
4011   /* The SAVE_SP flag is used to indicate that register %r3 is stored
4012      at the beginning of the frame and that it is used as the frame
4013      pointer for the frame.  We do this because our current frame
4014      layout doesn't conform to that specified in the HP runtime
4015      documentation and we need a way to indicate to programs such as
4016      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
4017      isn't used by HP compilers but is supported by the assembler.
4018      However, SAVE_SP is supposed to indicate that the previous stack
4019      pointer has been saved in the frame marker.  */
4020   if (frame_pointer_needed)
4021     fputs (",SAVE_SP", file);
4022 
4023   /* Pass on information about the number of callee register saves
4024      performed in the prologue.
4025 
4026      The compiler is supposed to pass the highest register number
4027      saved, the assembler then has to adjust that number before
4028      entering it into the unwind descriptor (to account for any
4029      caller saved registers with lower register numbers than the
4030      first callee saved register).  */
4031   if (gr_saved)
4032     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
4033 
4034   if (fr_saved)
4035     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4036 
4037   fputs ("\n\t.ENTRY\n", file);
4038 }
4039 
4040 /* Output function prologue.  */
4041 
4042 static void
pa_output_function_prologue(FILE * file)4043 pa_output_function_prologue (FILE *file)
4044 {
4045   pa_output_function_label (file);
4046   remove_useless_addtr_insns (0);
4047 }
4048 
4049 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux.  */
4050 
4051 static void
pa_linux_output_function_prologue(FILE * file ATTRIBUTE_UNUSED)4052 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4053 {
4054   remove_useless_addtr_insns (0);
4055 }
4056 
4057 void
pa_expand_prologue(void)4058 pa_expand_prologue (void)
4059 {
4060   int merge_sp_adjust_with_store = 0;
4061   HOST_WIDE_INT size = get_frame_size ();
4062   HOST_WIDE_INT offset;
4063   int i;
4064   rtx tmpreg;
4065   rtx_insn *insn;
4066 
4067   gr_saved = 0;
4068   fr_saved = 0;
4069   save_fregs = 0;
4070 
4071   /* Compute total size for frame pointer, filler, locals and rounding to
4072      the next word boundary.  Similar code appears in pa_compute_frame_size
4073      and must be changed in tandem with this code.  */
4074   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4075   if (local_fsize || frame_pointer_needed)
4076     local_fsize += pa_starting_frame_offset ();
4077 
4078   actual_fsize = pa_compute_frame_size (size, &save_fregs);
4079   if (flag_stack_usage_info)
4080     current_function_static_stack_size = actual_fsize;
4081 
4082   /* Compute a few things we will use often.  */
4083   tmpreg = gen_rtx_REG (word_mode, 1);
4084 
4085   /* Save RP first.  The calling conventions manual states RP will
4086      always be stored into the caller's frame at sp - 20 or sp - 16
4087      depending on which ABI is in use.  */
4088   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4089     {
4090       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4091       rp_saved = true;
4092     }
4093   else
4094     rp_saved = false;
4095 
4096   /* Allocate the local frame and set up the frame pointer if needed.  */
4097   if (actual_fsize != 0)
4098     {
4099       if (frame_pointer_needed)
4100           {
4101             /* Copy the old frame pointer temporarily into %r1.  Set up the
4102                new stack pointer, then store away the saved old frame pointer
4103                into the stack at sp and at the same time update the stack
4104                pointer by actual_fsize bytes.  Two versions, first
4105                handles small (<8k) frames.  The second handles large (>=8k)
4106                frames.  */
4107             insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4108             if (DO_FRAME_NOTES)
4109               RTX_FRAME_RELATED_P (insn) = 1;
4110 
4111             insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4112             if (DO_FRAME_NOTES)
4113               RTX_FRAME_RELATED_P (insn) = 1;
4114 
4115             if (VAL_14_BITS_P (actual_fsize))
4116               store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
4117             else
4118               {
4119                 /* It is incorrect to store the saved frame pointer at *sp,
4120                      then increment sp (writes beyond the current stack boundary).
4121 
4122                      So instead use stwm to store at *sp and post-increment the
4123                      stack pointer as an atomic operation.  Then increment sp to
4124                      finish allocating the new frame.  */
4125                 HOST_WIDE_INT adjust1 = 8192 - 64;
4126                 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
4127 
4128                 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4129                 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4130                                     adjust2, 1);
4131               }
4132 
4133             /* We set SAVE_SP in frames that need a frame pointer.  Thus,
4134                we need to store the previous stack pointer (frame pointer)
4135                into the frame marker on targets that use the HP unwind
4136                library.  This allows the HP unwind library to be used to
4137                unwind GCC frames.  However, we are not fully compatible
4138                with the HP library because our frame layout differs from
4139                that specified in the HP runtime specification.
4140 
4141                We don't want a frame note on this instruction as the frame
4142                marker moves during dynamic stack allocation.
4143 
4144                This instruction also serves as a blockage to prevent
4145                register spills from being scheduled before the stack
4146                pointer is raised.  This is necessary as we store
4147                registers using the frame pointer as a base register,
4148                and the frame pointer is set before sp is raised.  */
4149             if (TARGET_HPUX_UNWIND_LIBRARY)
4150               {
4151                 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4152                                                GEN_INT (TARGET_64BIT ? -8 : -4));
4153 
4154                 emit_move_insn (gen_rtx_MEM (word_mode, addr),
4155                                     hard_frame_pointer_rtx);
4156               }
4157             else
4158               emit_insn (gen_blockage ());
4159           }
4160       /* no frame pointer needed.  */
4161       else
4162           {
4163             /* In some cases we can perform the first callee register save
4164                and allocating the stack frame at the same time.   If so, just
4165                make a note of it and defer allocating the frame until saving
4166                the callee registers.  */
4167             if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4168               merge_sp_adjust_with_store = 1;
4169             /* Cannot optimize.  Adjust the stack frame by actual_fsize
4170                bytes.  */
4171             else
4172               set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4173                                   actual_fsize, 1);
4174           }
4175     }
4176 
4177   /* Normal register save.
4178 
4179      Do not save the frame pointer in the frame_pointer_needed case.  It
4180      was done earlier.  */
4181   if (frame_pointer_needed)
4182     {
4183       offset = local_fsize;
4184 
4185       /* Saving the EH return data registers in the frame is the simplest
4186            way to get the frame unwind information emitted.  We put them
4187            just before the general registers.  */
4188       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4189           {
4190             unsigned int i, regno;
4191 
4192             for (i = 0; ; ++i)
4193               {
4194                 regno = EH_RETURN_DATA_REGNO (i);
4195                 if (regno == INVALID_REGNUM)
4196                     break;
4197 
4198                 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4199                 offset += UNITS_PER_WORD;
4200               }
4201           }
4202 
4203       for (i = 18; i >= 4; i--)
4204           if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4205             {
4206               store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4207               offset += UNITS_PER_WORD;
4208               gr_saved++;
4209             }
4210       /* Account for %r3 which is saved in a special place.  */
4211       gr_saved++;
4212     }
4213   /* No frame pointer needed.  */
4214   else
4215     {
4216       offset = local_fsize - actual_fsize;
4217 
4218       /* Saving the EH return data registers in the frame is the simplest
4219          way to get the frame unwind information emitted.  */
4220       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4221           {
4222             unsigned int i, regno;
4223 
4224             for (i = 0; ; ++i)
4225               {
4226                 regno = EH_RETURN_DATA_REGNO (i);
4227                 if (regno == INVALID_REGNUM)
4228                     break;
4229 
4230                 /* If merge_sp_adjust_with_store is nonzero, then we can
4231                      optimize the first save.  */
4232                 if (merge_sp_adjust_with_store)
4233                     {
4234                       store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4235                       merge_sp_adjust_with_store = 0;
4236                     }
4237                 else
4238                     store_reg (regno, offset, STACK_POINTER_REGNUM);
4239                 offset += UNITS_PER_WORD;
4240               }
4241           }
4242 
4243       for (i = 18; i >= 3; i--)
4244           if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4245             {
4246               /* If merge_sp_adjust_with_store is nonzero, then we can
4247                  optimize the first GR save.  */
4248               if (merge_sp_adjust_with_store)
4249                 {
4250                     store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4251                     merge_sp_adjust_with_store = 0;
4252                 }
4253               else
4254                 store_reg (i, offset, STACK_POINTER_REGNUM);
4255               offset += UNITS_PER_WORD;
4256               gr_saved++;
4257             }
4258 
4259       /* If we wanted to merge the SP adjustment with a GR save, but we never
4260            did any GR saves, then just emit the adjustment here.  */
4261       if (merge_sp_adjust_with_store)
4262           set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4263                               actual_fsize, 1);
4264     }
4265 
4266   /* The hppa calling conventions say that %r19, the pic offset
4267      register, is saved at sp - 32 (in this function's frame)
4268      when generating PIC code.  FIXME:  What is the correct thing
4269      to do for functions which make no calls and allocate no
4270      frame?  Do we need to allocate a frame, or can we just omit
4271      the save?   For now we'll just omit the save.
4272 
4273      We don't want a note on this insn as the frame marker can
4274      move if there is a dynamic stack allocation.  */
4275   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4276     {
4277       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4278 
4279       emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4280 
4281     }
4282 
4283   /* Align pointer properly (doubleword boundary).  */
4284   offset = (offset + 7) & ~7;
4285 
4286   /* Floating point register store.  */
4287   if (save_fregs)
4288     {
4289       rtx base;
4290 
4291       /* First get the frame or stack pointer to the start of the FP register
4292            save area.  */
4293       if (frame_pointer_needed)
4294           {
4295             set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4296             base = hard_frame_pointer_rtx;
4297           }
4298       else
4299           {
4300             set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4301             base = stack_pointer_rtx;
4302           }
4303 
4304       /* Now actually save the FP registers.  */
4305       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4306           {
4307             if (df_regs_ever_live_p (i)
4308                 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4309               {
4310                 rtx addr, reg;
4311                 rtx_insn *insn;
4312                 addr = gen_rtx_MEM (DFmode,
4313                                           gen_rtx_POST_INC (word_mode, tmpreg));
4314                 reg = gen_rtx_REG (DFmode, i);
4315                 insn = emit_move_insn (addr, reg);
4316                 if (DO_FRAME_NOTES)
4317                     {
4318                       RTX_FRAME_RELATED_P (insn) = 1;
4319                       if (TARGET_64BIT)
4320                         {
4321                           rtx mem = gen_rtx_MEM (DFmode,
4322                                                        plus_constant (Pmode, base,
4323                                                                           offset));
4324                           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4325                                             gen_rtx_SET (mem, reg));
4326                         }
4327                       else
4328                         {
4329                           rtx meml = gen_rtx_MEM (SFmode,
4330                                                         plus_constant (Pmode, base,
4331                                                                            offset));
4332                           rtx memr = gen_rtx_MEM (SFmode,
4333                                                         plus_constant (Pmode, base,
4334                                                                            offset + 4));
4335                           rtx regl = gen_rtx_REG (SFmode, i);
4336                           rtx regr = gen_rtx_REG (SFmode, i + 1);
4337                           rtx setl = gen_rtx_SET (meml, regl);
4338                           rtx setr = gen_rtx_SET (memr, regr);
4339                           rtvec vec;
4340 
4341                           RTX_FRAME_RELATED_P (setl) = 1;
4342                           RTX_FRAME_RELATED_P (setr) = 1;
4343                           vec = gen_rtvec (2, setl, setr);
4344                           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4345                                             gen_rtx_SEQUENCE (VOIDmode, vec));
4346                         }
4347                     }
4348                 offset += GET_MODE_SIZE (DFmode);
4349                 fr_saved++;
4350               }
4351           }
4352     }
4353 }
4354 
4355 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4356    Handle case where DISP > 8k by using the add_high_const patterns.  */
4357 
4358 static void
load_reg(int reg,HOST_WIDE_INT disp,int base)4359 load_reg (int reg, HOST_WIDE_INT disp, int base)
4360 {
4361   rtx dest = gen_rtx_REG (word_mode, reg);
4362   rtx basereg = gen_rtx_REG (Pmode, base);
4363   rtx src;
4364 
4365   if (VAL_14_BITS_P (disp))
4366     src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4367   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4368     {
4369       rtx delta = GEN_INT (disp);
4370       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4371 
4372       emit_move_insn (tmpreg, delta);
4373       if (TARGET_DISABLE_INDEXING)
4374           {
4375             emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4376             src = gen_rtx_MEM (word_mode, tmpreg);
4377           }
4378       else
4379           src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4380     }
4381   else
4382     {
4383       rtx delta = GEN_INT (disp);
4384       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4385       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4386 
4387       emit_move_insn (tmpreg, high);
4388       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4389     }
4390 
4391   emit_move_insn (dest, src);
4392 }
4393 
4394 /* Update the total code bytes output to the text section.  */
4395 
4396 static void
update_total_code_bytes(unsigned int nbytes)4397 update_total_code_bytes (unsigned int nbytes)
4398 {
4399   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4400       && !IN_NAMED_SECTION_P (cfun->decl))
4401     {
4402       unsigned int old_total = total_code_bytes;
4403 
4404       total_code_bytes += nbytes;
4405 
4406       /* Be prepared to handle overflows.  */
4407       if (old_total > total_code_bytes)
4408         total_code_bytes = UINT_MAX;
4409     }
4410 }
4411 
4412 /* This function generates the assembly code for function exit.
4413    Args are as for output_function_prologue ().
4414 
4415    The function epilogue should not depend on the current stack
4416    pointer!  It should use the frame pointer only.  This is mandatory
4417    because of alloca; we also take advantage of it to omit stack
4418    adjustments before returning.  */
4419 
4420 static void
pa_output_function_epilogue(FILE * file)4421 pa_output_function_epilogue (FILE *file)
4422 {
4423   rtx_insn *insn = get_last_insn ();
4424   bool extra_nop;
4425 
4426   /* pa_expand_epilogue does the dirty work now.  We just need
4427      to output the assembler directives which denote the end
4428      of a function.
4429 
4430      To make debuggers happy, emit a nop if the epilogue was completely
4431      eliminated due to a volatile call as the last insn in the
4432      current function.  That way the return address (in %r2) will
4433      always point to a valid instruction in the current function.  */
4434 
4435   /* Get the last real insn.  */
4436   if (NOTE_P (insn))
4437     insn = prev_real_insn (insn);
4438 
4439   /* If it is a sequence, then look inside.  */
4440   if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4441     insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);
4442 
4443   /* If insn is a CALL_INSN, then it must be a call to a volatile
4444      function (otherwise there would be epilogue insns).  */
4445   if (insn && CALL_P (insn))
4446     {
4447       fputs ("\tnop\n", file);
4448       extra_nop = true;
4449     }
4450   else
4451     extra_nop = false;
4452 
4453   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4454 
4455   if (TARGET_SOM && TARGET_GAS)
4456     {
4457       /* We are done with this subspace except possibly for some additional
4458            debug information.  Forget that we are in this subspace to ensure
4459            that the next function is output in its own subspace.  */
4460       in_section = NULL;
4461       cfun->machine->in_nsubspa = 2;
4462     }
4463 
4464   /* Thunks do their own insn accounting.  */
4465   if (cfun->is_thunk)
4466     return;
4467 
4468   if (INSN_ADDRESSES_SET_P ())
4469     {
4470       last_address = extra_nop ? 4 : 0;
4471       insn = get_last_nonnote_insn ();
4472       if (insn)
4473           {
4474             last_address += INSN_ADDRESSES (INSN_UID (insn));
4475             if (INSN_P (insn))
4476               last_address += insn_default_length (insn);
4477           }
4478       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4479                           & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4480     }
4481   else
4482     last_address = UINT_MAX;
4483 
4484   /* Finally, update the total number of code bytes output so far.  */
4485   update_total_code_bytes (last_address);
4486 }
4487 
4488 void
pa_expand_epilogue(void)4489 pa_expand_epilogue (void)
4490 {
4491   rtx tmpreg;
4492   HOST_WIDE_INT offset;
4493   HOST_WIDE_INT ret_off = 0;
4494   int i;
4495   int merge_sp_adjust_with_load = 0;
4496 
4497   /* We will use this often.  */
4498   tmpreg = gen_rtx_REG (word_mode, 1);
4499 
4500   /* Try to restore RP early to avoid load/use interlocks when
4501      RP gets used in the return (bv) instruction.  This appears to still
4502      be necessary even when we schedule the prologue and epilogue.  */
4503   if (rp_saved)
4504     {
4505       ret_off = TARGET_64BIT ? -16 : -20;
4506       if (frame_pointer_needed)
4507           {
4508             load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4509             ret_off = 0;
4510           }
4511       else
4512           {
4513             /* No frame pointer, and stack is smaller than 8k.  */
4514             if (VAL_14_BITS_P (ret_off - actual_fsize))
4515               {
4516                 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4517                 ret_off = 0;
4518               }
4519           }
4520     }
4521 
4522   /* General register restores.  */
4523   if (frame_pointer_needed)
4524     {
4525       offset = local_fsize;
4526 
4527       /* If the current function calls __builtin_eh_return, then we need
4528          to restore the saved EH data registers.  */
4529       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4530           {
4531             unsigned int i, regno;
4532 
4533             for (i = 0; ; ++i)
4534               {
4535                 regno = EH_RETURN_DATA_REGNO (i);
4536                 if (regno == INVALID_REGNUM)
4537                     break;
4538 
4539                 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4540                 offset += UNITS_PER_WORD;
4541               }
4542           }
4543 
4544       for (i = 18; i >= 4; i--)
4545           if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4546             {
4547               load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4548               offset += UNITS_PER_WORD;
4549             }
4550     }
4551   else
4552     {
4553       offset = local_fsize - actual_fsize;
4554 
4555       /* If the current function calls __builtin_eh_return, then we need
4556          to restore the saved EH data registers.  */
4557       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4558           {
4559             unsigned int i, regno;
4560 
4561             for (i = 0; ; ++i)
4562               {
4563                 regno = EH_RETURN_DATA_REGNO (i);
4564                 if (regno == INVALID_REGNUM)
4565                     break;
4566 
4567                 /* Only for the first load.
4568                    merge_sp_adjust_with_load holds the register load
4569                    with which we will merge the sp adjustment.  */
4570                 if (merge_sp_adjust_with_load == 0
4571                       && local_fsize == 0
4572                       && VAL_14_BITS_P (-actual_fsize))
4573                   merge_sp_adjust_with_load = regno;
4574                 else
4575                     load_reg (regno, offset, STACK_POINTER_REGNUM);
4576                 offset += UNITS_PER_WORD;
4577               }
4578           }
4579 
4580       for (i = 18; i >= 3; i--)
4581           {
4582             if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4583               {
4584                 /* Only for the first load.
4585                    merge_sp_adjust_with_load holds the register load
4586                    with which we will merge the sp adjustment.  */
4587                 if (merge_sp_adjust_with_load == 0
4588                       && local_fsize == 0
4589                       && VAL_14_BITS_P (-actual_fsize))
4590                   merge_sp_adjust_with_load = i;
4591                 else
4592                     load_reg (i, offset, STACK_POINTER_REGNUM);
4593                 offset += UNITS_PER_WORD;
4594               }
4595           }
4596     }
4597 
4598   /* Align pointer properly (doubleword boundary).  */
4599   offset = (offset + 7) & ~7;
4600 
4601   /* FP register restores.  */
4602   if (save_fregs)
4603     {
4604       /* Adjust the register to index off of.  */
4605       if (frame_pointer_needed)
4606           set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4607       else
4608           set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4609 
4610       /* Actually do the restores now.  */
4611       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4612           if (df_regs_ever_live_p (i)
4613               || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4614             {
4615               rtx src = gen_rtx_MEM (DFmode,
4616                                            gen_rtx_POST_INC (word_mode, tmpreg));
4617               rtx dest = gen_rtx_REG (DFmode, i);
4618               emit_move_insn (dest, src);
4619             }
4620     }
4621 
4622   /* Emit a blockage insn here to keep these insns from being moved to
4623      an earlier spot in the epilogue, or into the main instruction stream.
4624 
4625      This is necessary as we must not cut the stack back before all the
4626      restores are finished.  */
4627   emit_insn (gen_blockage ());
4628 
4629   /* Reset stack pointer (and possibly frame pointer).  The stack
4630      pointer is initially set to fp + 64 to avoid a race condition.  */
4631   if (frame_pointer_needed)
4632     {
4633       rtx delta = GEN_INT (-64);
4634 
4635       set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4636       emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4637                                      stack_pointer_rtx, delta));
4638     }
4639   /* If we were deferring a callee register restore, do it now.  */
4640   else if (merge_sp_adjust_with_load)
4641     {
4642       rtx delta = GEN_INT (-actual_fsize);
4643       rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4644 
4645       emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4646     }
4647   else if (actual_fsize != 0)
4648     set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4649                         - actual_fsize, 0);
4650 
4651   /* If we haven't restored %r2 yet (no frame pointer, and a stack
4652      frame greater than 8k), do so now.  */
4653   if (ret_off != 0)
4654     load_reg (2, ret_off, STACK_POINTER_REGNUM);
4655 
4656   if (DO_FRAME_NOTES && crtl->calls_eh_return)
4657     {
4658       rtx sa = EH_RETURN_STACKADJ_RTX;
4659 
4660       emit_insn (gen_blockage ());
4661       emit_insn (TARGET_64BIT
4662                      ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4663                      : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4664     }
4665 }
4666 
4667 bool
pa_can_use_return_insn(void)4668 pa_can_use_return_insn (void)
4669 {
4670   if (!reload_completed)
4671     return false;
4672 
4673   if (frame_pointer_needed)
4674     return false;
4675 
4676   if (df_regs_ever_live_p (2))
4677     return false;
4678 
4679   if (crtl->profile)
4680     return false;
4681 
4682   return pa_compute_frame_size (get_frame_size (), 0) == 0;
4683 }
4684 
4685 rtx
hppa_pic_save_rtx(void)4686 hppa_pic_save_rtx (void)
4687 {
4688   return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4689 }
4690 
4691 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4692 #define NO_DEFERRED_PROFILE_COUNTERS 0
4693 #endif
4694 
4695 
4696 /* Vector of funcdef numbers.  */
4697 static vec<int> funcdef_nos;
4698 
4699 /* Output deferred profile counters.  */
4700 static void
output_deferred_profile_counters(void)4701 output_deferred_profile_counters (void)
4702 {
4703   unsigned int i;
4704   int align, n;
4705 
4706   if (funcdef_nos.is_empty ())
4707    return;
4708 
4709   switch_to_section (data_section);
4710   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4711   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4712 
4713   for (i = 0; funcdef_nos.iterate (i, &n); i++)
4714     {
4715       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4716       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4717     }
4718 
4719   funcdef_nos.release ();
4720 }
4721 
4722 void
hppa_profile_hook(int label_no)4723 hppa_profile_hook (int label_no)
4724 {
4725   rtx_code_label *label_rtx = gen_label_rtx ();
4726   int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4727   rtx arg_bytes, begin_label_rtx, mcount, sym;
4728   rtx_insn *call_insn;
4729   char begin_label_name[16];
4730   bool use_mcount_pcrel_call;
4731 
4732   /* Set up call destination.  */
4733   sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4734   pa_encode_label (sym);
4735   mcount = gen_rtx_MEM (Pmode, sym);
4736 
4737   /* If we can reach _mcount with a pc-relative call, we can optimize
4738      loading the address of the current function.  This requires linker
4739      long branch stub support.  */
4740   if (!TARGET_PORTABLE_RUNTIME
4741       && !TARGET_LONG_CALLS
4742       && (TARGET_SOM || flag_function_sections))
4743     use_mcount_pcrel_call = TRUE;
4744   else
4745     use_mcount_pcrel_call = FALSE;
4746 
4747   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4748                                      label_no);
4749   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4750 
4751   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4752 
4753   if (!use_mcount_pcrel_call)
4754     {
4755       /* The address of the function is loaded into %r25 with an instruction-
4756            relative sequence that avoids the use of relocations.  We use SImode
4757            for the address of the function in both 32 and 64-bit code to avoid
4758            having to provide DImode versions of the lcla2 pattern.  */
4759       if (TARGET_PA_20)
4760           emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4761       else
4762           emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4763     }
4764 
4765   if (!NO_DEFERRED_PROFILE_COUNTERS)
4766     {
4767       rtx count_label_rtx, addr, r24;
4768       char count_label_name[16];
4769 
4770       funcdef_nos.safe_push (label_no);
4771       ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4772       count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4773                                                       ggc_strdup (count_label_name));
4774 
4775       addr = force_reg (Pmode, count_label_rtx);
4776       r24 = gen_rtx_REG (Pmode, 24);
4777       emit_move_insn (r24, addr);
4778 
4779       arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4780       if (use_mcount_pcrel_call)
4781           call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4782                                                                  begin_label_rtx));
4783       else
4784           call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4785 
4786       use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4787     }
4788   else
4789     {
4790       arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4791       if (use_mcount_pcrel_call)
4792           call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4793                                                                  begin_label_rtx));
4794       else
4795           call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4796     }
4797 
4798   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4799   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4800 
4801   /* Indicate the _mcount call cannot throw, nor will it execute a
4802      non-local goto.  */
4803   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4804 
4805   /* Allocate space for fixed arguments.  */
4806   if (reg_parm_stack_space > crtl->outgoing_args_size)
4807     crtl->outgoing_args_size = reg_parm_stack_space;
4808 }
4809 
4810 /* Fetch the return address for the frame COUNT steps up from
4811    the current frame, after the prologue.  FRAMEADDR is the
4812    frame pointer of the COUNT frame.
4813 
4814    We want to ignore any export stub remnants here.  To handle this,
4815    we examine the code at the return address, and if it is an export
4816    stub, we return a memory rtx for the stub return address stored
4817    at frame-24.
4818 
4819    The value returned is used in two different ways:
4820 
4821           1. To find a function's caller.
4822 
4823           2. To change the return address for a function.
4824 
4825    This function handles most instances of case 1; however, it will
4826    fail if there are two levels of stubs to execute on the return
4827    path.  The only way I believe that can happen is if the return value
4828    needs a parameter relocation, which never happens for C code.
4829 
4830    This function handles most instances of case 2; however, it will
4831    fail if we did not originally have stub code on the return path
4832    but will need stub code on the new return path.  This can happen if
4833    the caller & callee are both in the main program, but the new
4834    return location is in a shared library.  */
4835 
4836 rtx
pa_return_addr_rtx(int count,rtx frameaddr)4837 pa_return_addr_rtx (int count, rtx frameaddr)
4838 {
4839   rtx label;
4840   rtx rp;
4841   rtx saved_rp;
4842   rtx ins;
4843 
4844   /* The instruction stream at the return address of a PA1.X export stub is:
4845 
4846           0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4847           0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4848           0x00011820 | stub+16:  mtsp r1,sr0
4849           0xe0400002 | stub+20:  be,n 0(sr0,rp)
4850 
4851      0xe0400002 must be specified as -532676606 so that it won't be
4852      rejected as an invalid immediate operand on 64-bit hosts.
4853 
4854      The instruction stream at the return address of a PA2.0 export stub is:
4855 
4856           0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4857           0xe840d002 | stub+12:  bve,n (rp)
4858   */
4859 
4860   HOST_WIDE_INT insns[4];
4861   int i, len;
4862 
4863   if (count != 0)
4864     return NULL_RTX;
4865 
4866   rp = get_hard_reg_initial_val (Pmode, 2);
4867 
4868   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4869     return rp;
4870 
4871   /* If there is no export stub then just use the value saved from
4872      the return pointer register.  */
4873 
4874   saved_rp = gen_reg_rtx (Pmode);
4875   emit_move_insn (saved_rp, rp);
4876 
4877   /* Get pointer to the instruction stream.  We have to mask out the
4878      privilege level from the two low order bits of the return address
4879      pointer here so that ins will point to the start of the first
4880      instruction that would have been executed if we returned.  */
4881   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4882   label = gen_label_rtx ();
4883 
4884   if (TARGET_PA_20)
4885     {
4886       insns[0] = 0x4bc23fd1;
4887       insns[1] = -398405630;
4888       len = 2;
4889     }
4890   else
4891     {
4892       insns[0] = 0x4bc23fd1;
4893       insns[1] = 0x004010a1;
4894       insns[2] = 0x00011820;
4895       insns[3] = -532676606;
4896       len = 4;
4897     }
4898 
4899   /* Check the instruction stream at the normal return address for the
4900      export stub.  If it is an export stub, than our return address is
4901      really in -24[frameaddr].  */
4902 
4903   for (i = 0; i < len; i++)
4904     {
4905       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4906       rtx op1 = GEN_INT (insns[i]);
4907       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4908     }
4909 
4910   /* Here we know that our return address points to an export
4911      stub.  We don't want to return the address of the export stub,
4912      but rather the return address of the export stub.  That return
4913      address is stored at -24[frameaddr].  */
4914 
4915   emit_move_insn (saved_rp,
4916                       gen_rtx_MEM (Pmode,
4917                                      memory_address (Pmode,
4918                                                          plus_constant (Pmode, frameaddr,
4919                                                                             -24))));
4920 
4921   emit_label (label);
4922 
4923   return saved_rp;
4924 }
4925 
4926 void
pa_emit_bcond_fp(rtx operands[])4927 pa_emit_bcond_fp (rtx operands[])
4928 {
4929   enum rtx_code code = GET_CODE (operands[0]);
4930   rtx operand0 = operands[1];
4931   rtx operand1 = operands[2];
4932   rtx label = operands[3];
4933 
4934   emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4935                               gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4936 
4937   emit_jump_insn (gen_rtx_SET (pc_rtx,
4938                                      gen_rtx_IF_THEN_ELSE (VOIDmode,
4939                                                                  gen_rtx_fmt_ee (NE,
4940                                                                             VOIDmode,
4941                                                                             gen_rtx_REG (CCFPmode, 0),
4942                                                                             const0_rtx),
4943                                                                  gen_rtx_LABEL_REF (VOIDmode, label),
4944                                                                  pc_rtx)));
4945 
4946 }
4947 
4948 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4949    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4950 
4951 static int
pa_adjust_cost(rtx_insn * insn,int dep_type,rtx_insn * dep_insn,int cost,unsigned int)4952 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4953                     unsigned int)
4954 {
4955   enum attr_type attr_type;
4956 
4957   /* Don't adjust costs for a pa8000 chip, also do not adjust any
4958      true dependencies as they are described with bypasses now.  */
4959   if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4960     return cost;
4961 
4962   if (! recog_memoized (insn))
4963     return 0;
4964 
4965   attr_type = get_attr_type (insn);
4966 
4967   switch (dep_type)
4968     {
4969     case REG_DEP_ANTI:
4970       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4971            cycles later.  */
4972 
4973       if (attr_type == TYPE_FPLOAD)
4974           {
4975             rtx pat = PATTERN (insn);
4976             rtx dep_pat = PATTERN (dep_insn);
4977             if (GET_CODE (pat) == PARALLEL)
4978               {
4979                 /* This happens for the fldXs,mb patterns.  */
4980                 pat = XVECEXP (pat, 0, 0);
4981               }
4982             if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4983               /* If this happens, we have to extend this to schedule
4984                  optimally.  Return 0 for now.  */
4985             return 0;
4986 
4987             if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4988               {
4989                 if (! recog_memoized (dep_insn))
4990                     return 0;
4991                 switch (get_attr_type (dep_insn))
4992                     {
4993                     case TYPE_FPALU:
4994                     case TYPE_FPMULSGL:
4995                     case TYPE_FPMULDBL:
4996                     case TYPE_FPDIVSGL:
4997                     case TYPE_FPDIVDBL:
4998                     case TYPE_FPSQRTSGL:
4999                     case TYPE_FPSQRTDBL:
5000                       /* A fpload can't be issued until one cycle before a
5001                          preceding arithmetic operation has finished if
5002                          the target of the fpload is any of the sources
5003                          (or destination) of the arithmetic operation.  */
5004                       return insn_default_latency (dep_insn) - 1;
5005 
5006                     default:
5007                       return 0;
5008                     }
5009               }
5010           }
5011       else if (attr_type == TYPE_FPALU)
5012           {
5013             rtx pat = PATTERN (insn);
5014             rtx dep_pat = PATTERN (dep_insn);
5015             if (GET_CODE (pat) == PARALLEL)
5016               {
5017                 /* This happens for the fldXs,mb patterns.  */
5018                 pat = XVECEXP (pat, 0, 0);
5019               }
5020             if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5021               /* If this happens, we have to extend this to schedule
5022                  optimally.  Return 0 for now.  */
5023             return 0;
5024 
5025             if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5026               {
5027                 if (! recog_memoized (dep_insn))
5028                     return 0;
5029                 switch (get_attr_type (dep_insn))
5030                     {
5031                     case TYPE_FPDIVSGL:
5032                     case TYPE_FPDIVDBL:
5033                     case TYPE_FPSQRTSGL:
5034                     case TYPE_FPSQRTDBL:
5035                       /* An ALU flop can't be issued until two cycles before a
5036                          preceding divide or sqrt operation has finished if
5037                          the target of the ALU flop is any of the sources
5038                          (or destination) of the divide or sqrt operation.  */
5039                       return insn_default_latency (dep_insn) - 2;
5040 
5041                     default:
5042                       return 0;
5043                     }
5044               }
5045           }
5046 
5047       /* For other anti dependencies, the cost is 0.  */
5048       return 0;
5049 
5050     case REG_DEP_OUTPUT:
5051       /* Output dependency; DEP_INSN writes a register that INSN writes some
5052            cycles later.  */
5053       if (attr_type == TYPE_FPLOAD)
5054           {
5055             rtx pat = PATTERN (insn);
5056             rtx dep_pat = PATTERN (dep_insn);
5057             if (GET_CODE (pat) == PARALLEL)
5058               {
5059                 /* This happens for the fldXs,mb patterns.  */
5060                 pat = XVECEXP (pat, 0, 0);
5061               }
5062             if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5063               /* If this happens, we have to extend this to schedule
5064                  optimally.  Return 0 for now.  */
5065             return 0;
5066 
5067             if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5068               {
5069                 if (! recog_memoized (dep_insn))
5070                     return 0;
5071                 switch (get_attr_type (dep_insn))
5072                     {
5073                     case TYPE_FPALU:
5074                     case TYPE_FPMULSGL:
5075                     case TYPE_FPMULDBL:
5076                     case TYPE_FPDIVSGL:
5077                     case TYPE_FPDIVDBL:
5078                     case TYPE_FPSQRTSGL:
5079                     case TYPE_FPSQRTDBL:
5080                       /* A fpload can't be issued until one cycle before a
5081                          preceding arithmetic operation has finished if
5082                          the target of the fpload is the destination of the
5083                          arithmetic operation.
5084 
5085                          Exception: For PA7100LC, PA7200 and PA7300, the cost
5086                          is 3 cycles, unless they bundle together.   We also
5087                          pay the penalty if the second insn is a fpload.  */
5088                       return insn_default_latency (dep_insn) - 1;
5089 
5090                     default:
5091                       return 0;
5092                     }
5093               }
5094           }
5095       else if (attr_type == TYPE_FPALU)
5096           {
5097             rtx pat = PATTERN (insn);
5098             rtx dep_pat = PATTERN (dep_insn);
5099             if (GET_CODE (pat) == PARALLEL)
5100               {
5101                 /* This happens for the fldXs,mb patterns.  */
5102                 pat = XVECEXP (pat, 0, 0);
5103               }
5104             if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5105               /* If this happens, we have to extend this to schedule
5106                  optimally.  Return 0 for now.  */
5107             return 0;
5108 
5109             if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5110               {
5111                 if (! recog_memoized (dep_insn))
5112                     return 0;
5113                 switch (get_attr_type (dep_insn))
5114                     {
5115                     case TYPE_FPDIVSGL:
5116                     case TYPE_FPDIVDBL:
5117                     case TYPE_FPSQRTSGL:
5118                     case TYPE_FPSQRTDBL:
5119                       /* An ALU flop can't be issued until two cycles before a
5120                          preceding divide or sqrt operation has finished if
5121                          the target of the ALU flop is also the target of
5122                          the divide or sqrt operation.  */
5123                       return insn_default_latency (dep_insn) - 2;
5124 
5125                     default:
5126                       return 0;
5127                     }
5128               }
5129           }
5130 
5131       /* For other output dependencies, the cost is 0.  */
5132       return 0;
5133 
5134     default:
5135       gcc_unreachable ();
5136     }
5137 }
5138 
5139 /* The 700 can only issue a single insn at a time.
5140    The 7XXX processors can issue two insns at a time.
5141    The 8000 can issue 4 insns at a time.  */
5142 static int
pa_issue_rate(void)5143 pa_issue_rate (void)
5144 {
5145   switch (pa_cpu)
5146     {
5147     case PROCESSOR_700:                 return 1;
5148     case PROCESSOR_7100:      return 2;
5149     case PROCESSOR_7100LC:    return 2;
5150     case PROCESSOR_7200:      return 2;
5151     case PROCESSOR_7300:      return 2;
5152     case PROCESSOR_8000:      return 4;
5153 
5154     default:
5155       gcc_unreachable ();
5156     }
5157 }
5158 
5159 
5160 
5161 /* Return any length plus adjustment needed by INSN which already has
5162    its length computed as LENGTH.   Return LENGTH if no adjustment is
5163    necessary.
5164 
5165    Also compute the length of an inline block move here as it is too
5166    complicated to express as a length attribute in pa.md.  */
5167 int
pa_adjust_insn_length(rtx_insn * insn,int length)5168 pa_adjust_insn_length (rtx_insn *insn, int length)
5169 {
5170   rtx pat = PATTERN (insn);
5171 
5172   /* If length is negative or undefined, provide initial length.  */
5173   if ((unsigned int) length >= INT_MAX)
5174     {
5175       if (GET_CODE (pat) == SEQUENCE)
5176           insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5177 
5178       switch (get_attr_type (insn))
5179           {
5180           case TYPE_MILLI:
5181             length = pa_attr_length_millicode_call (insn);
5182             break;
5183           case TYPE_CALL:
5184             length = pa_attr_length_call (insn, 0);
5185             break;
5186           case TYPE_SIBCALL:
5187             length = pa_attr_length_call (insn, 1);
5188             break;
5189           case TYPE_DYNCALL:
5190             length = pa_attr_length_indirect_call (insn);
5191             break;
5192           case TYPE_SH_FUNC_ADRS:
5193             length = pa_attr_length_millicode_call (insn) + 20;
5194             break;
5195           default:
5196             gcc_unreachable ();
5197           }
5198     }
5199 
5200   /* Block move pattern.  */
5201   if (NONJUMP_INSN_P (insn)
5202       && GET_CODE (pat) == PARALLEL
5203       && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5204       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5205       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5206       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5207       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5208     length += compute_cpymem_length (insn) - 4;
5209   /* Block clear pattern.  */
5210   else if (NONJUMP_INSN_P (insn)
5211              && GET_CODE (pat) == PARALLEL
5212              && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5213              && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5214              && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5215              && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5216     length += compute_clrmem_length (insn) - 4;
5217   /* Conditional branch with an unfilled delay slot.  */
5218   else if (JUMP_P (insn) && ! simplejump_p (insn))
5219     {
5220       /* Adjust a short backwards conditional with an unfilled delay slot.  */
5221       if (GET_CODE (pat) == SET
5222             && length == 4
5223             && JUMP_LABEL (insn) != NULL_RTX
5224             && ! forward_branch_p (insn))
5225           length += 4;
5226       else if (GET_CODE (pat) == PARALLEL
5227                  && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5228                  && length == 4)
5229           length += 4;
5230       /* Adjust dbra insn with short backwards conditional branch with
5231            unfilled delay slot -- only for case where counter is in a
5232            general register register.  */
5233       else if (GET_CODE (pat) == PARALLEL
5234                  && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5235                  && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5236                  && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5237                  && length == 4
5238                  && ! forward_branch_p (insn))
5239           length += 4;
5240     }
5241   return length;
5242 }
5243 
/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  Return true
   when CODE is one of the punctuation characters '@', '#', '*' or '^',
   and false for every other character.  */

static bool
pa_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '@':
    case '#':
    case '*':
    case '^':
      return true;

    default:
      return false;
    }
}
5257 
5258 /* Print operand X (an rtx) in assembler syntax to file FILE.
5259    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5260    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5261 
5262 void
pa_print_operand(FILE * file,rtx x,int code)5263 pa_print_operand (FILE *file, rtx x, int code)
5264 {
5265   switch (code)
5266     {
5267     case '#':
5268       /* Output a 'nop' if there's nothing for the delay slot.  */
5269       if (dbr_sequence_length () == 0)
5270           fputs ("\n\tnop", file);
5271       return;
5272     case '*':
5273       /* Output a nullification completer if there's nothing for the */
5274       /* delay slot or nullification is requested.  */
5275       if (dbr_sequence_length () == 0 ||
5276             (final_sequence &&
5277              INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5278         fputs (",n", file);
5279       return;
5280     case 'R':
5281       /* Print out the second register name of a register pair.
5282            I.e., R (6) => 7.  */
5283       fputs (reg_names[REGNO (x) + 1], file);
5284       return;
5285     case 'r':
5286       /* A register or zero.  */
5287       if (x == const0_rtx
5288             || (x == CONST0_RTX (DFmode))
5289             || (x == CONST0_RTX (SFmode)))
5290           {
5291             fputs ("%r0", file);
5292             return;
5293           }
5294       else
5295           break;
5296     case 'f':
5297       /* A register or zero (floating point).  */
5298       if (x == const0_rtx
5299             || (x == CONST0_RTX (DFmode))
5300             || (x == CONST0_RTX (SFmode)))
5301           {
5302             fputs ("%fr0", file);
5303             return;
5304           }
5305       else
5306           break;
5307     case 'A':
5308       {
5309           rtx xoperands[2];
5310 
5311           xoperands[0] = XEXP (XEXP (x, 0), 0);
5312           xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5313           pa_output_global_address (file, xoperands[1], 0);
5314         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5315           return;
5316       }
5317 
5318     case 'C':                           /* Plain (C)ondition */
5319     case 'X':
5320       switch (GET_CODE (x))
5321           {
5322           case EQ:
5323             fputs ("=", file);  break;
5324           case NE:
5325             fputs ("<>", file);  break;
5326           case GT:
5327             fputs (">", file);  break;
5328           case GE:
5329             fputs (">=", file);  break;
5330           case GEU:
5331             fputs (">>=", file);  break;
5332           case GTU:
5333             fputs (">>", file);  break;
5334           case LT:
5335             fputs ("<", file);  break;
5336           case LE:
5337             fputs ("<=", file);  break;
5338           case LEU:
5339             fputs ("<<=", file);  break;
5340           case LTU:
5341             fputs ("<<", file);  break;
5342           default:
5343             gcc_unreachable ();
5344           }
5345       return;
5346     case 'N':                           /* Condition, (N)egated */
5347       switch (GET_CODE (x))
5348           {
5349           case EQ:
5350             fputs ("<>", file);  break;
5351           case NE:
5352             fputs ("=", file);  break;
5353           case GT:
5354             fputs ("<=", file);  break;
5355           case GE:
5356             fputs ("<", file);  break;
5357           case GEU:
5358             fputs ("<<", file);  break;
5359           case GTU:
5360             fputs ("<<=", file);  break;
5361           case LT:
5362             fputs (">=", file);  break;
5363           case LE:
5364             fputs (">", file);  break;
5365           case LEU:
5366             fputs (">>", file);  break;
5367           case LTU:
5368             fputs (">>=", file);  break;
5369           default:
5370             gcc_unreachable ();
5371           }
5372       return;
5373     /* For floating point comparisons.  Note that the output
5374        predicates are the complement of the desired mode.  The
5375        conditions for GT, GE, LT, LE and LTGT cause an invalid
5376        operation exception if the result is unordered and this
5377        exception is enabled in the floating-point status register.  */
5378     case 'Y':
5379       switch (GET_CODE (x))
5380           {
5381           case EQ:
5382             fputs ("!=", file);  break;
5383           case NE:
5384             fputs ("=", file);  break;
5385           case GT:
5386             fputs ("!>", file);  break;
5387           case GE:
5388             fputs ("!>=", file);  break;
5389           case LT:
5390             fputs ("!<", file);  break;
5391           case LE:
5392             fputs ("!<=", file);  break;
5393           case LTGT:
5394             fputs ("!<>", file);  break;
5395           case UNLE:
5396             fputs ("!?<=", file);  break;
5397           case UNLT:
5398             fputs ("!?<", file);  break;
5399           case UNGE:
5400             fputs ("!?>=", file);  break;
5401           case UNGT:
5402             fputs ("!?>", file);  break;
5403           case UNEQ:
5404             fputs ("!?=", file);  break;
5405           case UNORDERED:
5406             fputs ("!?", file);  break;
5407           case ORDERED:
5408             fputs ("?", file);  break;
5409           default:
5410             gcc_unreachable ();
5411           }
5412       return;
5413     case 'S':                           /* Condition, operands are (S)wapped.  */
5414       switch (GET_CODE (x))
5415           {
5416           case EQ:
5417             fputs ("=", file);  break;
5418           case NE:
5419             fputs ("<>", file);  break;
5420           case GT:
5421             fputs ("<", file);  break;
5422           case GE:
5423             fputs ("<=", file);  break;
5424           case GEU:
5425             fputs ("<<=", file);  break;
5426           case GTU:
5427             fputs ("<<", file);  break;
5428           case LT:
5429             fputs (">", file);  break;
5430           case LE:
5431             fputs (">=", file);  break;
5432           case LEU:
5433             fputs (">>=", file);  break;
5434           case LTU:
5435             fputs (">>", file);  break;
5436           default:
5437             gcc_unreachable ();
5438           }
5439       return;
5440     case 'B':                           /* Condition, (B)oth swapped and negate.  */
5441       switch (GET_CODE (x))
5442           {
5443           case EQ:
5444             fputs ("<>", file);  break;
5445           case NE:
5446             fputs ("=", file);  break;
5447           case GT:
5448             fputs (">=", file);  break;
5449           case GE:
5450             fputs (">", file);  break;
5451           case GEU:
5452             fputs (">>", file);  break;
5453           case GTU:
5454             fputs (">>=", file);  break;
5455           case LT:
5456             fputs ("<=", file);  break;
5457           case LE:
5458             fputs ("<", file);  break;
5459           case LEU:
5460             fputs ("<<", file);  break;
5461           case LTU:
5462             fputs ("<<=", file);  break;
5463           default:
5464             gcc_unreachable ();
5465           }
5466       return;
5467     case 'k':
5468       gcc_assert (GET_CODE (x) == CONST_INT);
5469       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5470       return;
5471     case 'Q':
5472       gcc_assert (GET_CODE (x) == CONST_INT);
5473       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5474       return;
5475     case 'L':
5476       gcc_assert (GET_CODE (x) == CONST_INT);
5477       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5478       return;
5479     case 'o':
5480       gcc_assert (GET_CODE (x) == CONST_INT
5481                       && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5482       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5483       return;
5484     case 'O':
5485       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5486       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5487       return;
5488     case 'p':
5489       gcc_assert (GET_CODE (x) == CONST_INT);
5490       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5491       return;
5492     case 'P':
5493       gcc_assert (GET_CODE (x) == CONST_INT);
5494       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5495       return;
5496     case 'I':
5497       if (GET_CODE (x) == CONST_INT)
5498           fputs ("i", file);
5499       return;
5500     case 'M':
5501     case 'F':
5502       switch (GET_CODE (XEXP (x, 0)))
5503           {
5504           case PRE_DEC:
5505           case PRE_INC:
5506             if (ASSEMBLER_DIALECT == 0)
5507               fputs ("s,mb", file);
5508             else
5509               fputs (",mb", file);
5510             break;
5511           case POST_DEC:
5512           case POST_INC:
5513             if (ASSEMBLER_DIALECT == 0)
5514               fputs ("s,ma", file);
5515             else
5516               fputs (",ma", file);
5517             break;
5518           case PLUS:
5519             if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5520                 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5521               {
5522                 if (ASSEMBLER_DIALECT == 0)
5523                     fputs ("x", file);
5524               }
5525             else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5526                        || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5527               {
5528                 if (ASSEMBLER_DIALECT == 0)
5529                     fputs ("x,s", file);
5530                 else
5531                     fputs (",s", file);
5532               }
5533             else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5534               fputs ("s", file);
5535             break;
5536           default:
5537             if (code == 'F' && ASSEMBLER_DIALECT == 0)
5538               fputs ("s", file);
5539             break;
5540           }
5541       return;
5542     case 'G':
5543       pa_output_global_address (file, x, 0);
5544       return;
5545     case 'H':
5546       pa_output_global_address (file, x, 1);
5547       return;
5548     case 0:                             /* Don't do anything special */
5549       break;
5550     case 'Z':
5551       {
5552           unsigned op[3];
5553           compute_zdepwi_operands (INTVAL (x), op);
5554           fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5555           return;
5556       }
5557     case 'z':
5558       {
5559           unsigned op[3];
5560           compute_zdepdi_operands (INTVAL (x), op);
5561           fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5562           return;
5563       }
5564     case 'c':
5565       /* We can get here from a .vtable_inherit due to our
5566            CONSTANT_ADDRESS_P rejecting perfectly good constant
5567            addresses.  */
5568       break;
5569     default:
5570       gcc_unreachable ();
5571     }
5572   if (GET_CODE (x) == REG)
5573     {
5574       fputs (reg_names [REGNO (x)], file);
5575       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5576           {
5577             fputs ("R", file);
5578             return;
5579           }
5580       if (FP_REG_P (x)
5581             && GET_MODE_SIZE (GET_MODE (x)) <= 4
5582             && (REGNO (x) & 1) == 0)
5583           fputs ("L", file);
5584     }
5585   else if (GET_CODE (x) == MEM)
5586     {
5587       int size = GET_MODE_SIZE (GET_MODE (x));
5588       rtx base = NULL_RTX;
5589       switch (GET_CODE (XEXP (x, 0)))
5590           {
5591           case PRE_DEC:
5592           case POST_DEC:
5593           base = XEXP (XEXP (x, 0), 0);
5594             fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5595             break;
5596           case PRE_INC:
5597           case POST_INC:
5598           base = XEXP (XEXP (x, 0), 0);
5599             fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5600             break;
5601           case PLUS:
5602             if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5603               fprintf (file, "%s(%s)",
5604                          reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5605                          reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5606             else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5607               fprintf (file, "%s(%s)",
5608                          reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5609                          reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5610             else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5611                        && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5612               {
5613                 /* Because the REG_POINTER flag can get lost during reload,
5614                      pa_legitimate_address_p canonicalizes the order of the
5615                      index and base registers in the combined move patterns.  */
5616                 rtx base = XEXP (XEXP (x, 0), 1);
5617                 rtx index = XEXP (XEXP (x, 0), 0);
5618 
5619                 fprintf (file, "%s(%s)",
5620                            reg_names [REGNO (index)], reg_names [REGNO (base)]);
5621               }
5622             else
5623               output_address (GET_MODE (x), XEXP (x, 0));
5624             break;
5625           default:
5626             output_address (GET_MODE (x), XEXP (x, 0));
5627             break;
5628           }
5629     }
5630   else
5631     output_addr_const (file, x);
5632 }
5633 
5634 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5635 
5636 void
pa_output_global_address(FILE * file,rtx x,int round_constant)5637 pa_output_global_address (FILE *file, rtx x, int round_constant)
5638 {
5639 
5640   /* Imagine  (high (const (plus ...))).  */
5641   if (GET_CODE (x) == HIGH)
5642     x = XEXP (x, 0);
5643 
5644   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5645     output_addr_const (file, x);
5646   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5647     {
5648       output_addr_const (file, x);
5649       fputs ("-$global$", file);
5650     }
5651   else if (GET_CODE (x) == CONST)
5652     {
5653       const char *sep = "";
5654       int offset = 0;                   /* assembler wants -$global$ at end */
5655       rtx base = NULL_RTX;
5656 
5657       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5658           {
5659           case LABEL_REF:
5660           case SYMBOL_REF:
5661             base = XEXP (XEXP (x, 0), 0);
5662             output_addr_const (file, base);
5663             break;
5664           case CONST_INT:
5665             offset = INTVAL (XEXP (XEXP (x, 0), 0));
5666             break;
5667           default:
5668             gcc_unreachable ();
5669           }
5670 
5671       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5672           {
5673           case LABEL_REF:
5674           case SYMBOL_REF:
5675             base = XEXP (XEXP (x, 0), 1);
5676             output_addr_const (file, base);
5677             break;
5678           case CONST_INT:
5679             offset = INTVAL (XEXP (XEXP (x, 0), 1));
5680             break;
5681           default:
5682             gcc_unreachable ();
5683           }
5684 
5685       /* How bogus.  The compiler is apparently responsible for
5686            rounding the constant if it uses an LR field selector.
5687 
5688            The linker and/or assembler seem a better place since
5689            they have to do this kind of thing already.
5690 
5691            If we fail to do this, HP's optimizing linker may eliminate
5692            an addil, but not update the ldw/stw/ldo instruction that
5693            uses the result of the addil.  */
5694       if (round_constant)
5695           offset = ((offset + 0x1000) & ~0x1fff);
5696 
5697       switch (GET_CODE (XEXP (x, 0)))
5698           {
5699           case PLUS:
5700             if (offset < 0)
5701               {
5702                 offset = -offset;
5703                 sep = "-";
5704               }
5705             else
5706               sep = "+";
5707             break;
5708 
5709           case MINUS:
5710             gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5711             sep = "-";
5712             break;
5713 
5714           default:
5715             gcc_unreachable ();
5716           }
5717 
5718       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5719           fputs ("-$global$", file);
5720       if (offset)
5721           fprintf (file, "%s%d", sep, offset);
5722     }
5723   else
5724     output_addr_const (file, x);
5725 }
5726 
5727 /* Output boilerplate text to appear at the beginning of the file.
5728    There are several possible versions.  */
5729 #define aputs(x) fputs(x, asm_out_file)
5730 static inline void
pa_file_start_level(void)5731 pa_file_start_level (void)
5732 {
5733   if (TARGET_64BIT)
5734     aputs ("\t.LEVEL 2.0w\n");
5735   else if (TARGET_PA_20)
5736     aputs ("\t.LEVEL 2.0\n");
5737   else if (TARGET_PA_11)
5738     aputs ("\t.LEVEL 1.1\n");
5739   else
5740     aputs ("\t.LEVEL 1.0\n");
5741 }
5742 
5743 static inline void
pa_file_start_space(int sortspace)5744 pa_file_start_space (int sortspace)
5745 {
5746   aputs ("\t.SPACE $PRIVATE$");
5747   if (sortspace)
5748     aputs (",SORT=16");
5749   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5750   if (flag_tm)
5751     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5752   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5753            "\n\t.SPACE $TEXT$");
5754   if (sortspace)
5755     aputs (",SORT=8");
5756   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5757            "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5758 }
5759 
5760 static inline void
pa_file_start_file(int want_version)5761 pa_file_start_file (int want_version)
5762 {
5763   if (write_symbols != NO_DEBUG)
5764     {
5765       output_file_directive (asm_out_file, main_input_filename);
5766       if (want_version)
5767           aputs ("\t.version\t\"01.01\"\n");
5768     }
5769 }
5770 
5771 static inline void
pa_file_start_mcount(const char * aswhat)5772 pa_file_start_mcount (const char *aswhat)
5773 {
5774   if (profile_flag)
5775     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5776 }
5777 
5778 static void
pa_elf_file_start(void)5779 pa_elf_file_start (void)
5780 {
5781   pa_file_start_level ();
5782   pa_file_start_mcount ("ENTRY");
5783   pa_file_start_file (0);
5784 }
5785 
5786 static void
pa_som_file_start(void)5787 pa_som_file_start (void)
5788 {
5789   pa_file_start_level ();
5790   pa_file_start_space (0);
5791   aputs ("\t.IMPORT $global$,DATA\n"
5792          "\t.IMPORT $$dyncall,MILLICODE\n");
5793   pa_file_start_mcount ("CODE");
5794   pa_file_start_file (0);
5795 }
5796 
5797 static void
pa_linux_file_start(void)5798 pa_linux_file_start (void)
5799 {
5800   pa_file_start_file (0);
5801   pa_file_start_level ();
5802   pa_file_start_mcount ("CODE");
5803 }
5804 
5805 static void
pa_hpux64_gas_file_start(void)5806 pa_hpux64_gas_file_start (void)
5807 {
5808   pa_file_start_level ();
5809 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5810   if (profile_flag)
5811     ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5812 #endif
5813   pa_file_start_file (1);
5814 }
5815 
5816 static void
pa_hpux64_hpas_file_start(void)5817 pa_hpux64_hpas_file_start (void)
5818 {
5819   pa_file_start_level ();
5820   pa_file_start_space (1);
5821   pa_file_start_mcount ("CODE");
5822   pa_file_start_file (0);
5823 }
5824 #undef aputs
5825 
5826 /* Search the deferred plabel list for SYMBOL and return its internal
5827    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5828 
5829 rtx
pa_get_deferred_plabel(rtx symbol)5830 pa_get_deferred_plabel (rtx symbol)
5831 {
5832   const char *fname = XSTR (symbol, 0);
5833   size_t i;
5834 
5835   /* See if we have already put this function on the list of deferred
5836      plabels.  This list is generally small, so a liner search is not
5837      too ugly.  If it proves too slow replace it with something faster.  */
5838   for (i = 0; i < n_deferred_plabels; i++)
5839     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5840       break;
5841 
5842   /* If the deferred plabel list is empty, or this entry was not found
5843      on the list, create a new entry on the list.  */
5844   if (deferred_plabels == NULL || i == n_deferred_plabels)
5845     {
5846       tree id;
5847 
5848       if (deferred_plabels == 0)
5849           deferred_plabels =  ggc_alloc<deferred_plabel> ();
5850       else
5851         deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5852                                           deferred_plabels,
5853                                           n_deferred_plabels + 1);
5854 
5855       i = n_deferred_plabels++;
5856       deferred_plabels[i].internal_label = gen_label_rtx ();
5857       deferred_plabels[i].symbol = symbol;
5858 
5859       /* Gross.  We have just implicitly taken the address of this
5860            function.  Mark it in the same manner as assemble_name.  */
5861       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5862       if (id)
5863           mark_referenced (id);
5864     }
5865 
5866   return deferred_plabels[i].internal_label;
5867 }
5868 
5869 static void
output_deferred_plabels(void)5870 output_deferred_plabels (void)
5871 {
5872   size_t i;
5873 
5874   /* If we have some deferred plabels, then we need to switch into the
5875      data or readonly data section, and align it to a 4 byte boundary
5876      before outputting the deferred plabels.  */
5877   if (n_deferred_plabels)
5878     {
5879       switch_to_section (flag_pic ? data_section : readonly_data_section);
5880       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5881     }
5882 
5883   /* Now output the deferred plabels.  */
5884   for (i = 0; i < n_deferred_plabels; i++)
5885     {
5886       targetm.asm_out.internal_label (asm_out_file, "L",
5887                      CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5888       assemble_integer (deferred_plabels[i].symbol,
5889                               TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5890     }
5891 }
5892 
5893 /* Initialize optabs to point to emulation routines.  */
5894 
5895 static void
pa_init_libfuncs(void)5896 pa_init_libfuncs (void)
5897 {
5898   if (HPUX_LONG_DOUBLE_LIBRARY)
5899     {
5900       set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5901       set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5902       set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5903       set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5904       set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5905       set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5906       set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5907       set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5908       set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5909 
5910       set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5911       set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5912       set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5913       set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5914       set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5915       set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5916       set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5917 
5918       set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5919       set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5920       set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5921       set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5922 
5923       set_conv_libfunc (sfix_optab, SImode, TFmode,
5924                               TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5925                                              : "_U_Qfcnvfxt_quad_to_sgl");
5926       set_conv_libfunc (sfix_optab, DImode, TFmode,
5927                               "_U_Qfcnvfxt_quad_to_dbl");
5928       set_conv_libfunc (ufix_optab, SImode, TFmode,
5929                               "_U_Qfcnvfxt_quad_to_usgl");
5930       set_conv_libfunc (ufix_optab, DImode, TFmode,
5931                               "_U_Qfcnvfxt_quad_to_udbl");
5932 
5933       set_conv_libfunc (sfloat_optab, TFmode, SImode,
5934                               "_U_Qfcnvxf_sgl_to_quad");
5935       set_conv_libfunc (sfloat_optab, TFmode, DImode,
5936                               "_U_Qfcnvxf_dbl_to_quad");
5937       set_conv_libfunc (ufloat_optab, TFmode, SImode,
5938                               "_U_Qfcnvxf_usgl_to_quad");
5939       set_conv_libfunc (ufloat_optab, TFmode, DImode,
5940                               "_U_Qfcnvxf_udbl_to_quad");
5941     }
5942 
5943   if (TARGET_SYNC_LIBCALL)
5944     init_sync_libfuncs (8);
5945 }
5946 
5947 /* HP's millicode routines mean something special to the assembler.
5948    Keep track of which ones we have used.  */
5949 
5950 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5951 static void import_milli (enum millicodes);
5952 static char imported[(int) end1000];
5953 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5954 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5955 #define MILLI_START 10
5956 
5957 static void
import_milli(enum millicodes code)5958 import_milli (enum millicodes code)
5959 {
5960   char str[sizeof (import_string)];
5961 
5962   if (!imported[(int) code])
5963     {
5964       imported[(int) code] = 1;
5965       strcpy (str, import_string);
5966       memcpy (str + MILLI_START, milli_names[(int) code], 4);
5967       output_asm_insn (str, 0);
5968     }
5969 }
5970 
5971 /* The register constraints have put the operands and return value in
5972    the proper registers.  */
5973 
5974 const char *
pa_output_mul_insn(int unsignedp ATTRIBUTE_UNUSED,rtx_insn * insn)5975 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5976 {
5977   import_milli (mulI);
5978   return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5979 }
5980 
5981 /* Emit the rtl for doing a division by a constant.  */
5982 
5983 /* Do magic division millicodes exist for this value? */
5984 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5985 
5986 /* We'll use an array to keep track of the magic millicodes and
5987    whether or not we've used them already. [n][0] is signed, [n][1] is
5988    unsigned.  */
5989 
5990 static int div_milli[16][2];
5991 
5992 int
pa_emit_hpdiv_const(rtx * operands,int unsignedp)5993 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5994 {
5995   if (GET_CODE (operands[2]) == CONST_INT
5996       && INTVAL (operands[2]) > 0
5997       && INTVAL (operands[2]) < 16
5998       && pa_magic_milli[INTVAL (operands[2])])
5999     {
6000       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
6001 
6002       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
6003       emit
6004           (gen_rtx_PARALLEL
6005            (VOIDmode,
6006             gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
6007                                              gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
6008                                                                  SImode,
6009                                                                  gen_rtx_REG (SImode, 26),
6010                                                                  operands[2])),
6011                          gen_rtx_CLOBBER (VOIDmode, operands[4]),
6012                          gen_rtx_CLOBBER (VOIDmode, operands[3]),
6013                          gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
6014                          gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
6015                          gen_rtx_CLOBBER (VOIDmode, ret))));
6016       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
6017       return 1;
6018     }
6019   return 0;
6020 }
6021 
6022 const char *
pa_output_div_insn(rtx * operands,int unsignedp,rtx_insn * insn)6023 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
6024 {
6025   HOST_WIDE_INT divisor;
6026 
6027   /* If the divisor is a constant, try to use one of the special
6028      opcodes .*/
6029   if (GET_CODE (operands[0]) == CONST_INT)
6030     {
6031       static char buf[100];
6032       divisor = INTVAL (operands[0]);
6033       if (!div_milli[divisor][unsignedp])
6034           {
6035             div_milli[divisor][unsignedp] = 1;
6036             if (unsignedp)
6037               output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6038             else
6039               output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6040           }
6041       if (unsignedp)
6042           {
6043             sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6044                        INTVAL (operands[0]));
6045             return pa_output_millicode_call (insn,
6046                                                      gen_rtx_SYMBOL_REF (SImode, buf));
6047           }
6048       else
6049           {
6050             sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6051                        INTVAL (operands[0]));
6052             return pa_output_millicode_call (insn,
6053                                                      gen_rtx_SYMBOL_REF (SImode, buf));
6054           }
6055     }
6056   /* Divisor isn't a special constant.  */
6057   else
6058     {
6059       if (unsignedp)
6060           {
6061             import_milli (divU);
6062             return pa_output_millicode_call (insn,
6063                                                   gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6064           }
6065       else
6066           {
6067             import_milli (divI);
6068             return pa_output_millicode_call (insn,
6069                                                   gen_rtx_SYMBOL_REF (SImode, "$$divI"));
6070           }
6071     }
6072 }
6073 
6074 /* Output a $$rem millicode to do mod.  */
6075 
6076 const char *
pa_output_mod_insn(int unsignedp,rtx_insn * insn)6077 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6078 {
6079   if (unsignedp)
6080     {
6081       import_milli (remU);
6082       return pa_output_millicode_call (insn,
6083                                                gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6084     }
6085   else
6086     {
6087       import_milli (remI);
6088       return pa_output_millicode_call (insn,
6089                                                gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6090     }
6091 }
6092 
6093 void
pa_output_arg_descriptor(rtx_insn * call_insn)6094 pa_output_arg_descriptor (rtx_insn *call_insn)
6095 {
6096   const char *arg_regs[4];
6097   machine_mode arg_mode;
6098   rtx link;
6099   int i, output_flag = 0;
6100   int regno;
6101 
6102   /* We neither need nor want argument location descriptors for the
6103      64bit runtime environment or the ELF32 environment.  */
6104   if (TARGET_64BIT || TARGET_ELF32)
6105     return;
6106 
6107   for (i = 0; i < 4; i++)
6108     arg_regs[i] = 0;
6109 
6110   /* Specify explicitly that no argument relocations should take place
6111      if using the portable runtime calling conventions.  */
6112   if (TARGET_PORTABLE_RUNTIME)
6113     {
6114       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6115                asm_out_file);
6116       return;
6117     }
6118 
6119   gcc_assert (CALL_P (call_insn));
6120   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6121        link; link = XEXP (link, 1))
6122     {
6123       rtx use = XEXP (link, 0);
6124 
6125       if (! (GET_CODE (use) == USE
6126                && GET_CODE (XEXP (use, 0)) == REG
6127                && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6128           continue;
6129 
6130       arg_mode = GET_MODE (XEXP (use, 0));
6131       regno = REGNO (XEXP (use, 0));
6132       if (regno >= 23 && regno <= 26)
6133           {
6134             arg_regs[26 - regno] = "GR";
6135             if (arg_mode == DImode)
6136               arg_regs[25 - regno] = "GR";
6137           }
6138       else if (regno >= 32 && regno <= 39)
6139           {
6140             if (arg_mode == SFmode)
6141               arg_regs[(regno - 32) / 2] = "FR";
6142             else
6143               {
6144 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6145                 arg_regs[(regno - 34) / 2] = "FR";
6146                 arg_regs[(regno - 34) / 2 + 1] = "FU";
6147 #else
6148                 arg_regs[(regno - 34) / 2] = "FU";
6149                 arg_regs[(regno - 34) / 2 + 1] = "FR";
6150 #endif
6151               }
6152           }
6153     }
6154   fputs ("\t.CALL ", asm_out_file);
6155   for (i = 0; i < 4; i++)
6156     {
6157       if (arg_regs[i])
6158           {
6159             if (output_flag++)
6160               fputc (',', asm_out_file);
6161             fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6162           }
6163     }
6164   fputc ('\n', asm_out_file);
6165 }
6166 
6167 /* Inform reload about cases where moving X with a mode MODE to or from
6168    a register in RCLASS requires an extra scratch or immediate register.
6169    Return the class needed for the immediate register.  */
6170 
6171 static reg_class_t
pa_secondary_reload(bool in_p,rtx x,reg_class_t rclass_i,machine_mode mode,secondary_reload_info * sri)6172 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6173                          machine_mode mode, secondary_reload_info *sri)
6174 {
6175   int regno;
6176   enum reg_class rclass = (enum reg_class) rclass_i;
6177 
6178   /* Handle the easy stuff first.  */
6179   if (rclass == R1_REGS)
6180     return NO_REGS;
6181 
6182   if (REG_P (x))
6183     {
6184       regno = REGNO (x);
6185       if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6186           return NO_REGS;
6187     }
6188   else
6189     regno = -1;
6190 
6191   /* If we have something like (mem (mem (...)), we can safely assume the
6192      inner MEM will end up in a general register after reloading, so there's
6193      no need for a secondary reload.  */
6194   if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6195     return NO_REGS;
6196 
6197   /* Trying to load a constant into a FP register during PIC code
6198      generation requires %r1 as a scratch register.  For float modes,
6199      the only legitimate constant is CONST0_RTX.  However, there are
6200      a few patterns that accept constant double operands.  */
6201   if (flag_pic
6202       && FP_REG_CLASS_P (rclass)
6203       && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6204     {
6205       switch (mode)
6206           {
6207           case E_SImode:
6208             sri->icode = CODE_FOR_reload_insi_r1;
6209             break;
6210 
6211           case E_DImode:
6212             sri->icode = CODE_FOR_reload_indi_r1;
6213             break;
6214 
6215           case E_SFmode:
6216             sri->icode = CODE_FOR_reload_insf_r1;
6217             break;
6218 
6219           case E_DFmode:
6220             sri->icode = CODE_FOR_reload_indf_r1;
6221             break;
6222 
6223           default:
6224             gcc_unreachable ();
6225           }
6226       return NO_REGS;
6227     }
6228 
6229   /* Secondary reloads of symbolic expressions require %r1 as a scratch
6230      register when we're generating PIC code or when the operand isn't
6231      readonly.  */
6232   if (pa_symbolic_expression_p (x))
6233     {
6234       if (GET_CODE (x) == HIGH)
6235           x = XEXP (x, 0);
6236 
6237       if (flag_pic || !read_only_operand (x, VOIDmode))
6238           {
6239             switch (mode)
6240               {
6241               case E_SImode:
6242                 sri->icode = CODE_FOR_reload_insi_r1;
6243                 break;
6244 
6245               case E_DImode:
6246                 sri->icode = CODE_FOR_reload_indi_r1;
6247                 break;
6248 
6249               default:
6250                 gcc_unreachable ();
6251               }
6252             return NO_REGS;
6253           }
6254     }
6255 
6256   /* Profiling showed the PA port spends about 1.3% of its compilation
6257      time in true_regnum from calls inside pa_secondary_reload_class.  */
6258   if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6259     regno = true_regnum (x);
6260 
6261   /* Handle reloads for floating point loads and stores.  */
6262   if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6263       && FP_REG_CLASS_P (rclass))
6264     {
6265       if (MEM_P (x))
6266           {
6267             x = XEXP (x, 0);
6268 
6269             /* We don't need a secondary reload for indexed memory addresses.
6270 
6271                When INT14_OK_STRICT is true, it might appear that we could
6272                directly allow register indirect memory addresses.  However,
6273                this doesn't work because we don't support SUBREGs in
6274                floating-point register copies and reload doesn't tell us
6275                when it's going to use a SUBREG.  */
6276             if (IS_INDEX_ADDR_P (x))
6277               return NO_REGS;
6278           }
6279 
6280       /* Request a secondary reload with a general scratch register
6281            for everything else.  ??? Could symbolic operands be handled
6282            directly when generating non-pic PA 2.0 code?  */
6283       sri->icode = (in_p
6284                         ? direct_optab_handler (reload_in_optab, mode)
6285                         : direct_optab_handler (reload_out_optab, mode));
6286       return NO_REGS;
6287     }
6288 
6289   /* A SAR<->FP register copy requires an intermediate general register
6290      and secondary memory.  We need a secondary reload with a general
6291      scratch register for spills.  */
6292   if (rclass == SHIFT_REGS)
6293     {
6294       /* Handle spill.  */
6295       if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6296           {
6297             sri->icode = (in_p
6298                               ? direct_optab_handler (reload_in_optab, mode)
6299                               : direct_optab_handler (reload_out_optab, mode));
6300             return NO_REGS;
6301           }
6302 
6303       /* Handle FP copy.  */
6304       if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6305           return GENERAL_REGS;
6306     }
6307 
6308   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6309       && REGNO_REG_CLASS (regno) == SHIFT_REGS
6310       && FP_REG_CLASS_P (rclass))
6311     return GENERAL_REGS;
6312 
6313   return NO_REGS;
6314 }
6315 
6316 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
6317 
6318 static bool
pa_secondary_memory_needed(machine_mode mode ATTRIBUTE_UNUSED,reg_class_t class1 ATTRIBUTE_UNUSED,reg_class_t class2 ATTRIBUTE_UNUSED)6319 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6320                                   reg_class_t class1 ATTRIBUTE_UNUSED,
6321                                   reg_class_t class2 ATTRIBUTE_UNUSED)
6322 {
6323 #ifdef PA_SECONDARY_MEMORY_NEEDED
6324   return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6325 #else
6326   return false;
6327 #endif
6328 }
6329 
6330 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6331    is only marked as live on entry by df-scan when it is a fixed
6332    register.  It isn't a fixed register in the 64-bit runtime,
6333    so we need to mark it here.  */
6334 
6335 static void
pa_extra_live_on_entry(bitmap regs)6336 pa_extra_live_on_entry (bitmap regs)
6337 {
6338   if (TARGET_64BIT)
6339     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6340 }
6341 
6342 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6343    to prevent it from being deleted.  */
6344 
6345 rtx
pa_eh_return_handler_rtx(void)6346 pa_eh_return_handler_rtx (void)
6347 {
6348   rtx tmp;
6349 
6350   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6351                           TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6352   tmp = gen_rtx_MEM (word_mode, tmp);
6353   tmp->volatil = 1;
6354   return tmp;
6355 }
6356 
6357 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6358    by invisible reference.  As a GCC extension, we also pass anything
6359    with a zero or variable size by reference.
6360 
6361    The 64-bit runtime does not describe passing any types by invisible
6362    reference.  The internals of GCC can't currently handle passing
6363    empty structures, and zero or variable length arrays when they are
6364    not passed entirely on the stack or by reference.  Thus, as a GCC
6365    extension, we pass these types by reference.  The HP compiler doesn't
6366    support these types, so hopefully there shouldn't be any compatibility
6367    issues.  This may have to be revisited when HP releases a C99 compiler
6368    or updates the ABI.  */
6369 
6370 static bool
pa_pass_by_reference(cumulative_args_t,const function_arg_info & arg)6371 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6372 {
6373   HOST_WIDE_INT size = arg.type_size_in_bytes ();
6374   if (TARGET_64BIT)
6375     return size <= 0;
6376   else
6377     return size <= 0 || size > 8;
6378 }
6379 
6380 /* Implement TARGET_FUNCTION_ARG_PADDING.  */
6381 
6382 static pad_direction
pa_function_arg_padding(machine_mode mode,const_tree type)6383 pa_function_arg_padding (machine_mode mode, const_tree type)
6384 {
6385   if (mode == BLKmode
6386       || (TARGET_64BIT
6387             && type
6388             && (AGGREGATE_TYPE_P (type)
6389                 || TREE_CODE (type) == COMPLEX_TYPE
6390                 || TREE_CODE (type) == VECTOR_TYPE)))
6391     {
6392       /* Return PAD_NONE if justification is not required.  */
6393       if (type
6394             && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6395             && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6396           return PAD_NONE;
6397 
6398       /* The directions set here are ignored when a BLKmode argument larger
6399            than a word is placed in a register.  Different code is used for
6400            the stack and registers.  This makes it difficult to have a
6401            consistent data representation for both the stack and registers.
6402            For both runtimes, the justification and padding for arguments on
6403            the stack and in registers should be identical.  */
6404       if (TARGET_64BIT)
6405           /* The 64-bit runtime specifies left justification for aggregates.  */
6406           return PAD_UPWARD;
6407       else
6408           /* The 32-bit runtime architecture specifies right justification.
6409              When the argument is passed on the stack, the argument is padded
6410              with garbage on the left.  The HP compiler pads with zeros.  */
6411           return PAD_DOWNWARD;
6412     }
6413 
6414   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6415     return PAD_DOWNWARD;
6416   else
6417     return PAD_NONE;
6418 }
6419 
6420 
6421 /* Do what is necessary for `va_start'.  We look at the current function
6422    to determine if stdargs or varargs is used and fill in an initial
6423    va_list.  A pointer to this constructor is returned.  */
6424 
6425 static rtx
hppa_builtin_saveregs(void)6426 hppa_builtin_saveregs (void)
6427 {
6428   rtx offset, dest;
6429   tree fntype = TREE_TYPE (current_function_decl);
6430   int argadj = ((!stdarg_p (fntype))
6431                     ? UNITS_PER_WORD : 0);
6432 
6433   if (argadj)
6434     offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6435   else
6436     offset = crtl->args.arg_offset_rtx;
6437 
6438   if (TARGET_64BIT)
6439     {
6440       int i, off;
6441 
6442       /* Adjust for varargs/stdarg differences.  */
6443       if (argadj)
6444           offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6445       else
6446           offset = crtl->args.arg_offset_rtx;
6447 
6448       /* We need to save %r26 .. %r19 inclusive starting at offset -64
6449            from the incoming arg pointer and growing to larger addresses.  */
6450       for (i = 26, off = -64; i >= 19; i--, off += 8)
6451           emit_move_insn (gen_rtx_MEM (word_mode,
6452                                              plus_constant (Pmode,
6453                                                                 arg_pointer_rtx, off)),
6454                               gen_rtx_REG (word_mode, i));
6455 
6456       /* The incoming args pointer points just beyond the flushback area;
6457            normally this is not a serious concern.  However, when we are doing
6458            varargs/stdargs we want to make the arg pointer point to the start
6459            of the incoming argument area.  */
6460       emit_move_insn (virtual_incoming_args_rtx,
6461                           plus_constant (Pmode, arg_pointer_rtx, -64));
6462 
6463       /* Now return a pointer to the first anonymous argument.  */
6464       return copy_to_reg (expand_binop (Pmode, add_optab,
6465                                                   virtual_incoming_args_rtx,
6466                                                   offset, 0, 0, OPTAB_LIB_WIDEN));
6467     }
6468 
6469   /* Store general registers on the stack.  */
6470   dest = gen_rtx_MEM (BLKmode,
6471                           plus_constant (Pmode, crtl->args.internal_arg_pointer,
6472                                              -16));
6473   set_mem_alias_set (dest, get_varargs_alias_set ());
6474   set_mem_align (dest, BITS_PER_WORD);
6475   move_block_from_reg (23, dest, 4);
6476 
6477   /* move_block_from_reg will emit code to store the argument registers
6478      individually as scalar stores.
6479 
6480      However, other insns may later load from the same addresses for
6481      a structure load (passing a struct to a varargs routine).
6482 
6483      The alias code assumes that such aliasing can never happen, so we
6484      have to keep memory referencing insns from moving up beyond the
6485      last argument register store.  So we emit a blockage insn here.  */
6486   emit_insn (gen_blockage ());
6487 
6488   return copy_to_reg (expand_binop (Pmode, add_optab,
6489                                             crtl->args.internal_arg_pointer,
6490                                             offset, 0, 0, OPTAB_LIB_WIDEN));
6491 }
6492 
6493 static void
hppa_va_start(tree valist,rtx nextarg)6494 hppa_va_start (tree valist, rtx nextarg)
6495 {
6496   nextarg = expand_builtin_saveregs ();
6497   std_expand_builtin_va_start (valist, nextarg);
6498 }
6499 
6500 static tree
hppa_gimplify_va_arg_expr(tree valist,tree type,gimple_seq * pre_p,gimple_seq * post_p)6501 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6502                                  gimple_seq *post_p)
6503 {
6504   if (TARGET_64BIT)
6505     {
6506       /* Args grow upward.  We can use the generic routines.  */
6507       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6508     }
6509   else /* !TARGET_64BIT */
6510     {
6511       tree ptr = build_pointer_type (type);
6512       tree valist_type;
6513       tree t, u;
6514       unsigned int size, ofs;
6515       bool indirect;
6516 
6517       indirect = pass_va_arg_by_reference (type);
6518       if (indirect)
6519           {
6520             type = ptr;
6521             ptr = build_pointer_type (type);
6522           }
6523       size = int_size_in_bytes (type);
6524       valist_type = TREE_TYPE (valist);
6525 
6526       /* Args grow down.  Not handled by generic routines.  */
6527 
6528       u = fold_convert (sizetype, size_in_bytes (type));
6529       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6530       t = fold_build_pointer_plus (valist, u);
6531 
6532       /* Align to 4 or 8 byte boundary depending on argument size.  */
6533 
6534       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6535       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6536       t = fold_convert (valist_type, t);
6537 
6538       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6539 
6540       ofs = (8 - size) % 4;
6541       if (ofs != 0)
6542           t = fold_build_pointer_plus_hwi (t, ofs);
6543 
6544       t = fold_convert (ptr, t);
6545       t = build_va_arg_indirect_ref (t);
6546 
6547       if (indirect)
6548           t = build_va_arg_indirect_ref (t);
6549 
6550       return t;
6551     }
6552 }
6553 
6554 /* True if MODE is valid for the target.  By "valid", we mean able to
6555    be manipulated in non-trivial ways.  In particular, this means all
6556    the arithmetic is supported.  */
6557 
6558 static bool
pa_scalar_mode_supported_p(scalar_mode mode)6559 pa_scalar_mode_supported_p (scalar_mode mode)
6560 {
6561   int precision = GET_MODE_PRECISION (mode);
6562 
6563   if (TARGET_64BIT && mode == TImode)
6564     return true;
6565 
6566   switch (GET_MODE_CLASS (mode))
6567     {
6568     case MODE_PARTIAL_INT:
6569     case MODE_INT:
6570       if (precision == CHAR_TYPE_SIZE)
6571           return true;
6572       if (precision == SHORT_TYPE_SIZE)
6573           return true;
6574       if (precision == INT_TYPE_SIZE)
6575           return true;
6576       if (precision == LONG_TYPE_SIZE)
6577           return true;
6578       if (precision == LONG_LONG_TYPE_SIZE)
6579           return true;
6580       return false;
6581 
6582     case MODE_FLOAT:
6583       if (precision == FLOAT_TYPE_SIZE)
6584           return true;
6585       if (precision == DOUBLE_TYPE_SIZE)
6586           return true;
6587       if (precision == LONG_DOUBLE_TYPE_SIZE)
6588           return true;
6589       return false;
6590 
6591     case MODE_DECIMAL_FLOAT:
6592       return false;
6593 
6594     default:
6595       gcc_unreachable ();
6596     }
6597 }
6598 
6599 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6600    it branches into the delay slot.  Otherwise, return FALSE.  */
6601 
6602 static bool
branch_to_delay_slot_p(rtx_insn * insn)6603 branch_to_delay_slot_p (rtx_insn *insn)
6604 {
6605   rtx_insn *jump_insn;
6606 
6607   if (dbr_sequence_length ())
6608     return FALSE;
6609 
6610   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6611   while (insn)
6612     {
6613       insn = next_active_insn (insn);
6614       if (jump_insn == insn)
6615           return TRUE;
6616 
6617       /* We can't rely on the length of asms.  So, we return FALSE when
6618            the branch is followed by an asm.  */
6619       if (!insn
6620             || GET_CODE (PATTERN (insn)) == ASM_INPUT
6621             || asm_noperands (PATTERN (insn)) >= 0
6622             || get_attr_length (insn) > 0)
6623           break;
6624     }
6625 
6626   return FALSE;
6627 }
6628 
6629 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6630 
6631    This occurs when INSN has an unfilled delay slot and is followed
6632    by an asm.  Disaster can occur if the asm is empty and the jump
6633    branches into the delay slot.  So, we add a nop in the delay slot
6634    when this occurs.  */
6635 
6636 static bool
branch_needs_nop_p(rtx_insn * insn)6637 branch_needs_nop_p (rtx_insn *insn)
6638 {
6639   rtx_insn *jump_insn;
6640 
6641   if (dbr_sequence_length ())
6642     return FALSE;
6643 
6644   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6645   while (insn)
6646     {
6647       insn = next_active_insn (insn);
6648       if (!insn || jump_insn == insn)
6649           return TRUE;
6650 
6651       if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6652              || asm_noperands (PATTERN (insn)) >= 0)
6653             && get_attr_length (insn) > 0)
6654           break;
6655     }
6656 
6657   return FALSE;
6658 }
6659 
6660 /* Return TRUE if INSN, a forward jump insn, can use nullification
6661    to skip the following instruction.  This avoids an extra cycle due
6662    to a mis-predicted branch when we fall through.  */
6663 
6664 static bool
use_skip_p(rtx_insn * insn)6665 use_skip_p (rtx_insn *insn)
6666 {
6667   rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6668 
6669   while (insn)
6670     {
6671       insn = next_active_insn (insn);
6672 
6673       /* We can't rely on the length of asms, so we can't skip asms.  */
6674       if (!insn
6675             || GET_CODE (PATTERN (insn)) == ASM_INPUT
6676             || asm_noperands (PATTERN (insn)) >= 0)
6677           break;
6678       if (get_attr_length (insn) == 4
6679             && jump_insn == next_active_insn (insn))
6680           return TRUE;
6681       if (get_attr_length (insn) > 0)
6682           break;
6683     }
6684 
6685   return FALSE;
6686 }
6687 
6688 /* This routine handles all the normal conditional branch sequences we
6689    might need to generate.  It handles compare immediate vs compare
6690    register, nullification of delay slots, varying length branches,
6691    negated branches, and all combinations of the above.  It returns the
6692    output appropriate to emit the branch corresponding to all given
6693    parameters.  */
6694 
6695 const char *
pa_output_cbranch(rtx * operands,int negated,rtx_insn * insn)6696 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6697 {
6698   static char buf[100];
6699   bool useskip;
6700   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6701   int length = get_attr_length (insn);
6702   int xdelay;
6703 
6704   /* A conditional branch to the following instruction (e.g. the delay slot)
6705      is asking for a disaster.  This can happen when not optimizing and
6706      when jump optimization fails.
6707 
6708      While it is usually safe to emit nothing, this can fail if the
6709      preceding instruction is a nullified branch with an empty delay
6710      slot and the same branch target as this branch.  We could check
6711      for this but jump optimization should eliminate nop jumps.  It
6712      is always safe to emit a nop.  */
6713   if (branch_to_delay_slot_p (insn))
6714     return "nop";
6715 
6716   /* The doubleword form of the cmpib instruction doesn't have the LEU
6717      and GTU conditions while the cmpb instruction does.  Since we accept
6718      zero for cmpb, we must ensure that we use cmpb for the comparison.  */
6719   if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6720     operands[2] = gen_rtx_REG (DImode, 0);
6721   if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6722     operands[1] = gen_rtx_REG (DImode, 0);
6723 
6724   /* If this is a long branch with its delay slot unfilled, set `nullify'
6725      as it can nullify the delay slot and save a nop.  */
6726   if (length == 8 && dbr_sequence_length () == 0)
6727     nullify = 1;
6728 
6729   /* If this is a short forward conditional branch which did not get
6730      its delay slot filled, the delay slot can still be nullified.  */
6731   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6732     nullify = forward_branch_p (insn);
6733 
6734   /* A forward branch over a single nullified insn can be done with a
6735      comclr instruction.  This avoids a single cycle penalty due to
6736      mis-predicted branch if we fall through (branch not taken).  */
6737   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6738 
6739   switch (length)
6740     {
6741       /* All short conditional branches except backwards with an unfilled
6742            delay slot.  */
6743       case 4:
6744           if (useskip)
6745             strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6746           else
6747             strcpy (buf, "{com%I2b,|cmp%I2b,}");
6748           if (GET_MODE (operands[1]) == DImode)
6749             strcat (buf, "*");
6750           if (negated)
6751             strcat (buf, "%B3");
6752           else
6753             strcat (buf, "%S3");
6754           if (useskip)
6755             strcat (buf, " %2,%r1,%%r0");
6756           else if (nullify)
6757             {
6758               if (branch_needs_nop_p (insn))
6759                 strcat (buf, ",n %2,%r1,%0%#");
6760               else
6761                 strcat (buf, ",n %2,%r1,%0");
6762             }
6763           else
6764             strcat (buf, " %2,%r1,%0");
6765           break;
6766 
6767      /* All long conditionals.  Note a short backward branch with an
6768           unfilled delay slot is treated just like a long backward branch
6769           with an unfilled delay slot.  */
6770       case 8:
6771           /* Handle weird backwards branch with a filled delay slot
6772              which is nullified.  */
6773           if (dbr_sequence_length () != 0
6774               && ! forward_branch_p (insn)
6775               && nullify)
6776             {
6777               strcpy (buf, "{com%I2b,|cmp%I2b,}");
6778               if (GET_MODE (operands[1]) == DImode)
6779                 strcat (buf, "*");
6780               if (negated)
6781                 strcat (buf, "%S3");
6782               else
6783                 strcat (buf, "%B3");
6784               strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6785             }
6786           /* Handle short backwards branch with an unfilled delay slot.
6787              Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6788              taken and untaken branches.  */
6789           else if (dbr_sequence_length () == 0
6790                      && ! forward_branch_p (insn)
6791                      && INSN_ADDRESSES_SET_P ()
6792                      && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6793                                             - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6794             {
6795               strcpy (buf, "{com%I2b,|cmp%I2b,}");
6796               if (GET_MODE (operands[1]) == DImode)
6797                 strcat (buf, "*");
6798               if (negated)
6799                 strcat (buf, "%B3 %2,%r1,%0%#");
6800               else
6801                 strcat (buf, "%S3 %2,%r1,%0%#");
6802             }
6803           else
6804             {
6805               strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6806               if (GET_MODE (operands[1]) == DImode)
6807                 strcat (buf, "*");
6808               if (negated)
6809                 strcat (buf, "%S3");
6810               else
6811                 strcat (buf, "%B3");
6812               if (nullify)
6813                 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6814               else
6815                 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6816             }
6817           break;
6818 
6819       default:
6820           /* The reversed conditional branch must branch over one additional
6821              instruction if the delay slot is filled and needs to be extracted
6822              by pa_output_lbranch.  If the delay slot is empty or this is a
6823              nullified forward branch, the instruction after the reversed
6824              condition branch must be nullified.  */
6825           if (dbr_sequence_length () == 0
6826               || (nullify && forward_branch_p (insn)))
6827             {
6828               nullify = 1;
6829               xdelay = 0;
6830               operands[4] = GEN_INT (length);
6831             }
6832           else
6833             {
6834               xdelay = 1;
6835               operands[4] = GEN_INT (length + 4);
6836             }
6837 
6838           /* Create a reversed conditional branch which branches around
6839              the following insns.  */
6840           if (GET_MODE (operands[1]) != DImode)
6841             {
6842               if (nullify)
6843                 {
6844                     if (negated)
6845                       strcpy (buf,
6846                         "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6847                     else
6848                       strcpy (buf,
6849                         "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6850                 }
6851               else
6852                 {
6853                     if (negated)
6854                       strcpy (buf,
6855                         "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6856                     else
6857                       strcpy (buf,
6858                         "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6859                 }
6860             }
6861           else
6862             {
6863               if (nullify)
6864                 {
6865                     if (negated)
6866                       strcpy (buf,
6867                         "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6868                     else
6869                       strcpy (buf,
6870                         "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6871                 }
6872               else
6873                 {
6874                     if (negated)
6875                       strcpy (buf,
6876                         "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6877                     else
6878                       strcpy (buf,
6879                         "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6880                 }
6881             }
6882 
6883           output_asm_insn (buf, operands);
6884           return pa_output_lbranch (operands[0], insn, xdelay);
6885     }
6886   return buf;
6887 }
6888 
6889 /* Output a PIC pc-relative instruction sequence to load the address of
6890    OPERANDS[0] to register OPERANDS[2].  OPERANDS[0] is a symbol ref
6891    or a code label.  OPERANDS[1] specifies the register to use to load
6892    the program counter.  OPERANDS[3] may be used for label generation
6893    The sequence is always three instructions in length.  The program
6894    counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6895    Register %r1 is clobbered.  */
6896 
6897 static void
pa_output_pic_pcrel_sequence(rtx * operands)6898 pa_output_pic_pcrel_sequence (rtx *operands)
6899 {
6900   gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6901   if (TARGET_PA_20)
6902     {
6903       /* We can use mfia to determine the current program counter.  */
6904       if (TARGET_SOM || !TARGET_GAS)
6905           {
6906             operands[3] = gen_label_rtx ();
6907             targetm.asm_out.internal_label (asm_out_file, "L",
6908                                                     CODE_LABEL_NUMBER (operands[3]));
6909             output_asm_insn ("mfia %1", operands);
6910             output_asm_insn ("addil L'%0-%l3,%1", operands);
6911             output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6912           }
6913       else
6914           {
6915             output_asm_insn ("mfia %1", operands);
6916             output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6917             output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6918           }
6919     }
6920   else
6921     {
6922       /* We need to use a branch to determine the current program counter.  */
6923       output_asm_insn ("{bl|b,l} .+8,%1", operands);
6924       if (TARGET_SOM || !TARGET_GAS)
6925           {
6926             operands[3] = gen_label_rtx ();
6927             output_asm_insn ("addil L'%0-%l3,%1", operands);
6928             targetm.asm_out.internal_label (asm_out_file, "L",
6929                                                     CODE_LABEL_NUMBER (operands[3]));
6930             output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6931           }
6932       else
6933           {
6934             output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6935             output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6936           }
6937     }
6938 }
6939 
6940 /* This routine handles output of long unconditional branches that
6941    exceed the maximum range of a simple branch instruction.  Since
6942    we don't have a register available for the branch, we save register
6943    %r1 in the frame marker, load the branch destination DEST into %r1,
6944    execute the branch, and restore %r1 in the delay slot of the branch.
6945 
6946    Since long branches may have an insn in the delay slot and the
6947    delay slot is used to restore %r1, we in general need to extract
6948    this insn and execute it before the branch.  However, to facilitate
6949    use of this function by conditional branches, we also provide an
6950    option to not extract the delay insn so that it will be emitted
6951    after the long branch.  So, if there is an insn in the delay slot,
6952    it is extracted if XDELAY is nonzero.
6953 
6954    The lengths of the various long-branch sequences are 20, 16 and 24
6955    bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
6956 
6957 const char *
pa_output_lbranch(rtx dest,rtx_insn * insn,int xdelay)6958 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6959 {
6960   rtx xoperands[4];
6961 
6962   xoperands[0] = dest;
6963 
6964   /* First, free up the delay slot.  */
6965   if (xdelay && dbr_sequence_length () != 0)
6966     {
6967       /* We can't handle a jump in the delay slot.  */
6968       gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6969 
6970       final_scan_insn (NEXT_INSN (insn), asm_out_file,
6971                            optimize, 0, NULL);
6972 
6973       /* Now delete the delay insn.  */
6974       SET_INSN_DELETED (NEXT_INSN (insn));
6975     }
6976 
6977   /* Output an insn to save %r1.  The runtime documentation doesn't
6978      specify whether the "Clean Up" slot in the callers frame can
6979      be clobbered by the callee.  It isn't copied by HP's builtin
6980      alloca, so this suggests that it can be clobbered if necessary.
6981      The "Static Link" location is copied by HP builtin alloca, so
6982      we avoid using it.  Using the cleanup slot might be a problem
6983      if we have to interoperate with languages that pass cleanup
6984      information.  However, it should be possible to handle these
6985      situations with GCC's asm feature.
6986 
6987      The "Current RP" slot is reserved for the called procedure, so
6988      we try to use it when we don't have a frame of our own.  It's
6989      rather unlikely that we won't have a frame when we need to emit
6990      a very long branch.
6991 
6992      Really the way to go long term is a register scavenger; goto
6993      the target of the jump and find a register which we can use
6994      as a scratch to hold the value in %r1.  Then, we wouldn't have
6995      to free up the delay slot or clobber a slot that may be needed
6996      for other purposes.  */
6997   if (TARGET_64BIT)
6998     {
6999       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7000           /* Use the return pointer slot in the frame marker.  */
7001           output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
7002       else
7003           /* Use the slot at -40 in the frame marker since HP builtin
7004              alloca doesn't copy it.  */
7005           output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
7006     }
7007   else
7008     {
7009       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7010           /* Use the return pointer slot in the frame marker.  */
7011           output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
7012       else
7013           /* Use the "Clean Up" slot in the frame marker.  In GCC,
7014              the only other use of this location is for copying a
7015              floating point double argument from a floating-point
7016              register to two general registers.  The copy is done
7017              as an "atomic" operation when outputting a call, so it
7018              won't interfere with our using the location here.  */
7019           output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
7020     }
7021 
7022   if (TARGET_PORTABLE_RUNTIME)
7023     {
7024       output_asm_insn ("ldil L'%0,%%r1", xoperands);
7025       output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7026       output_asm_insn ("bv %%r0(%%r1)", xoperands);
7027     }
7028   else if (flag_pic)
7029     {
7030       xoperands[1] = gen_rtx_REG (Pmode, 1);
7031       xoperands[2] = xoperands[1];
7032       pa_output_pic_pcrel_sequence (xoperands);
7033       output_asm_insn ("bv %%r0(%%r1)", xoperands);
7034     }
7035   else
7036     /* Now output a very long branch to the original target.  */
7037     output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
7038 
7039   /* Now restore the value of %r1 in the delay slot.  */
7040   if (TARGET_64BIT)
7041     {
7042       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7043           return "ldd -16(%%r30),%%r1";
7044       else
7045           return "ldd -40(%%r30),%%r1";
7046     }
7047   else
7048     {
7049       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7050           return "ldw -20(%%r30),%%r1";
7051       else
7052           return "ldw -12(%%r30),%%r1";
7053     }
7054 }
7055 
7056 /* This routine handles all the branch-on-bit conditional branch sequences we
7057    might need to generate.  It handles nullification of delay slots,
7058    varying length branches, negated branches and all combinations of the
7059    above.  it returns the appropriate output template to emit the branch.  */
7060 
7061 const char *
pa_output_bb(rtx * operands ATTRIBUTE_UNUSED,int negated,rtx_insn * insn,int which)7062 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
7063 {
7064   static char buf[100];
7065   bool useskip;
7066   int nullify = INSN_ANNULLED_BRANCH_P (insn);
7067   int length = get_attr_length (insn);
7068   int xdelay;
7069 
7070   /* A conditional branch to the following instruction (e.g. the delay slot) is
7071      asking for a disaster.  I do not think this can happen as this pattern
7072      is only used when optimizing; jump optimization should eliminate the
7073      jump.  But be prepared just in case.  */
7074 
7075   if (branch_to_delay_slot_p (insn))
7076     return "nop";
7077 
7078   /* If this is a long branch with its delay slot unfilled, set `nullify'
7079      as it can nullify the delay slot and save a nop.  */
7080   if (length == 8 && dbr_sequence_length () == 0)
7081     nullify = 1;
7082 
7083   /* If this is a short forward conditional branch which did not get
7084      its delay slot filled, the delay slot can still be nullified.  */
7085   if (! nullify && length == 4 && dbr_sequence_length () == 0)
7086     nullify = forward_branch_p (insn);
7087 
7088   /* A forward branch over a single nullified insn can be done with a
7089      extrs instruction.  This avoids a single cycle penalty due to
7090      mis-predicted branch if we fall through (branch not taken).  */
7091   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7092 
7093   switch (length)
7094     {
7095 
7096       /* All short conditional branches except backwards with an unfilled
7097            delay slot.  */
7098       case 4:
7099           if (useskip)
7100             strcpy (buf, "{extrs,|extrw,s,}");
7101           else
7102             strcpy (buf, "bb,");
7103           if (useskip && GET_MODE (operands[0]) == DImode)
7104             strcpy (buf, "extrd,s,*");
7105           else if (GET_MODE (operands[0]) == DImode)
7106             strcpy (buf, "bb,*");
7107           if ((which == 0 && negated)
7108                || (which == 1 && ! negated))
7109             strcat (buf, ">=");
7110           else
7111             strcat (buf, "<");
7112           if (useskip)
7113             strcat (buf, " %0,%1,1,%%r0");
7114           else if (nullify && negated)
7115             {
7116               if (branch_needs_nop_p (insn))
7117                 strcat (buf, ",n %0,%1,%3%#");
7118               else
7119                 strcat (buf, ",n %0,%1,%3");
7120             }
7121           else if (nullify && ! negated)
7122             {
7123               if (branch_needs_nop_p (insn))
7124                 strcat (buf, ",n %0,%1,%2%#");
7125               else
7126                 strcat (buf, ",n %0,%1,%2");
7127             }
7128           else if (! nullify && negated)
7129             strcat (buf, " %0,%1,%3");
7130           else if (! nullify && ! negated)
7131             strcat (buf, " %0,%1,%2");
7132           break;
7133 
7134      /* All long conditionals.  Note a short backward branch with an
7135           unfilled delay slot is treated just like a long backward branch
7136           with an unfilled delay slot.  */
7137       case 8:
7138           /* Handle weird backwards branch with a filled delay slot
7139              which is nullified.  */
7140           if (dbr_sequence_length () != 0
7141               && ! forward_branch_p (insn)
7142               && nullify)
7143             {
7144               strcpy (buf, "bb,");
7145               if (GET_MODE (operands[0]) == DImode)
7146                 strcat (buf, "*");
7147               if ((which == 0 && negated)
7148                     || (which == 1 && ! negated))
7149                 strcat (buf, "<");
7150               else
7151                 strcat (buf, ">=");
7152               if (negated)
7153                 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7154               else
7155                 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7156             }
7157           /* Handle short backwards branch with an unfilled delay slot.
7158              Using a bb;nop rather than extrs;bl saves 1 cycle for both
7159              taken and untaken branches.  */
7160           else if (dbr_sequence_length () == 0
7161                      && ! forward_branch_p (insn)
7162                      && INSN_ADDRESSES_SET_P ()
7163                      && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7164                                             - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7165             {
7166               strcpy (buf, "bb,");
7167               if (GET_MODE (operands[0]) == DImode)
7168                 strcat (buf, "*");
7169               if ((which == 0 && negated)
7170                     || (which == 1 && ! negated))
7171                 strcat (buf, ">=");
7172               else
7173                 strcat (buf, "<");
7174               if (negated)
7175                 strcat (buf, " %0,%1,%3%#");
7176               else
7177                 strcat (buf, " %0,%1,%2%#");
7178             }
7179           else
7180             {
7181               if (GET_MODE (operands[0]) == DImode)
7182                 strcpy (buf, "extrd,s,*");
7183               else
7184                 strcpy (buf, "{extrs,|extrw,s,}");
7185               if ((which == 0 && negated)
7186                     || (which == 1 && ! negated))
7187                 strcat (buf, "<");
7188               else
7189                 strcat (buf, ">=");
7190               if (nullify && negated)
7191                 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7192               else if (nullify && ! negated)
7193                 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7194               else if (negated)
7195                 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7196               else
7197                 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7198             }
7199           break;
7200 
7201       default:
7202           /* The reversed conditional branch must branch over one additional
7203              instruction if the delay slot is filled and needs to be extracted
7204              by pa_output_lbranch.  If the delay slot is empty or this is a
7205              nullified forward branch, the instruction after the reversed
7206              condition branch must be nullified.  */
7207           if (dbr_sequence_length () == 0
7208               || (nullify && forward_branch_p (insn)))
7209             {
7210               nullify = 1;
7211               xdelay = 0;
7212               operands[4] = GEN_INT (length);
7213             }
7214           else
7215             {
7216               xdelay = 1;
7217               operands[4] = GEN_INT (length + 4);
7218             }
7219 
7220           if (GET_MODE (operands[0]) == DImode)
7221             strcpy (buf, "bb,*");
7222           else
7223             strcpy (buf, "bb,");
7224           if ((which == 0 && negated)
7225               || (which == 1 && !negated))
7226             strcat (buf, "<");
7227           else
7228             strcat (buf, ">=");
7229           if (nullify)
7230             strcat (buf, ",n %0,%1,.+%4");
7231           else
7232             strcat (buf, " %0,%1,.+%4");
7233           output_asm_insn (buf, operands);
7234           return pa_output_lbranch (negated ? operands[3] : operands[2],
7235                                           insn, xdelay);
7236     }
7237   return buf;
7238 }
7239 
7240 /* This routine handles all the branch-on-variable-bit conditional branch
7241    sequences we might need to generate.  It handles nullification of delay
7242    slots, varying length branches, negated branches and all combinations
7243    of the above.  it returns the appropriate output template to emit the
7244    branch.  */
7245 
7246 const char *
pa_output_bvb(rtx * operands ATTRIBUTE_UNUSED,int negated,rtx_insn * insn,int which)7247 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7248                  int which)
7249 {
7250   static char buf[100];
7251   bool useskip;
7252   int nullify = INSN_ANNULLED_BRANCH_P (insn);
7253   int length = get_attr_length (insn);
7254   int xdelay;
7255 
7256   /* A conditional branch to the following instruction (e.g. the delay slot) is
7257      asking for a disaster.  I do not think this can happen as this pattern
7258      is only used when optimizing; jump optimization should eliminate the
7259      jump.  But be prepared just in case.  */
7260 
7261   if (branch_to_delay_slot_p (insn))
7262     return "nop";
7263 
7264   /* If this is a long branch with its delay slot unfilled, set `nullify'
7265      as it can nullify the delay slot and save a nop.  */
7266   if (length == 8 && dbr_sequence_length () == 0)
7267     nullify = 1;
7268 
7269   /* If this is a short forward conditional branch which did not get
7270      its delay slot filled, the delay slot can still be nullified.  */
7271   if (! nullify && length == 4 && dbr_sequence_length () == 0)
7272     nullify = forward_branch_p (insn);
7273 
7274   /* A forward branch over a single nullified insn can be done with a
7275      extrs instruction.  This avoids a single cycle penalty due to
7276      mis-predicted branch if we fall through (branch not taken).  */
7277   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7278 
7279   switch (length)
7280     {
7281 
7282       /* All short conditional branches except backwards with an unfilled
7283            delay slot.  */
7284       case 4:
7285           if (useskip)
7286             strcpy (buf, "{vextrs,|extrw,s,}");
7287           else
7288             strcpy (buf, "{bvb,|bb,}");
7289           if (useskip && GET_MODE (operands[0]) == DImode)
7290             strcpy (buf, "extrd,s,*");
7291           else if (GET_MODE (operands[0]) == DImode)
7292             strcpy (buf, "bb,*");
7293           if ((which == 0 && negated)
7294                || (which == 1 && ! negated))
7295             strcat (buf, ">=");
7296           else
7297             strcat (buf, "<");
7298           if (useskip)
7299             strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7300           else if (nullify && negated)
7301             {
7302               if (branch_needs_nop_p (insn))
7303                 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7304               else
7305                 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7306             }
7307           else if (nullify && ! negated)
7308             {
7309               if (branch_needs_nop_p (insn))
7310                 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7311               else
7312                 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7313             }
7314           else if (! nullify && negated)
7315             strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7316           else if (! nullify && ! negated)
7317             strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7318           break;
7319 
7320      /* All long conditionals.  Note a short backward branch with an
7321           unfilled delay slot is treated just like a long backward branch
7322           with an unfilled delay slot.  */
7323       case 8:
7324           /* Handle weird backwards branch with a filled delay slot
7325              which is nullified.  */
7326           if (dbr_sequence_length () != 0
7327               && ! forward_branch_p (insn)
7328               && nullify)
7329             {
7330               strcpy (buf, "{bvb,|bb,}");
7331               if (GET_MODE (operands[0]) == DImode)
7332                 strcat (buf, "*");
7333               if ((which == 0 && negated)
7334                     || (which == 1 && ! negated))
7335                 strcat (buf, "<");
7336               else
7337                 strcat (buf, ">=");
7338               if (negated)
7339                 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7340               else
7341                 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7342             }
7343           /* Handle short backwards branch with an unfilled delay slot.
7344              Using a bb;nop rather than extrs;bl saves 1 cycle for both
7345              taken and untaken branches.  */
7346           else if (dbr_sequence_length () == 0
7347                      && ! forward_branch_p (insn)
7348                      && INSN_ADDRESSES_SET_P ()
7349                      && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7350                                             - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7351             {
7352               strcpy (buf, "{bvb,|bb,}");
7353               if (GET_MODE (operands[0]) == DImode)
7354                 strcat (buf, "*");
7355               if ((which == 0 && negated)
7356                     || (which == 1 && ! negated))
7357                 strcat (buf, ">=");
7358               else
7359                 strcat (buf, "<");
7360               if (negated)
7361                 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7362               else
7363                 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7364             }
7365           else
7366             {
7367               strcpy (buf, "{vextrs,|extrw,s,}");
7368               if (GET_MODE (operands[0]) == DImode)
7369                 strcpy (buf, "extrd,s,*");
7370               if ((which == 0 && negated)
7371                     || (which == 1 && ! negated))
7372                 strcat (buf, "<");
7373               else
7374                 strcat (buf, ">=");
7375               if (nullify && negated)
7376                 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7377               else if (nullify && ! negated)
7378                 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7379               else if (negated)
7380                 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7381               else
7382                 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7383             }
7384           break;
7385 
7386       default:
7387           /* The reversed conditional branch must branch over one additional
7388              instruction if the delay slot is filled and needs to be extracted
7389              by pa_output_lbranch.  If the delay slot is empty or this is a
7390              nullified forward branch, the instruction after the reversed
7391              condition branch must be nullified.  */
7392           if (dbr_sequence_length () == 0
7393               || (nullify && forward_branch_p (insn)))
7394             {
7395               nullify = 1;
7396               xdelay = 0;
7397               operands[4] = GEN_INT (length);
7398             }
7399           else
7400             {
7401               xdelay = 1;
7402               operands[4] = GEN_INT (length + 4);
7403             }
7404 
7405           if (GET_MODE (operands[0]) == DImode)
7406             strcpy (buf, "bb,*");
7407           else
7408             strcpy (buf, "{bvb,|bb,}");
7409           if ((which == 0 && negated)
7410               || (which == 1 && !negated))
7411             strcat (buf, "<");
7412           else
7413             strcat (buf, ">=");
7414           if (nullify)
7415             strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7416           else
7417             strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7418           output_asm_insn (buf, operands);
7419           return pa_output_lbranch (negated ? operands[3] : operands[2],
7420                                           insn, xdelay);
7421     }
7422   return buf;
7423 }
7424 
7425 /* Return the output template for emitting a dbra type insn.
7426 
7427    Note it may perform some output operations on its own before
7428    returning the final output string.  */
7429 const char *
pa_output_dbra(rtx * operands,rtx_insn * insn,int which_alternative)7430 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7431 {
7432   int length = get_attr_length (insn);
7433 
7434   /* A conditional branch to the following instruction (e.g. the delay slot) is
7435      asking for a disaster.  Be prepared!  */
7436 
7437   if (branch_to_delay_slot_p (insn))
7438     {
7439       if (which_alternative == 0)
7440           return "ldo %1(%0),%0";
7441       else if (which_alternative == 1)
7442           {
7443             output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7444             output_asm_insn ("ldw -16(%%r30),%4", operands);
7445             output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7446             return "{fldws|fldw} -16(%%r30),%0";
7447           }
7448       else
7449           {
7450             output_asm_insn ("ldw %0,%4", operands);
7451             return "ldo %1(%4),%4\n\tstw %4,%0";
7452           }
7453     }
7454 
7455   if (which_alternative == 0)
7456     {
7457       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7458       int xdelay;
7459 
7460       /* If this is a long branch with its delay slot unfilled, set `nullify'
7461            as it can nullify the delay slot and save a nop.  */
7462       if (length == 8 && dbr_sequence_length () == 0)
7463           nullify = 1;
7464 
7465       /* If this is a short forward conditional branch which did not get
7466            its delay slot filled, the delay slot can still be nullified.  */
7467       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7468           nullify = forward_branch_p (insn);
7469 
7470       switch (length)
7471           {
7472           case 4:
7473             if (nullify)
7474               {
7475                 if (branch_needs_nop_p (insn))
7476                     return "addib,%C2,n %1,%0,%3%#";
7477                 else
7478                     return "addib,%C2,n %1,%0,%3";
7479               }
7480             else
7481               return "addib,%C2 %1,%0,%3";
7482 
7483           case 8:
7484             /* Handle weird backwards branch with a fulled delay slot
7485                which is nullified.  */
7486             if (dbr_sequence_length () != 0
7487                 && ! forward_branch_p (insn)
7488                 && nullify)
7489               return "addib,%N2,n %1,%0,.+12\n\tb %3";
7490             /* Handle short backwards branch with an unfilled delay slot.
7491                Using a addb;nop rather than addi;bl saves 1 cycle for both
7492                taken and untaken branches.  */
7493             else if (dbr_sequence_length () == 0
7494                        && ! forward_branch_p (insn)
7495                        && INSN_ADDRESSES_SET_P ()
7496                        && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7497                                               - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7498                 return "addib,%C2 %1,%0,%3%#";
7499 
7500             /* Handle normal cases.  */
7501             if (nullify)
7502               return "addi,%N2 %1,%0,%0\n\tb,n %3";
7503             else
7504               return "addi,%N2 %1,%0,%0\n\tb %3";
7505 
7506           default:
7507             /* The reversed conditional branch must branch over one additional
7508                instruction if the delay slot is filled and needs to be extracted
7509                by pa_output_lbranch.  If the delay slot is empty or this is a
7510                nullified forward branch, the instruction after the reversed
7511                condition branch must be nullified.  */
7512             if (dbr_sequence_length () == 0
7513                 || (nullify && forward_branch_p (insn)))
7514               {
7515                 nullify = 1;
7516                 xdelay = 0;
7517                 operands[4] = GEN_INT (length);
7518               }
7519             else
7520               {
7521                 xdelay = 1;
7522                 operands[4] = GEN_INT (length + 4);
7523               }
7524 
7525             if (nullify)
7526               output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7527             else
7528               output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7529 
7530             return pa_output_lbranch (operands[3], insn, xdelay);
7531           }
7532 
7533     }
7534   /* Deal with gross reload from FP register case.  */
7535   else if (which_alternative == 1)
7536     {
7537       /* Move loop counter from FP register to MEM then into a GR,
7538            increment the GR, store the GR into MEM, and finally reload
7539            the FP register from MEM from within the branch's delay slot.  */
7540       output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7541                            operands);
7542       output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7543       if (length == 24)
7544           return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7545       else if (length == 28)
7546           return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7547       else
7548           {
7549             operands[5] = GEN_INT (length - 16);
7550             output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7551             output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7552             return pa_output_lbranch (operands[3], insn, 0);
7553           }
7554     }
7555   /* Deal with gross reload from memory case.  */
7556   else
7557     {
7558       /* Reload loop counter from memory, the store back to memory
7559            happens in the branch's delay slot.  */
7560       output_asm_insn ("ldw %0,%4", operands);
7561       if (length == 12)
7562           return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7563       else if (length == 16)
7564           return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7565       else
7566           {
7567             operands[5] = GEN_INT (length - 4);
7568             output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7569             return pa_output_lbranch (operands[3], insn, 0);
7570           }
7571     }
7572 }
7573 
7574 /* Return the output template for emitting a movb type insn.
7575 
7576    Note it may perform some output operations on its own before
7577    returning the final output string.  */
7578 const char *
pa_output_movb(rtx * operands,rtx_insn * insn,int which_alternative,int reverse_comparison)7579 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7580                int reverse_comparison)
7581 {
7582   int length = get_attr_length (insn);
7583 
7584   /* A conditional branch to the following instruction (e.g. the delay slot) is
7585      asking for a disaster.  Be prepared!  */
7586 
7587   if (branch_to_delay_slot_p (insn))
7588     {
7589       if (which_alternative == 0)
7590           return "copy %1,%0";
7591       else if (which_alternative == 1)
7592           {
7593             output_asm_insn ("stw %1,-16(%%r30)", operands);
7594             return "{fldws|fldw} -16(%%r30),%0";
7595           }
7596       else if (which_alternative == 2)
7597           return "stw %1,%0";
7598       else
7599           return "mtsar %r1";
7600     }
7601 
7602   /* Support the second variant.  */
7603   if (reverse_comparison)
7604     PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7605 
7606   if (which_alternative == 0)
7607     {
7608       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7609       int xdelay;
7610 
7611       /* If this is a long branch with its delay slot unfilled, set `nullify'
7612            as it can nullify the delay slot and save a nop.  */
7613       if (length == 8 && dbr_sequence_length () == 0)
7614           nullify = 1;
7615 
7616       /* If this is a short forward conditional branch which did not get
7617            its delay slot filled, the delay slot can still be nullified.  */
7618       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7619           nullify = forward_branch_p (insn);
7620 
7621       switch (length)
7622           {
7623           case 4:
7624             if (nullify)
7625               {
7626                 if (branch_needs_nop_p (insn))
7627                     return "movb,%C2,n %1,%0,%3%#";
7628                 else
7629                     return "movb,%C2,n %1,%0,%3";
7630               }
7631             else
7632               return "movb,%C2 %1,%0,%3";
7633 
7634           case 8:
7635             /* Handle weird backwards branch with a filled delay slot
7636                which is nullified.  */
7637             if (dbr_sequence_length () != 0
7638                 && ! forward_branch_p (insn)
7639                 && nullify)
7640               return "movb,%N2,n %1,%0,.+12\n\tb %3";
7641 
7642             /* Handle short backwards branch with an unfilled delay slot.
7643                Using a movb;nop rather than or;bl saves 1 cycle for both
7644                taken and untaken branches.  */
7645             else if (dbr_sequence_length () == 0
7646                        && ! forward_branch_p (insn)
7647                        && INSN_ADDRESSES_SET_P ()
7648                        && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7649                                               - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7650               return "movb,%C2 %1,%0,%3%#";
7651             /* Handle normal cases.  */
7652             if (nullify)
7653               return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7654             else
7655               return "or,%N2 %1,%%r0,%0\n\tb %3";
7656 
7657           default:
7658             /* The reversed conditional branch must branch over one additional
7659                instruction if the delay slot is filled and needs to be extracted
7660                by pa_output_lbranch.  If the delay slot is empty or this is a
7661                nullified forward branch, the instruction after the reversed
7662                condition branch must be nullified.  */
7663             if (dbr_sequence_length () == 0
7664                 || (nullify && forward_branch_p (insn)))
7665               {
7666                 nullify = 1;
7667                 xdelay = 0;
7668                 operands[4] = GEN_INT (length);
7669               }
7670             else
7671               {
7672                 xdelay = 1;
7673                 operands[4] = GEN_INT (length + 4);
7674               }
7675 
7676             if (nullify)
7677               output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7678             else
7679               output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7680 
7681             return pa_output_lbranch (operands[3], insn, xdelay);
7682           }
7683     }
7684   /* Deal with gross reload for FP destination register case.  */
7685   else if (which_alternative == 1)
7686     {
7687       /* Move source register to MEM, perform the branch test, then
7688            finally load the FP register from MEM from within the branch's
7689            delay slot.  */
7690       output_asm_insn ("stw %1,-16(%%r30)", operands);
7691       if (length == 12)
7692           return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7693       else if (length == 16)
7694           return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7695       else
7696           {
7697             operands[4] = GEN_INT (length - 4);
7698             output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7699             output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7700             return pa_output_lbranch (operands[3], insn, 0);
7701           }
7702     }
7703   /* Deal with gross reload from memory case.  */
7704   else if (which_alternative == 2)
7705     {
7706       /* Reload loop counter from memory, the store back to memory
7707            happens in the branch's delay slot.  */
7708       if (length == 8)
7709           return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7710       else if (length == 12)
7711           return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7712       else
7713           {
7714             operands[4] = GEN_INT (length);
7715             output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7716                                  operands);
7717             return pa_output_lbranch (operands[3], insn, 0);
7718           }
7719     }
7720   /* Handle SAR as a destination.  */
7721   else
7722     {
7723       if (length == 8)
7724           return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7725       else if (length == 12)
7726           return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7727       else
7728           {
7729             operands[4] = GEN_INT (length);
7730             output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7731                                  operands);
7732             return pa_output_lbranch (operands[3], insn, 0);
7733           }
7734     }
7735 }
7736 
7737 /* Copy any FP arguments in INSN into integer registers.  */
7738 static void
copy_fp_args(rtx_insn * insn)7739 copy_fp_args (rtx_insn *insn)
7740 {
7741   rtx link;
7742   rtx xoperands[2];
7743 
7744   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7745     {
7746       int arg_mode, regno;
7747       rtx use = XEXP (link, 0);
7748 
7749       if (! (GET_CODE (use) == USE
7750             && GET_CODE (XEXP (use, 0)) == REG
7751             && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7752           continue;
7753 
7754       arg_mode = GET_MODE (XEXP (use, 0));
7755       regno = REGNO (XEXP (use, 0));
7756 
7757       /* Is it a floating point register?  */
7758       if (regno >= 32 && regno <= 39)
7759           {
7760             /* Copy the FP register into an integer register via memory.  */
7761             if (arg_mode == SFmode)
7762               {
7763                 xoperands[0] = XEXP (use, 0);
7764                 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7765                 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7766                 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7767               }
7768             else
7769               {
7770                 xoperands[0] = XEXP (use, 0);
7771                 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7772                 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7773                 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7774                 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7775               }
7776           }
7777     }
7778 }
7779 
7780 /* Compute length of the FP argument copy sequence for INSN.  */
7781 static int
length_fp_args(rtx_insn * insn)7782 length_fp_args (rtx_insn *insn)
7783 {
7784   int length = 0;
7785   rtx link;
7786 
7787   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7788     {
7789       int arg_mode, regno;
7790       rtx use = XEXP (link, 0);
7791 
7792       if (! (GET_CODE (use) == USE
7793             && GET_CODE (XEXP (use, 0)) == REG
7794             && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7795           continue;
7796 
7797       arg_mode = GET_MODE (XEXP (use, 0));
7798       regno = REGNO (XEXP (use, 0));
7799 
7800       /* Is it a floating point register?  */
7801       if (regno >= 32 && regno <= 39)
7802           {
7803             if (arg_mode == SFmode)
7804               length += 8;
7805             else
7806               length += 12;
7807           }
7808     }
7809 
7810   return length;
7811 }
7812 
7813 /* Return the attribute length for the millicode call instruction INSN.
7814    The length must match the code generated by pa_output_millicode_call.
7815    We include the delay slot in the returned length as it is better to
7816    over estimate the length than to under estimate it.  */
7817 
7818 int
pa_attr_length_millicode_call(rtx_insn * insn)7819 pa_attr_length_millicode_call (rtx_insn *insn)
7820 {
7821   unsigned long distance = -1;
7822   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7823 
7824   if (INSN_ADDRESSES_SET_P ())
7825     {
7826       distance = (total + insn_current_reference_address (insn));
7827       if (distance < total)
7828           distance = -1;
7829     }
7830 
7831   if (TARGET_64BIT)
7832     {
7833       if (!TARGET_LONG_CALLS && distance < 7600000)
7834           return 8;
7835 
7836       return 20;
7837     }
7838   else if (TARGET_PORTABLE_RUNTIME)
7839     return 24;
7840   else
7841     {
7842       if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7843           return 8;
7844 
7845       if (!flag_pic)
7846           return 12;
7847 
7848       return 24;
7849     }
7850 }
7851 
7852 /* INSN is a function call.
7853 
7854    CALL_DEST is the routine we are calling.  */
7855 
7856 const char *
pa_output_millicode_call(rtx_insn * insn,rtx call_dest)7857 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7858 {
7859   int attr_length = get_attr_length (insn);
7860   int seq_length = dbr_sequence_length ();
7861   rtx xoperands[4];
7862 
7863   xoperands[0] = call_dest;
7864 
7865   /* Handle the common case where we are sure that the branch will
7866      reach the beginning of the $CODE$ subspace.  The within reach
7867      form of the $$sh_func_adrs call has a length of 28.  Because it
7868      has an attribute type of sh_func_adrs, it never has a nonzero
7869      sequence length (i.e., the delay slot is never filled).  */
7870   if (!TARGET_LONG_CALLS
7871       && (attr_length == 8
7872             || (attr_length == 28
7873                 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7874     {
7875       xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7876       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7877     }
7878   else
7879     {
7880       if (TARGET_64BIT)
7881           {
7882             /* It might seem that one insn could be saved by accessing
7883                the millicode function using the linkage table.  However,
7884                this doesn't work in shared libraries and other dynamically
7885                loaded objects.  Using a pc-relative sequence also avoids
7886                problems related to the implicit use of the gp register.  */
7887             xoperands[1] = gen_rtx_REG (Pmode, 1);
7888             xoperands[2] = xoperands[1];
7889             pa_output_pic_pcrel_sequence (xoperands);
7890             output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7891           }
7892       else if (TARGET_PORTABLE_RUNTIME)
7893           {
7894             /* Pure portable runtime doesn't allow be/ble; we also don't
7895                have PIC support in the assembler/linker, so this sequence
7896                is needed.  */
7897 
7898             /* Get the address of our target into %r1.  */
7899             output_asm_insn ("ldil L'%0,%%r1", xoperands);
7900             output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7901 
7902             /* Get our return address into %r31.  */
7903             output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7904             output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7905 
7906             /* Jump to our target address in %r1.  */
7907             output_asm_insn ("bv %%r0(%%r1)", xoperands);
7908           }
7909       else if (!flag_pic)
7910           {
7911             output_asm_insn ("ldil L'%0,%%r1", xoperands);
7912             if (TARGET_PA_20)
7913               output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7914             else
7915               output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7916           }
7917       else
7918           {
7919             xoperands[1] = gen_rtx_REG (Pmode, 31);
7920             xoperands[2] = gen_rtx_REG (Pmode, 1);
7921             pa_output_pic_pcrel_sequence (xoperands);
7922 
7923             /* Adjust return address.  */
7924             output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7925 
7926             /* Jump to our target address in %r1.  */
7927             output_asm_insn ("bv %%r0(%%r1)", xoperands);
7928           }
7929     }
7930 
7931   if (seq_length == 0)
7932     output_asm_insn ("nop", xoperands);
7933 
7934   return "";
7935 }
7936 
7937 /* Return the attribute length of the call instruction INSN.  The SIBCALL
7938    flag indicates whether INSN is a regular call or a sibling call.  The
7939    length returned must be longer than the code actually generated by
7940    pa_output_call.  Since branch shortening is done before delay branch
7941    sequencing, there is no way to determine whether or not the delay
7942    slot will be filled during branch shortening.  Even when the delay
7943    slot is filled, we may have to add a nop if the delay slot contains
7944    a branch that can't reach its target.  Thus, we always have to include
7945    the delay slot in the length estimate.  This used to be done in
7946    pa_adjust_insn_length but we do it here now as some sequences always
7947    fill the delay slot and we can save four bytes in the estimate for
7948    these sequences.  */
7949 
7950 int
pa_attr_length_call(rtx_insn * insn,int sibcall)7951 pa_attr_length_call (rtx_insn *insn, int sibcall)
7952 {
7953   int local_call;
7954   rtx call, call_dest;
7955   tree call_decl;
7956   int length = 0;
7957   rtx pat = PATTERN (insn);
7958   unsigned long distance = -1;
7959 
7960   gcc_assert (CALL_P (insn));
7961 
7962   if (INSN_ADDRESSES_SET_P ())
7963     {
7964       unsigned long total;
7965 
7966       total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7967       distance = (total + insn_current_reference_address (insn));
7968       if (distance < total)
7969           distance = -1;
7970     }
7971 
7972   gcc_assert (GET_CODE (pat) == PARALLEL);
7973 
7974   /* Get the call rtx.  */
7975   call = XVECEXP (pat, 0, 0);
7976   if (GET_CODE (call) == SET)
7977     call = SET_SRC (call);
7978 
7979   gcc_assert (GET_CODE (call) == CALL);
7980 
7981   /* Determine if this is a local call.  */
7982   call_dest = XEXP (XEXP (call, 0), 0);
7983   call_decl = SYMBOL_REF_DECL (call_dest);
7984   local_call = call_decl && targetm.binds_local_p (call_decl);
7985 
7986   /* pc-relative branch.  */
7987   if (!TARGET_LONG_CALLS
7988       && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7989             || distance < MAX_PCREL17F_OFFSET))
7990     length += 8;
7991 
7992   /* 64-bit plabel sequence.  */
7993   else if (TARGET_64BIT && !local_call)
7994     length += 24;
7995 
7996   /* non-pic long absolute branch sequence.  */
7997   else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7998     length += 12;
7999 
8000   /* long pc-relative branch sequence.  */
8001   else if (TARGET_LONG_PIC_SDIFF_CALL
8002              || (TARGET_GAS && !TARGET_SOM && local_call))
8003     {
8004       length += 20;
8005 
8006       if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8007           length += 8;
8008     }
8009 
8010   /* 32-bit plabel sequence.  */
8011   else
8012     {
8013       length += 32;
8014 
8015       if (TARGET_SOM)
8016           length += length_fp_args (insn);
8017 
8018       if (flag_pic)
8019           length += 4;
8020 
8021       if (!TARGET_PA_20)
8022           {
8023             if (!sibcall)
8024               length += 8;
8025 
8026             if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8027               length += 8;
8028           }
8029     }
8030 
8031   return length;
8032 }
8033 
/* Output the assembler code for the direct call INSN and return an
   empty template, since every instruction is written here with
   output_asm_insn.

   INSN is a function call.

   CALL_DEST is the routine we are calling.

   SIBCALL is nonzero for a sibling call.  In that case the short branch
   links to register 0 instead of register 2, and the long sequences use
   non-linking branches and skip the return pointer setup.

   The sequence is chosen with the same conditions that
   pa_attr_length_call tests.  SEQ_LENGTH tracks whether an instruction
   occupies the delay slot of the final branch; when it is still zero at
   the end, a nop is emitted to fill the slot.  */

const char *
pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
{
  int seq_length = dbr_sequence_length ();
  tree call_decl = SYMBOL_REF_DECL (call_dest);
  int local_call = call_decl && targetm.binds_local_p (call_decl);
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the "$CODE$" subspace.  This is the beginning of
     the current function if we are in a named section.  */
  if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
    {
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT && !local_call)
          {
            /* ??? As far as I can tell, the HP linker doesn't support the
               long pc-relative sequence described in the 64-bit runtime
               architecture.  So, we use a slightly longer indirect call.  */
            xoperands[0] = pa_get_deferred_plabel (call_dest);
            xoperands[1] = gen_label_rtx ();

            /* Put the load of %r27 into the delay slot.  We don't need to
               do anything when generating fast indirect calls.  Since the
               sequence uses the slot itself, an insn that was scheduled
               into the delay slot is output ahead of the sequence.  */
            if (seq_length != 0)
              {
                final_scan_insn (NEXT_INSN (insn), asm_out_file,
                                     optimize, 0, NULL);

                /* Now delete the delay insn.  */
                SET_INSN_DELETED (NEXT_INSN (insn));
              }

            output_asm_insn ("addil LT'%0,%%r27", xoperands);
            output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
            output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
            output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
            output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
            output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
            /* The %r27 load above sits in the delay slot; no nop needed.  */
            seq_length = 1;
          }
      else
          {
            /* Set when none of the other long call sequences applies and
               the plabel-based indirect sequence has to be used.  */
            int indirect_call = 0;

            /* Emit a long call.  There are several different sequences
               of increasing length and complexity.  In most cases,
               they don't allow an instruction in the delay slot.  */
            if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
                && !TARGET_LONG_PIC_SDIFF_CALL
                && !(TARGET_GAS && !TARGET_SOM && local_call)
                && !TARGET_64BIT)
              indirect_call = 1;

            if (seq_length != 0
                && !sibcall
                && (!TARGET_PA_20
                      || indirect_call
                      || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
              {
                /* A non-jump insn in the delay slot.  By definition we can
                   emit this insn before the call (and in fact before
                   argument relocating).  */
                final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
                                     NULL);

                /* Now delete the delay insn.  */
                SET_INSN_DELETED (NEXT_INSN (insn));
                seq_length = 0;
              }

            if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
              {
                /* This is the best sequence for making long calls in
                   non-pic code.  Unfortunately, GNU ld doesn't provide
                   the stub needed for external calls, and GAS's support
                   for this with the SOM linker is buggy.  It is safe
                   to use this for local calls.  */
                output_asm_insn ("ldil L'%0,%%r1", xoperands);
                if (sibcall)
                    output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
                else
                    {
                      if (TARGET_PA_20)
                        output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
                                             xoperands);
                      else
                        output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

                      /* The branch links through %r31; the copy into %r2
                         goes in the delay slot.  */
                      output_asm_insn ("copy %%r31,%%r2", xoperands);
                      seq_length = 1;
                    }
              }
            else
              {
                /* The HP assembler and linker can handle relocations for
                   the difference of two symbols.  The HP assembler
                   recognizes the sequence as a pc-relative call and
                   the linker provides stubs when needed.  */

                /* GAS currently can't generate the relocations that
                   are needed for the SOM linker under HP-UX using this
                   sequence.  The GNU linker doesn't generate the stubs
                   that are needed for external calls on TARGET_ELF32
                   with this sequence.  For now, we have to use a longer
                   plabel sequence when using GAS for non local calls.  */
                if (TARGET_LONG_PIC_SDIFF_CALL
                      || (TARGET_GAS && !TARGET_SOM && local_call))
                    {
                      /* Both scratch operands are %r1 here; the branches
                         emitted below go through %r1.  */
                      xoperands[1] = gen_rtx_REG (Pmode, 1);
                      xoperands[2] = xoperands[1];
                      pa_output_pic_pcrel_sequence (xoperands);
                    }
                else
                    {
                      /* Emit a long plabel-based call sequence.  This is
                         essentially an inline implementation of $$dyncall.
                         We don't actually try to call $$dyncall as this is
                         as difficult as calling the function itself.  */
                      xoperands[0] = pa_get_deferred_plabel (call_dest);
                      xoperands[1] = gen_label_rtx ();

                      /* Since the call is indirect, FP arguments in registers
                         need to be copied to the general registers.  Then, the
                         argument relocation stub will copy them back.  */
                      if (TARGET_SOM)
                        copy_fp_args (insn);

                      if (flag_pic)
                        {
                          output_asm_insn ("addil LT'%0,%%r19", xoperands);
                          output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
                          output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
                        }
                      else
                        {
                          output_asm_insn ("addil LR'%0-$global$,%%r27",
                                               xoperands);
                          output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
                                               xoperands);
                        }

                      output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
                      output_asm_insn ("depi 0,31,2,%%r22", xoperands);
                      /* Should this be an ordered load to ensure the target
                         address is loaded before the global pointer?  */
                      output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
                      output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);

                      /* Without PA 2.0, set up the return pointer in %r2 by
                         hand.  The larger adjustment is used in exactly the
                         case where the two space register instructions are
                         emitted before the branch below.  */
                      if (!sibcall && !TARGET_PA_20)
                        {
                          output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
                          if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                              output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
                          else
                              output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
                        }
                    }

                if (TARGET_PA_20)
                    {
                      if (sibcall)
                        output_asm_insn ("bve (%%r1)", xoperands);
                      else
                        {
                          if (indirect_call)
                              {
                                output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
                                /* NOTE(review): presumably this saves the
                                   return pointer in the caller's frame for
                                   the stub reached through the plabel;
                                   confirm against the runtime docs.  */
                                output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
                                seq_length = 1;
                              }
                          else
                              output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
                        }
                    }
                else
                    {
                      /* Load the target's space id into %sr0 unless space
                         registers are disabled or the call is a local
                         non-pic call, in which case %sr4 is used below.  */
                      if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
                        output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
                                             xoperands);

                      if (sibcall)
                        {
                          if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                              output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
                          else
                              output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
                        }
                      else
                        {
                          if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                              output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
                          else
                              output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

                          /* Either instruction fills the delay slot of the
                             ble above.  */
                          if (indirect_call)
                              output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
                          else
                              output_asm_insn ("copy %%r31,%%r2", xoperands);
                          seq_length = 1;
                        }
                    }
              }
          }
    }

  /* Nothing was placed in the delay slot of the final branch.  */
  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}
8255 
8256 /* Return the attribute length of the indirect call instruction INSN.
8257    The length must match the code generated by output_indirect call.
8258    The returned length includes the delay slot.  Currently, the delay
8259    slot of an indirect call sequence is not exposed and it is used by
8260    the sequence itself.  */
8261 
8262 int
pa_attr_length_indirect_call(rtx_insn * insn)8263 pa_attr_length_indirect_call (rtx_insn *insn)
8264 {
8265   unsigned long distance = -1;
8266   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8267 
8268   if (INSN_ADDRESSES_SET_P ())
8269     {
8270       distance = (total + insn_current_reference_address (insn));
8271       if (distance < total)
8272           distance = -1;
8273     }
8274 
8275   if (TARGET_64BIT)
8276     return 12;
8277 
8278   if (TARGET_FAST_INDIRECT_CALLS)
8279     return 8;
8280 
8281   if (TARGET_PORTABLE_RUNTIME)
8282     return 16;
8283 
8284   if (!TARGET_LONG_CALLS
8285       && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8286             || distance < MAX_PCREL17F_OFFSET))
8287     return 8;
8288 
8289   /* Out of reach, can use ble.  */
8290   if (!flag_pic)
8291     return 12;
8292 
8293   /* Inline versions of $$dyncall.  */
8294   if (!optimize_size)
8295     {
8296       if (TARGET_NO_SPACE_REGS)
8297           return 28;
8298 
8299       if (TARGET_PA_20)
8300           return 32;
8301     }
8302 
8303   /* Long PIC pc-relative call.  */
8304   return 20;
8305 }
8306 
8307 const char *
pa_output_indirect_call(rtx_insn * insn,rtx call_dest)8308 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8309 {
8310   rtx xoperands[4];
8311   int length;
8312 
8313   if (TARGET_64BIT)
8314     {
8315       xoperands[0] = call_dest;
8316       output_asm_insn ("ldd 16(%0),%%r2\n\t"
8317                            "bve,l (%%r2),%%r2\n\t"
8318                            "ldd 24(%0),%%r27", xoperands);
8319       return "";
8320     }
8321 
8322   /* First the special case for kernels, level 0 systems, etc.  */
8323   if (TARGET_FAST_INDIRECT_CALLS)
8324     {
8325       pa_output_arg_descriptor (insn);
8326       if (TARGET_PA_20)
8327           return "bve,l,n (%%r22),%%r2\n\tnop";
8328       return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8329     }
8330 
8331   if (TARGET_PORTABLE_RUNTIME)
8332     {
8333       output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8334                            "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8335       pa_output_arg_descriptor (insn);
8336       return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8337     }
8338 
8339   /* Now the normal case -- we can reach $$dyncall directly or
8340      we're sure that we can get there via a long-branch stub.
8341 
8342      No need to check target flags as the length uniquely identifies
8343      the remaining cases.  */
8344   length = pa_attr_length_indirect_call (insn);
8345   if (length == 8)
8346     {
8347       pa_output_arg_descriptor (insn);
8348 
8349       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8350            $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8351            variant of the B,L instruction can't be used on the SOM target.  */
8352       if (TARGET_PA_20 && !TARGET_SOM)
8353           return "b,l,n $$dyncall,%%r2\n\tnop";
8354       else
8355           return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8356     }
8357 
8358   /* Long millicode call, but we are not generating PIC or portable runtime
8359      code.  */
8360   if (length == 12)
8361     {
8362       output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8363       pa_output_arg_descriptor (insn);
8364       return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8365     }
8366 
8367   /* The long PIC pc-relative call sequence is five instructions.  So,
8368      let's use an inline version of $$dyncall when the calling sequence
8369      has a roughly similar number of instructions and we are not optimizing
8370      for size.  We need two instructions to load the return pointer plus
8371      the $$dyncall implementation.  */
8372   if (!optimize_size)
8373     {
8374       if (TARGET_NO_SPACE_REGS)
8375           {
8376             pa_output_arg_descriptor (insn);
8377             output_asm_insn ("bl .+8,%%r2\n\t"
8378                                  "ldo 20(%%r2),%%r2\n\t"
8379                                  "extru,<> %%r22,30,1,%%r0\n\t"
8380                                  "bv,n %%r0(%%r22)\n\t"
8381                                  "ldw -2(%%r22),%%r21\n\t"
8382                                  "bv %%r0(%%r21)\n\t"
8383                                  "ldw 2(%%r22),%%r19", xoperands);
8384             return "";
8385           }
8386       if (TARGET_PA_20)
8387           {
8388             pa_output_arg_descriptor (insn);
8389             output_asm_insn ("bl .+8,%%r2\n\t"
8390                                  "ldo 24(%%r2),%%r2\n\t"
8391                                  "stw %%r2,-24(%%sp)\n\t"
8392                                  "extru,<> %r22,30,1,%%r0\n\t"
8393                                  "bve,n (%%r22)\n\t"
8394                                  "ldw -2(%%r22),%%r21\n\t"
8395                                  "bve (%%r21)\n\t"
8396                                  "ldw 2(%%r22),%%r19", xoperands);
8397             return "";
8398           }
8399     }
8400 
8401   /* We need a long PIC call to $$dyncall.  */
8402   xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8403   xoperands[1] = gen_rtx_REG (Pmode, 2);
8404   xoperands[2] = gen_rtx_REG (Pmode, 1);
8405   pa_output_pic_pcrel_sequence (xoperands);
8406   pa_output_arg_descriptor (insn);
8407   return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8408 }
8409 
8410 /* In HPUX 8.0's shared library scheme, special relocations are needed
8411    for function labels if they might be passed to a function
8412    in a shared library (because shared libraries don't live in code
8413    space), and special magic is needed to construct their address.  */
8414 
8415 void
pa_encode_label(rtx sym)8416 pa_encode_label (rtx sym)
8417 {
8418   const char *str = XSTR (sym, 0);
8419   int len = strlen (str) + 1;
8420   char *newstr, *p;
8421 
8422   p = newstr = XALLOCAVEC (char, len + 1);
8423   *p++ = '@';
8424   strcpy (p, str);
8425 
8426   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8427 }
8428 
8429 static void
pa_encode_section_info(tree decl,rtx rtl,int first)8430 pa_encode_section_info (tree decl, rtx rtl, int first)
8431 {
8432   int old_referenced = 0;
8433 
8434   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8435     old_referenced
8436       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8437 
8438   default_encode_section_info (decl, rtl, first);
8439 
8440   if (first && TEXT_SPACE_P (decl))
8441     {
8442       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8443       if (TREE_CODE (decl) == FUNCTION_DECL)
8444           pa_encode_label (XEXP (rtl, 0));
8445     }
8446   else if (old_referenced)
8447     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8448 }
8449 
/* Roughly the inverse of pa_encode_section_info: return STR with one
   leading '@' (the marker added by pa_encode_label) skipped, and then
   one leading '*' skipped.  The result points into STR.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  if (str[0] == '@')
    str++;

  if (str[0] == '*')
    str++;

  return str;
}
8459 
8460 /* Returns 1 if OP is a function label involved in a simple addition
8461    with a constant.  Used to keep certain patterns from matching
8462    during instruction combination.  */
8463 int
pa_is_function_label_plus_const(rtx op)8464 pa_is_function_label_plus_const (rtx op)
8465 {
8466   /* Strip off any CONST.  */
8467   if (GET_CODE (op) == CONST)
8468     op = XEXP (op, 0);
8469 
8470   return (GET_CODE (op) == PLUS
8471             && function_label_operand (XEXP (op, 0), VOIDmode)
8472             && GET_CODE (XEXP (op, 1)) == CONST_INT);
8473 }
8474 
8475 /* Output the assembler code for a thunk function.  THUNK_DECL is the
8476    declaration for the thunk function itself, FUNCTION is the decl for
8477    the target function.  DELTA is an immediate constant offset to be
8478    added to THIS.  If VCALL_OFFSET is nonzero, the word at
8479    *(*this + vcall_offset) should be added to THIS.  */
8480 
8481 static void
pa_asm_output_mi_thunk(FILE * file,tree thunk_fndecl,HOST_WIDE_INT delta,HOST_WIDE_INT vcall_offset,tree function)8482 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8483                               HOST_WIDE_INT vcall_offset, tree function)
8484 {
8485   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8486   static unsigned int current_thunk_number;
8487   int val_14 = VAL_14_BITS_P (delta);
8488   unsigned int old_last_address = last_address, nbytes = 0;
8489   char label[17];
8490   rtx xoperands[4];
8491 
8492   xoperands[0] = XEXP (DECL_RTL (function), 0);
8493   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8494   xoperands[2] = GEN_INT (delta);
8495 
8496   assemble_start_function (thunk_fndecl, fnname);
8497   final_start_function (emit_barrier (), file, 1);
8498 
8499   if (!vcall_offset)
8500     {
8501       /* Output the thunk.  We know that the function is in the same
8502            translation unit (i.e., the same space) as the thunk, and that
8503            thunks are output after their method.  Thus, we don't need an
8504            external branch to reach the function.  With SOM and GAS,
8505            functions and thunks are effectively in different sections.
8506            Thus, we can always use a IA-relative branch and the linker
8507            will add a long branch stub if necessary.
8508 
8509            However, we have to be careful when generating PIC code on the
8510            SOM port to ensure that the sequence does not transfer to an
8511            import stub for the target function as this could clobber the
8512            return value saved at SP-24.  This would also apply to the
8513           32-bit linux port if the multi-space model is implemented.  */
8514       if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8515              && !(flag_pic && TREE_PUBLIC (function))
8516              && (TARGET_GAS || last_address < 262132))
8517             || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8518                 && ((targetm_common.have_named_sections
8519                        && DECL_SECTION_NAME (thunk_fndecl) != NULL
8520                        /* The GNU 64-bit linker has rather poor stub management.
8521                           So, we use a long branch from thunks that aren't in
8522                           the same section as the target function.  */
8523                         && ((!TARGET_64BIT
8524                                && (DECL_SECTION_NAME (thunk_fndecl)
8525                                    != DECL_SECTION_NAME (function)))
8526                               || ((DECL_SECTION_NAME (thunk_fndecl)
8527                                    == DECL_SECTION_NAME (function))
8528                                   && last_address < 262132)))
8529                       /* In this case, we need to be able to reach the start of
8530                          the stub table even though the function is likely closer
8531                          and can be jumped to directly.  */
8532                       || (targetm_common.have_named_sections
8533                           && DECL_SECTION_NAME (thunk_fndecl) == NULL
8534                           && DECL_SECTION_NAME (function) == NULL
8535                           && total_code_bytes < MAX_PCREL17F_OFFSET)
8536                       /* Likewise.  */
8537                       || (!targetm_common.have_named_sections
8538                           && total_code_bytes < MAX_PCREL17F_OFFSET))))
8539           {
8540             if (!val_14)
8541               output_asm_insn ("addil L'%2,%%r26", xoperands);
8542 
8543             output_asm_insn ("b %0", xoperands);
8544 
8545             if (val_14)
8546               {
8547                 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8548                 nbytes += 8;
8549               }
8550             else
8551               {
8552                 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8553                 nbytes += 12;
8554               }
8555           }
8556       else if (TARGET_64BIT)
8557           {
8558             rtx xop[4];
8559 
8560             /* We only have one call-clobbered scratch register, so we can't
8561                make use of the delay slot if delta doesn't fit in 14 bits.  */
8562             if (!val_14)
8563               {
8564                 output_asm_insn ("addil L'%2,%%r26", xoperands);
8565                 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8566               }
8567 
8568             /* Load function address into %r1.  */
8569             xop[0] = xoperands[0];
8570             xop[1] = gen_rtx_REG (Pmode, 1);
8571             xop[2] = xop[1];
8572             pa_output_pic_pcrel_sequence (xop);
8573 
8574             if (val_14)
8575               {
8576                 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8577                 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8578                 nbytes += 20;
8579               }
8580             else
8581               {
8582                 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8583                 nbytes += 24;
8584               }
8585           }
8586       else if (TARGET_PORTABLE_RUNTIME)
8587           {
8588             output_asm_insn ("ldil L'%0,%%r1", xoperands);
8589             output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8590 
8591             if (!val_14)
8592               output_asm_insn ("ldil L'%2,%%r26", xoperands);
8593 
8594             output_asm_insn ("bv %%r0(%%r22)", xoperands);
8595 
8596             if (val_14)
8597               {
8598                 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8599                 nbytes += 16;
8600               }
8601             else
8602               {
8603                 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8604                 nbytes += 20;
8605               }
8606           }
8607       else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8608           {
8609             /* The function is accessible from outside this module.  The only
8610                way to avoid an import stub between the thunk and function is to
8611                call the function directly with an indirect sequence similar to
8612                that used by $$dyncall.  This is possible because $$dyncall acts
8613                as the import stub in an indirect call.  */
8614             ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8615             xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8616             output_asm_insn ("addil LT'%3,%%r19", xoperands);
8617             output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8618             output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8619             output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8620             output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8621             output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8622             output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8623 
8624             if (!val_14)
8625               {
8626                 output_asm_insn ("addil L'%2,%%r26", xoperands);
8627                 nbytes += 4;
8628               }
8629 
8630             if (TARGET_PA_20)
8631               {
8632                 output_asm_insn ("bve (%%r22)", xoperands);
8633                 nbytes += 36;
8634               }
8635             else if (TARGET_NO_SPACE_REGS)
8636               {
8637                 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8638                 nbytes += 36;
8639               }
8640             else
8641               {
8642                 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8643                 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8644                 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8645                 nbytes += 44;
8646               }
8647 
8648             if (val_14)
8649               output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8650             else
8651               output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8652           }
8653       else if (flag_pic)
8654           {
8655             rtx xop[4];
8656 
8657             /* Load function address into %r22.  */
8658             xop[0] = xoperands[0];
8659             xop[1] = gen_rtx_REG (Pmode, 1);
8660             xop[2] = gen_rtx_REG (Pmode, 22);
8661             pa_output_pic_pcrel_sequence (xop);
8662 
8663             if (!val_14)
8664               output_asm_insn ("addil L'%2,%%r26", xoperands);
8665 
8666             output_asm_insn ("bv %%r0(%%r22)", xoperands);
8667 
8668             if (val_14)
8669               {
8670                 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8671                 nbytes += 20;
8672               }
8673             else
8674               {
8675                 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8676                 nbytes += 24;
8677               }
8678           }
8679       else
8680           {
8681             if (!val_14)
8682               output_asm_insn ("addil L'%2,%%r26", xoperands);
8683 
8684             output_asm_insn ("ldil L'%0,%%r22", xoperands);
8685             output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8686 
8687             if (val_14)
8688               {
8689                 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8690                 nbytes += 12;
8691               }
8692             else
8693               {
8694                 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8695                 nbytes += 16;
8696               }
8697           }
8698     }
8699   else
8700     {
8701       rtx xop[4];
8702 
8703       /* Add DELTA to THIS.  */
8704       if (val_14)
8705           {
8706             output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8707             nbytes += 4;
8708           }
8709       else
8710           {
8711             output_asm_insn ("addil L'%2,%%r26", xoperands);
8712             output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8713             nbytes += 8;
8714           }
8715 
8716       if (TARGET_64BIT)
8717           {
8718             /* Load *(THIS + DELTA) to %r1.  */
8719             output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
8720 
8721             val_14 = VAL_14_BITS_P (vcall_offset);
8722             xoperands[2] = GEN_INT (vcall_offset);
8723 
8724             /* Load  *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
8725             if (val_14)
8726               {
8727                 output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
8728                 nbytes += 8;
8729               }
8730             else
8731               {
8732                 output_asm_insn ("addil L'%2,%%r1", xoperands);
8733                 output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
8734                 nbytes += 12;
8735               }
8736           }
8737       else
8738           {
8739             /* Load *(THIS + DELTA) to %r1.  */
8740             output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
8741 
8742             val_14 = VAL_14_BITS_P (vcall_offset);
8743             xoperands[2] = GEN_INT (vcall_offset);
8744 
8745             /* Load  *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
8746             if (val_14)
8747               {
8748                 output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
8749                 nbytes += 8;
8750               }
8751             else
8752               {
8753                 output_asm_insn ("addil L'%2,%%r1", xoperands);
8754                 output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
8755                 nbytes += 12;
8756               }
8757           }
8758 
8759       /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible.  */
8760       if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8761              && !(flag_pic && TREE_PUBLIC (function))
8762              && (TARGET_GAS || last_address < 262132))
8763             || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8764                 && ((targetm_common.have_named_sections
8765                        && DECL_SECTION_NAME (thunk_fndecl) != NULL
8766                        /* The GNU 64-bit linker has rather poor stub management.
8767                           So, we use a long branch from thunks that aren't in
8768                           the same section as the target function.  */
8769                         && ((!TARGET_64BIT
8770                                && (DECL_SECTION_NAME (thunk_fndecl)
8771                                    != DECL_SECTION_NAME (function)))
8772                               || ((DECL_SECTION_NAME (thunk_fndecl)
8773                                    == DECL_SECTION_NAME (function))
8774                                   && last_address < 262132)))
8775                       /* In this case, we need to be able to reach the start of
8776                          the stub table even though the function is likely closer
8777                          and can be jumped to directly.  */
8778                       || (targetm_common.have_named_sections
8779                           && DECL_SECTION_NAME (thunk_fndecl) == NULL
8780                           && DECL_SECTION_NAME (function) == NULL
8781                           && total_code_bytes < MAX_PCREL17F_OFFSET)
8782                       /* Likewise.  */
8783                       || (!targetm_common.have_named_sections
8784                           && total_code_bytes < MAX_PCREL17F_OFFSET))))
8785           {
8786             nbytes += 4;
8787             output_asm_insn ("b %0", xoperands);
8788 
8789             /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8790             output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8791           }
8792       else if (TARGET_64BIT)
8793           {
8794             /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8795             output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8796 
8797             /* Load function address into %r1.  */
8798             nbytes += 16;
8799             xop[0] = xoperands[0];
8800             xop[1] = gen_rtx_REG (Pmode, 1);
8801             xop[2] = xop[1];
8802             pa_output_pic_pcrel_sequence (xop);
8803 
8804             output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8805           }
8806       else if (TARGET_PORTABLE_RUNTIME)
8807           {
8808             /* Load function address into %r22.  */
8809             nbytes += 12;
8810             output_asm_insn ("ldil L'%0,%%r22", xoperands);
8811             output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
8812 
8813             output_asm_insn ("bv %%r0(%%r22)", xoperands);
8814 
8815             /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8816             output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8817           }
8818       else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8819           {
8820             /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8821             output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8822 
8823             /* The function is accessible from outside this module.  The only
8824                way to avoid an import stub between the thunk and function is to
8825                call the function directly with an indirect sequence similar to
8826                that used by $$dyncall.  This is possible because $$dyncall acts
8827                as the import stub in an indirect call.  */
8828             ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8829             xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8830             output_asm_insn ("addil LT'%3,%%r19", xoperands);
8831             output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8832             output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8833             output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8834             output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8835             output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8836             output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8837 
8838             if (TARGET_PA_20)
8839               {
8840                 output_asm_insn ("bve,n (%%r22)", xoperands);
8841                 nbytes += 32;
8842               }
8843             else if (TARGET_NO_SPACE_REGS)
8844               {
8845                 output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
8846                 nbytes += 32;
8847               }
8848             else
8849               {
8850                 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8851                 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8852                 output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
8853                 nbytes += 40;
8854               }
8855           }
8856       else if (flag_pic)
8857           {
8858             /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8859             output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8860 
8861             /* Load function address into %r1.  */
8862             nbytes += 16;
8863             xop[0] = xoperands[0];
8864             xop[1] = gen_rtx_REG (Pmode, 1);
8865             xop[2] = xop[1];
8866             pa_output_pic_pcrel_sequence (xop);
8867 
8868             output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8869           }
8870       else
8871           {
8872             /* Load function address into %r22.  */
8873             nbytes += 8;
8874             output_asm_insn ("ldil L'%0,%%r22", xoperands);
8875             output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8876 
8877             /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8878             output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8879           }
8880     }
8881 
8882   final_end_function ();
8883 
8884   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8885     {
8886       switch_to_section (data_section);
8887       output_asm_insn (".align 4", xoperands);
8888       ASM_OUTPUT_LABEL (file, label);
8889       output_asm_insn (".word P'%0", xoperands);
8890     }
8891 
8892   current_thunk_number++;
8893   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8894               & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8895   last_address += nbytes;
8896   if (old_last_address > last_address)
8897     last_address = UINT_MAX;
8898   update_total_code_bytes (nbytes);
8899   assemble_end_function (thunk_fndecl, fnname);
8900 }
8901 
8902 /* Only direct calls to static functions are allowed to be sibling (tail)
8903    call optimized.
8904 
8905    This restriction is necessary because some linker generated stubs will
8906    store return pointers into rp' in some cases which might clobber a
8907    live value already in rp'.
8908 
8909    In a sibcall the current function and the target function share stack
8910    space.  Thus if the path to the current function and the path to the
8911    target function save a value in rp', they save the value into the
8912    same stack slot, which has undesirable consequences.
8913 
8914    Because of the deferred binding nature of shared libraries any function
8915    with external scope could be in a different load module and thus require
8916    rp' to be saved when calling that function.  So sibcall optimizations
8917    can only be safe for static function.
8918 
8919    Note that GCC never needs return value relocations, so we don't have to
8920    worry about static calls with return value relocations (which require
8921    saving rp').
8922 
8923    It is safe to perform a sibcall optimization when the target function
8924    will never return.  */
8925 static bool
pa_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)8926 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8927 {
8928   /* Sibcalls are not ok because the arg pointer register is not a fixed
8929      register.  This prevents the sibcall optimization from occurring.  In
8930      addition, there are problems with stub placement using GNU ld.  This
8931      is because a normal sibcall branch uses a 17-bit relocation while
8932      a regular call branch uses a 22-bit relocation.  As a result, more
8933      care needs to be taken in the placement of long-branch stubs.  */
8934   if (TARGET_64BIT)
8935     return false;
8936 
8937   if (TARGET_PORTABLE_RUNTIME)
8938     return false;
8939 
8940   /* Sibcalls are only ok within a translation unit.  */
8941   return decl && targetm.binds_local_p (decl);
8942 }
8943 
8944 /* ??? Addition is not commutative on the PA due to the weird implicit
8945    space register selection rules for memory addresses.  Therefore, we
8946    don't consider a + b == b + a, as this might be inside a MEM.  */
8947 static bool
pa_commutative_p(const_rtx x,int outer_code)8948 pa_commutative_p (const_rtx x, int outer_code)
8949 {
8950   return (COMMUTATIVE_P (x)
8951             && (TARGET_NO_SPACE_REGS
8952                 || (outer_code != UNKNOWN && outer_code != MEM)
8953                 || GET_CODE (x) != PLUS));
8954 }
8955 
8956 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8957    use in fmpyadd instructions.  */
8958 int
pa_fmpyaddoperands(rtx * operands)8959 pa_fmpyaddoperands (rtx *operands)
8960 {
8961   machine_mode mode = GET_MODE (operands[0]);
8962 
8963   /* Must be a floating point mode.  */
8964   if (mode != SFmode && mode != DFmode)
8965     return 0;
8966 
8967   /* All modes must be the same.  */
8968   if (! (mode == GET_MODE (operands[1])
8969            && mode == GET_MODE (operands[2])
8970            && mode == GET_MODE (operands[3])
8971            && mode == GET_MODE (operands[4])
8972            && mode == GET_MODE (operands[5])))
8973     return 0;
8974 
8975   /* All operands must be registers.  */
8976   if (! (GET_CODE (operands[1]) == REG
8977            && GET_CODE (operands[2]) == REG
8978            && GET_CODE (operands[3]) == REG
8979            && GET_CODE (operands[4]) == REG
8980            && GET_CODE (operands[5]) == REG))
8981     return 0;
8982 
8983   /* Only 2 real operands to the addition.  One of the input operands must
8984      be the same as the output operand.  */
8985   if (! rtx_equal_p (operands[3], operands[4])
8986       && ! rtx_equal_p (operands[3], operands[5]))
8987     return 0;
8988 
8989   /* Inout operand of add cannot conflict with any operands from multiply.  */
8990   if (rtx_equal_p (operands[3], operands[0])
8991      || rtx_equal_p (operands[3], operands[1])
8992      || rtx_equal_p (operands[3], operands[2]))
8993     return 0;
8994 
8995   /* multiply cannot feed into addition operands.  */
8996   if (rtx_equal_p (operands[4], operands[0])
8997       || rtx_equal_p (operands[5], operands[0]))
8998     return 0;
8999 
9000   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
9001   if (mode == SFmode
9002       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9003             || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9004             || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9005             || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9006             || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9007             || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9008     return 0;
9009 
9010   /* Passed.  Operands are suitable for fmpyadd.  */
9011   return 1;
9012 }
9013 
9014 #if !defined(USE_COLLECT2)
9015 static void
pa_asm_out_constructor(rtx symbol,int priority)9016 pa_asm_out_constructor (rtx symbol, int priority)
9017 {
9018   if (!function_label_operand (symbol, VOIDmode))
9019     pa_encode_label (symbol);
9020 
9021 #ifdef CTORS_SECTION_ASM_OP
9022   default_ctor_section_asm_out_constructor (symbol, priority);
9023 #else
9024 # ifdef TARGET_ASM_NAMED_SECTION
9025   default_named_section_asm_out_constructor (symbol, priority);
9026 # else
9027   default_stabs_asm_out_constructor (symbol, priority);
9028 # endif
9029 #endif
9030 }
9031 
9032 static void
pa_asm_out_destructor(rtx symbol,int priority)9033 pa_asm_out_destructor (rtx symbol, int priority)
9034 {
9035   if (!function_label_operand (symbol, VOIDmode))
9036     pa_encode_label (symbol);
9037 
9038 #ifdef DTORS_SECTION_ASM_OP
9039   default_dtor_section_asm_out_destructor (symbol, priority);
9040 #else
9041 # ifdef TARGET_ASM_NAMED_SECTION
9042   default_named_section_asm_out_destructor (symbol, priority);
9043 # else
9044   default_stabs_asm_out_destructor (symbol, priority);
9045 # endif
9046 #endif
9047 }
9048 #endif
9049 
9050 /* This function places uninitialized global data in the bss section.
9051    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9052    function on the SOM port to prevent uninitialized global data from
9053    being placed in the data section.  */
9054 
9055 void
pa_asm_output_aligned_bss(FILE * stream,const char * name,unsigned HOST_WIDE_INT size,unsigned int align)9056 pa_asm_output_aligned_bss (FILE *stream,
9057                                  const char *name,
9058                                  unsigned HOST_WIDE_INT size,
9059                                  unsigned int align)
9060 {
9061   switch_to_section (bss_section);
9062 
9063 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9064   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9065 #endif
9066 
9067 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9068   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9069 #endif
9070 
9071   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9072   ASM_OUTPUT_LABEL (stream, name);
9073   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9074 }
9075 
9076 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9077    that doesn't allow the alignment of global common storage to be directly
9078    specified.  The SOM linker aligns common storage based on the rounded
9079    value of the NUM_BYTES parameter in the .comm directive.  It's not
9080    possible to use the .align directive as it doesn't affect the alignment
9081    of the label associated with a .comm directive.  */
9082 
9083 void
pa_asm_output_aligned_common(FILE * stream,const char * name,unsigned HOST_WIDE_INT size,unsigned int align)9084 pa_asm_output_aligned_common (FILE *stream,
9085                                     const char *name,
9086                                     unsigned HOST_WIDE_INT size,
9087                                     unsigned int align)
9088 {
9089   unsigned int max_common_align;
9090 
9091   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9092   if (align > max_common_align)
9093     {
9094       /* Alignment exceeds maximum alignment for global common data.  */
9095       align = max_common_align;
9096     }
9097 
9098   switch_to_section (bss_section);
9099 
9100   assemble_name (stream, name);
9101   fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9102            MAX (size, align / BITS_PER_UNIT));
9103 }
9104 
9105 /* We can't use .comm for local common storage as the SOM linker effectively
9106    treats the symbol as universal and uses the same storage for local symbols
9107    with the same name in different object files.  The .block directive
9108    reserves an uninitialized block of storage.  However, it's not common
9109    storage.  Fortunately, GCC never requests common storage with the same
9110    name in any given translation unit.  */
9111 
9112 void
pa_asm_output_aligned_local(FILE * stream,const char * name,unsigned HOST_WIDE_INT size,unsigned int align)9113 pa_asm_output_aligned_local (FILE *stream,
9114                                    const char *name,
9115                                    unsigned HOST_WIDE_INT size,
9116                                    unsigned int align)
9117 {
9118   switch_to_section (bss_section);
9119   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9120 
9121 #ifdef LOCAL_ASM_OP
9122   fprintf (stream, "%s", LOCAL_ASM_OP);
9123   assemble_name (stream, name);
9124   fprintf (stream, "\n");
9125 #endif
9126 
9127   ASM_OUTPUT_LABEL (stream, name);
9128   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9129 }
9130 
9131 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9132    use in fmpysub instructions.  */
9133 int
pa_fmpysuboperands(rtx * operands)9134 pa_fmpysuboperands (rtx *operands)
9135 {
9136   machine_mode mode = GET_MODE (operands[0]);
9137 
9138   /* Must be a floating point mode.  */
9139   if (mode != SFmode && mode != DFmode)
9140     return 0;
9141 
9142   /* All modes must be the same.  */
9143   if (! (mode == GET_MODE (operands[1])
9144            && mode == GET_MODE (operands[2])
9145            && mode == GET_MODE (operands[3])
9146            && mode == GET_MODE (operands[4])
9147            && mode == GET_MODE (operands[5])))
9148     return 0;
9149 
9150   /* All operands must be registers.  */
9151   if (! (GET_CODE (operands[1]) == REG
9152            && GET_CODE (operands[2]) == REG
9153            && GET_CODE (operands[3]) == REG
9154            && GET_CODE (operands[4]) == REG
9155            && GET_CODE (operands[5]) == REG))
9156     return 0;
9157 
9158   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
9159      operation, so operands[4] must be the same as operand[3].  */
9160   if (! rtx_equal_p (operands[3], operands[4]))
9161     return 0;
9162 
9163   /* multiply cannot feed into subtraction.  */
9164   if (rtx_equal_p (operands[5], operands[0]))
9165     return 0;
9166 
9167   /* Inout operand of sub cannot conflict with any operands from multiply.  */
9168   if (rtx_equal_p (operands[3], operands[0])
9169      || rtx_equal_p (operands[3], operands[1])
9170      || rtx_equal_p (operands[3], operands[2]))
9171     return 0;
9172 
9173   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
9174   if (mode == SFmode
9175       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9176             || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9177             || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9178             || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9179             || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9180             || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9181     return 0;
9182 
9183   /* Passed.  Operands are suitable for fmpysub.  */
9184   return 1;
9185 }
9186 
/* Return 1 if VAL is 2, 4, or 8, and 0 otherwise.  These are the valid
   constants for a MULT embedded inside a memory address.  */
int
pa_mem_shadd_constant_p (int val)
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;

    default:
      return 0;
    }
}
9197 
/* Return 1 if VAL is 1, 2, or 3, and 0 otherwise.  These are the valid
   constants for shadd instructions.  */
int
pa_shadd_constant_p (int val)
{
  return (val >= 1 && val <= 3) ? 1 : 0;
}
9208 
9209 /* Return TRUE if INSN branches forward.  */
9210 
9211 static bool
forward_branch_p(rtx_insn * insn)9212 forward_branch_p (rtx_insn *insn)
9213 {
9214   rtx lab = JUMP_LABEL (insn);
9215 
9216   /* The INSN must have a jump label.  */
9217   gcc_assert (lab != NULL_RTX);
9218 
9219   if (INSN_ADDRESSES_SET_P ())
9220     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9221 
9222   while (insn)
9223     {
9224       if (insn == lab)
9225           return true;
9226       else
9227           insn = NEXT_INSN (insn);
9228     }
9229 
9230   return false;
9231 }
9232 
9233 /* Output an unconditional move and branch insn.  */
9234 
9235 const char *
pa_output_parallel_movb(rtx * operands,rtx_insn * insn)9236 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9237 {
9238   int length = get_attr_length (insn);
9239 
9240   /* These are the cases in which we win.  */
9241   if (length == 4)
9242     return "mov%I1b,tr %1,%0,%2";
9243 
9244   /* None of the following cases win, but they don't lose either.  */
9245   if (length == 8)
9246     {
9247       if (dbr_sequence_length () == 0)
9248           {
9249             /* Nothing in the delay slot, fake it by putting the combined
9250                insn (the copy or add) in the delay slot of a bl.  */
9251             if (GET_CODE (operands[1]) == CONST_INT)
9252               return "b %2\n\tldi %1,%0";
9253             else
9254               return "b %2\n\tcopy %1,%0";
9255           }
9256       else
9257           {
9258             /* Something in the delay slot, but we've got a long branch.  */
9259             if (GET_CODE (operands[1]) == CONST_INT)
9260               return "ldi %1,%0\n\tb %2";
9261             else
9262               return "copy %1,%0\n\tb %2";
9263           }
9264     }
9265 
9266   if (GET_CODE (operands[1]) == CONST_INT)
9267     output_asm_insn ("ldi %1,%0", operands);
9268   else
9269     output_asm_insn ("copy %1,%0", operands);
9270   return pa_output_lbranch (operands[2], insn, 1);
9271 }
9272 
9273 /* Output an unconditional add and branch insn.  */
9274 
9275 const char *
pa_output_parallel_addb(rtx * operands,rtx_insn * insn)9276 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9277 {
9278   int length = get_attr_length (insn);
9279 
9280   /* To make life easy we want operand0 to be the shared input/output
9281      operand and operand1 to be the readonly operand.  */
9282   if (operands[0] == operands[1])
9283     operands[1] = operands[2];
9284 
9285   /* These are the cases in which we win.  */
9286   if (length == 4)
9287     return "add%I1b,tr %1,%0,%3";
9288 
9289   /* None of the following cases win, but they don't lose either.  */
9290   if (length == 8)
9291     {
9292       if (dbr_sequence_length () == 0)
9293           /* Nothing in the delay slot, fake it by putting the combined
9294              insn (the copy or add) in the delay slot of a bl.  */
9295           return "b %3\n\tadd%I1 %1,%0,%0";
9296       else
9297           /* Something in the delay slot, but we've got a long branch.  */
9298           return "add%I1 %1,%0,%0\n\tb %3";
9299     }
9300 
9301   output_asm_insn ("add%I1 %1,%0,%0", operands);
9302   return pa_output_lbranch (operands[3], insn, 1);
9303 }
9304 
9305 /* We use this hook to perform a PA specific optimization which is difficult
9306    to do in earlier passes.  */
9307 
9308 static void
pa_reorg(void)9309 pa_reorg (void)
9310 {
9311   remove_useless_addtr_insns (1);
9312 
9313   if (pa_cpu < PROCESSOR_8000)
9314     pa_combine_instructions ();
9315 }
9316 
9317 /* The PA has a number of odd instructions which can perform multiple
9318    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
9319    it may be profitable to combine two instructions into one instruction
9320    with two outputs.  It's not profitable on PA2.0 machines because the
9321    two outputs would take two slots in the reorder buffers.
9322 
9323    This routine finds instructions which can be combined and combines
9324    them.  We only support some of the potential combinations, and we
9325    only try common ways to find suitable instructions.
9326 
9327       * addb can add two registers or a register and a small integer
9328       and jump to a nearby (+-8k) location.  Normally the jump to the
9329       nearby location is conditional on the result of the add, but by
9330       using the "true" condition we can make the jump unconditional.
9331       Thus addb can perform two independent operations in one insn.
9332 
9333       * movb is similar to addb in that it can perform a reg->reg
9334       or small immediate->reg copy and jump to a nearby (+-8k location).
9335 
9336       * fmpyadd and fmpysub can perform a FP multiply and either an
9337       FP add or FP sub if the operands of the multiply and add/sub are
9338       independent (there are other minor restrictions).  Note both
9339       the fmpy and fadd/fsub can in theory move to better spots according
9340       to data dependencies, but for now we require the fmpy stay at a
9341       fixed location.
9342 
9343       * Many of the memory operations can perform pre & post updates
9344       of index registers.  GCC's pre/post increment/decrement addressing
9345       is far too simple to take advantage of all the possibilities.  This
9346       pass may not be suitable since those insns may not be independent.
9347 
9348       * comclr can compare two ints or an int and a register, nullify
9349       the following instruction and zero some other register.  This
9350       is more difficult to use as it's harder to find an insn which
9351       will generate a comclr than finding something like an unconditional
9352       branch.  (conditional moves & long branches create comclr insns).
9353 
9354       * Most arithmetic operations can conditionally skip the next
9355       instruction.  They can be viewed as "perform this operation
9356       and conditionally jump to this nearby location" (where nearby
9357       is one insn away).  These are difficult to use due to the
9358       branch length restrictions.  */
9359 
9360 static void
pa_combine_instructions(void)9361 pa_combine_instructions (void)
9362 {
9363   rtx_insn *anchor;
9364 
9365   /* This can get expensive since the basic algorithm is on the
9366      order of O(n^2) (or worse).  Only do it for -O2 or higher
9367      levels of optimization.  */
9368   if (optimize < 2)
9369     return;
9370 
9371   /* Walk down the list of insns looking for "anchor" insns which
9372      may be combined with "floating" insns.  As the name implies,
9373      "anchor" instructions don't move, while "floating" insns may
9374      move around.  */
9375   rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9376   rtx_insn *new_rtx = make_insn_raw (par);
9377 
9378   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9379     {
9380       enum attr_pa_combine_type anchor_attr;
9381       enum attr_pa_combine_type floater_attr;
9382 
9383       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9384            Also ignore any special USE insns.  */
9385       if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9386             || GET_CODE (PATTERN (anchor)) == USE
9387             || GET_CODE (PATTERN (anchor)) == CLOBBER)
9388           continue;
9389 
9390       anchor_attr = get_attr_pa_combine_type (anchor);
9391       /* See if anchor is an insn suitable for combination.  */
9392       if (anchor_attr == PA_COMBINE_TYPE_FMPY
9393             || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9394             || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9395                 && ! forward_branch_p (anchor)))
9396           {
9397             rtx_insn *floater;
9398 
9399             for (floater = PREV_INSN (anchor);
9400                  floater;
9401                  floater = PREV_INSN (floater))
9402               {
9403                 if (NOTE_P (floater)
9404                       || (NONJUMP_INSN_P (floater)
9405                           && (GET_CODE (PATTERN (floater)) == USE
9406                                 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9407                     continue;
9408 
9409                 /* Anything except a regular INSN will stop our search.  */
9410                 if (! NONJUMP_INSN_P (floater))
9411                     {
9412                       floater = NULL;
9413                       break;
9414                     }
9415 
9416                 /* See if FLOATER is suitable for combination with the
9417                      anchor.  */
9418                 floater_attr = get_attr_pa_combine_type (floater);
9419                 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9420                        && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9421                       || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9422                           && floater_attr == PA_COMBINE_TYPE_FMPY))
9423                     {
9424                       /* If ANCHOR and FLOATER can be combined, then we're
9425                          done with this pass.  */
9426                       if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9427                                                   SET_DEST (PATTERN (floater)),
9428                                                   XEXP (SET_SRC (PATTERN (floater)), 0),
9429                                                   XEXP (SET_SRC (PATTERN (floater)), 1)))
9430                         break;
9431                     }
9432 
9433                 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9434                            && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9435                     {
9436                       if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9437                         {
9438                           if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9439                                                       SET_DEST (PATTERN (floater)),
9440                                                   XEXP (SET_SRC (PATTERN (floater)), 0),
9441                                                   XEXP (SET_SRC (PATTERN (floater)), 1)))
9442                               break;
9443                         }
9444                       else
9445                         {
9446                           if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9447                                                       SET_DEST (PATTERN (floater)),
9448                                                       SET_SRC (PATTERN (floater)),
9449                                                       SET_SRC (PATTERN (floater))))
9450                               break;
9451                         }
9452                     }
9453               }
9454 
9455             /* If we didn't find anything on the backwards scan try forwards.  */
9456             if (!floater
9457                 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9458                       || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9459               {
9460                 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9461                     {
9462                       if (NOTE_P (floater)
9463                           || (NONJUMP_INSN_P (floater)
9464                                 && (GET_CODE (PATTERN (floater)) == USE
9465                                     || GET_CODE (PATTERN (floater)) == CLOBBER)))
9466 
9467                         continue;
9468 
9469                       /* Anything except a regular INSN will stop our search.  */
9470                       if (! NONJUMP_INSN_P (floater))
9471                         {
9472                           floater = NULL;
9473                           break;
9474                         }
9475 
9476                       /* See if FLOATER is suitable for combination with the
9477                          anchor.  */
9478                       floater_attr = get_attr_pa_combine_type (floater);
9479                       if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9480                            && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9481                           || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9482                                 && floater_attr == PA_COMBINE_TYPE_FMPY))
9483                         {
9484                           /* If ANCHOR and FLOATER can be combined, then we're
9485                                done with this pass.  */
9486                           if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9487                                                       SET_DEST (PATTERN (floater)),
9488                                                       XEXP (SET_SRC (PATTERN (floater)),
9489                                                               0),
9490                                                       XEXP (SET_SRC (PATTERN (floater)),
9491                                                               1)))
9492                               break;
9493                         }
9494                     }
9495               }
9496 
9497             /* FLOATER will be nonzero if we found a suitable floating
9498                insn for combination with ANCHOR.  */
9499             if (floater
9500                 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9501                       || anchor_attr == PA_COMBINE_TYPE_FMPY))
9502               {
9503                 /* Emit the new instruction and delete the old anchor.  */
9504                 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9505                                                copy_rtx (PATTERN (floater)));
9506                 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9507                 emit_insn_before (temp, anchor);
9508 
9509                 SET_INSN_DELETED (anchor);
9510 
9511                 /* Emit a special USE insn for FLOATER, then delete
9512                      the floating insn.  */
9513                 temp = copy_rtx (PATTERN (floater));
9514                 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9515                 delete_insn (floater);
9516 
9517                 continue;
9518               }
9519             else if (floater
9520                        && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9521               {
9522                 /* Emit the new_jump instruction and delete the old anchor.  */
9523                 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9524                                                copy_rtx (PATTERN (floater)));
9525                 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9526                 temp = emit_jump_insn_before (temp, anchor);
9527 
9528                 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9529                 SET_INSN_DELETED (anchor);
9530 
9531                 /* Emit a special USE insn for FLOATER, then delete
9532                      the floating insn.  */
9533                 temp = copy_rtx (PATTERN (floater));
9534                 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9535                 delete_insn (floater);
9536                 continue;
9537               }
9538           }
9539     }
9540 }
9541 
9542 static int
pa_can_combine_p(rtx_insn * new_rtx,rtx_insn * anchor,rtx_insn * floater,int reversed,rtx dest,rtx src1,rtx src2)9543 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9544                       int reversed, rtx dest,
9545                       rtx src1, rtx src2)
9546 {
9547   int insn_code_number;
9548   rtx_insn *start, *end;
9549 
9550   /* Create a PARALLEL with the patterns of ANCHOR and
9551      FLOATER, try to recognize it, then test constraints
9552      for the resulting pattern.
9553 
9554      If the pattern doesn't match or the constraints
9555      aren't met keep searching for a suitable floater
9556      insn.  */
9557   XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9558   XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9559   INSN_CODE (new_rtx) = -1;
9560   insn_code_number = recog_memoized (new_rtx);
9561   basic_block bb = BLOCK_FOR_INSN (anchor);
9562   if (insn_code_number < 0
9563       || (extract_insn (new_rtx),
9564             !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9565     return 0;
9566 
9567   if (reversed)
9568     {
9569       start = anchor;
9570       end = floater;
9571     }
9572   else
9573     {
9574       start = floater;
9575       end = anchor;
9576     }
9577 
9578   /* There's up to three operands to consider.  One
9579      output and two inputs.
9580 
9581      The output must not be used between FLOATER & ANCHOR
9582      exclusive.  The inputs must not be set between
9583      FLOATER and ANCHOR exclusive.  */
9584 
9585   if (reg_used_between_p (dest, start, end))
9586     return 0;
9587 
9588   if (reg_set_between_p (src1, start, end))
9589     return 0;
9590 
9591   if (reg_set_between_p (src2, start, end))
9592     return 0;
9593 
9594   /* If we get here, then everything is good.  */
9595   return 1;
9596 }
9597 
9598 /* Return nonzero if references for INSN are delayed.
9599 
9600    Millicode insns are actually function calls with some special
9601    constraints on arguments and register usage.
9602 
9603    Millicode calls always expect their arguments in the integer argument
9604    registers, and always return their result in %r29 (ret1).  They
9605    are expected to clobber their arguments, %r1, %r29, and the return
9606    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9607 
9608    This function tells reorg that the references to arguments and
9609    millicode calls do not appear to happen until after the millicode call.
9610    This allows reorg to put insns which set the argument registers into the
9611    delay slot of the millicode call -- thus they act more like traditional
9612    CALL_INSNs.
9613 
9614    Note we cannot consider side effects of the insn to be delayed because
9615    the branch and link insn will clobber the return pointer.  If we happened
9616    to use the return pointer in the delay slot of the call, then we lose.
9617 
9618    get_attr_type will try to recognize the given insn, so make sure to
9619    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9620    in particular.  */
9621 int
pa_insn_refs_are_delayed(rtx_insn * insn)9622 pa_insn_refs_are_delayed (rtx_insn *insn)
9623 {
9624   return ((NONJUMP_INSN_P (insn)
9625              && GET_CODE (PATTERN (insn)) != SEQUENCE
9626              && GET_CODE (PATTERN (insn)) != USE
9627              && GET_CODE (PATTERN (insn)) != CLOBBER
9628              && get_attr_type (insn) == TYPE_MILLI));
9629 }
9630 
9631 /* Promote the return value, but not the arguments.  */
9632 
9633 static machine_mode
pa_promote_function_mode(const_tree type ATTRIBUTE_UNUSED,machine_mode mode,int * punsignedp ATTRIBUTE_UNUSED,const_tree fntype ATTRIBUTE_UNUSED,int for_return)9634 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9635                           machine_mode mode,
9636                           int *punsignedp ATTRIBUTE_UNUSED,
9637                           const_tree fntype ATTRIBUTE_UNUSED,
9638                           int for_return)
9639 {
9640   if (for_return == 0)
9641     return mode;
9642   return promote_mode (type, mode, punsignedp);
9643 }
9644 
9645 /* On the HP-PA the value is found in register(s) 28(-29), unless
9646    the mode is SF or DF. Then the value is returned in fr4 (32).
9647 
9648    This must perform the same promotions as PROMOTE_MODE, else promoting
9649    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9650 
9651    Small structures must be returned in a PARALLEL on PA64 in order
9652    to match the HP Compiler ABI.  */
9653 
9654 static rtx
pa_function_value(const_tree valtype,const_tree func ATTRIBUTE_UNUSED,bool outgoing ATTRIBUTE_UNUSED)9655 pa_function_value (const_tree valtype,
9656                    const_tree func ATTRIBUTE_UNUSED,
9657                    bool outgoing ATTRIBUTE_UNUSED)
9658 {
9659   machine_mode valmode;
9660 
9661   if (AGGREGATE_TYPE_P (valtype)
9662       || TREE_CODE (valtype) == COMPLEX_TYPE
9663       || TREE_CODE (valtype) == VECTOR_TYPE)
9664     {
9665       HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9666 
9667       /* Handle aggregates that fit exactly in a word or double word.  */
9668       if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
9669           return gen_rtx_REG (TYPE_MODE (valtype), 28);
9670 
9671       if (TARGET_64BIT)
9672           {
9673           /* Aggregates with a size less than or equal to 128 bits are
9674                returned in GR 28(-29).  They are left justified.  The pad
9675                bits are undefined.  Larger aggregates are returned in
9676                memory.  */
9677             rtx loc[2];
9678             int i, offset = 0;
9679             int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9680 
9681             for (i = 0; i < ub; i++)
9682               {
9683                 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9684                                                     gen_rtx_REG (DImode, 28 + i),
9685                                                     GEN_INT (offset));
9686                 offset += 8;
9687               }
9688 
9689             return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9690           }
9691       else if (valsize > UNITS_PER_WORD)
9692           {
9693             /* Aggregates 5 to 8 bytes in size are returned in general
9694                registers r28-r29 in the same manner as other non
9695                floating-point objects.  The data is right-justified and
9696                zero-extended to 64 bits.  This is opposite to the normal
9697                justification used on big endian targets and requires
9698                special treatment.  */
9699             rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9700                                                gen_rtx_REG (DImode, 28), const0_rtx);
9701             return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9702           }
9703     }
9704 
9705   if ((INTEGRAL_TYPE_P (valtype)
9706        && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9707       || POINTER_TYPE_P (valtype))
9708     valmode = word_mode;
9709   else
9710     valmode = TYPE_MODE (valtype);
9711 
9712   if (TREE_CODE (valtype) == REAL_TYPE
9713       && !AGGREGATE_TYPE_P (valtype)
9714       && TYPE_MODE (valtype) != TFmode
9715       && !TARGET_SOFT_FLOAT)
9716     return gen_rtx_REG (valmode, 32);
9717 
9718   return gen_rtx_REG (valmode, 28);
9719 }
9720 
9721 /* Implement the TARGET_LIBCALL_VALUE hook.  */
9722 
9723 static rtx
pa_libcall_value(machine_mode mode,const_rtx fun ATTRIBUTE_UNUSED)9724 pa_libcall_value (machine_mode mode,
9725                       const_rtx fun ATTRIBUTE_UNUSED)
9726 {
9727   if (! TARGET_SOFT_FLOAT
9728       && (mode == SFmode || mode == DFmode))
9729     return  gen_rtx_REG (mode, 32);
9730   else
9731     return  gen_rtx_REG (mode, 28);
9732 }
9733 
9734 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9735 
9736 static bool
pa_function_value_regno_p(const unsigned int regno)9737 pa_function_value_regno_p (const unsigned int regno)
9738 {
9739   if (regno == 28
9740       || (! TARGET_SOFT_FLOAT &&  regno == 32))
9741     return true;
9742 
9743   return false;
9744 }
9745 
9746 /* Update the data in CUM to advance over argument ARG.  */
9747 
9748 static void
pa_function_arg_advance(cumulative_args_t cum_v,const function_arg_info & arg)9749 pa_function_arg_advance (cumulative_args_t cum_v,
9750                                const function_arg_info &arg)
9751 {
9752   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9753   int arg_size = pa_function_arg_size (arg.mode, arg.type);
9754 
9755   cum->nargs_prototype--;
9756   cum->words += (arg_size
9757                      + ((cum->words & 01)
9758                         && arg.type != NULL_TREE
9759                         && arg_size > 1));
9760 }
9761 
9762 /* Return the location of a parameter that is passed in a register or NULL
9763    if the parameter has any component that is passed in memory.
9764 
9765    This is new code and will be pushed to into the net sources after
9766    further testing.
9767 
9768    ??? We might want to restructure this so that it looks more like other
9769    ports.  */
9770 static rtx
pa_function_arg(cumulative_args_t cum_v,const function_arg_info & arg)9771 pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
9772 {
9773   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9774   tree type = arg.type;
9775   machine_mode mode = arg.mode;
9776   int max_arg_words = (TARGET_64BIT ? 8 : 4);
9777   int alignment = 0;
9778   int arg_size;
9779   int fpr_reg_base;
9780   int gpr_reg_base;
9781   rtx retval;
9782 
9783   if (arg.end_marker_p ())
9784     return NULL_RTX;
9785 
9786   arg_size = pa_function_arg_size (mode, type);
9787   if (!arg_size)
9788     return NULL_RTX;
9789 
9790   /* If this arg would be passed partially or totally on the stack, then
9791      this routine should return zero.  pa_arg_partial_bytes will
9792      handle arguments which are split between regs and stack slots if
9793      the ABI mandates split arguments.  */
9794   if (!TARGET_64BIT)
9795     {
9796       /* The 32-bit ABI does not split arguments.  */
9797       if (cum->words + arg_size > max_arg_words)
9798           return NULL_RTX;
9799     }
9800   else
9801     {
9802       if (arg_size > 1)
9803           alignment = cum->words & 1;
9804       if (cum->words + alignment >= max_arg_words)
9805           return NULL_RTX;
9806     }
9807 
9808   /* The 32bit ABIs and the 64bit ABIs are rather different,
9809      particularly in their handling of FP registers.  We might
9810      be able to cleverly share code between them, but I'm not
9811      going to bother in the hope that splitting them up results
9812      in code that is more easily understood.  */
9813 
9814   if (TARGET_64BIT)
9815     {
9816       /* Advance the base registers to their current locations.
9817 
9818          Remember, gprs grow towards smaller register numbers while
9819            fprs grow to higher register numbers.  Also remember that
9820            although FP regs are 32-bit addressable, we pretend that
9821            the registers are 64-bits wide.  */
9822       gpr_reg_base = 26 - cum->words;
9823       fpr_reg_base = 32 + cum->words;
9824 
9825       /* Arguments wider than one word and small aggregates need special
9826            treatment.  */
9827       if (arg_size > 1
9828             || mode == BLKmode
9829             || (type && (AGGREGATE_TYPE_P (type)
9830                            || TREE_CODE (type) == COMPLEX_TYPE
9831                            || TREE_CODE (type) == VECTOR_TYPE)))
9832           {
9833             /* Double-extended precision (80-bit), quad-precision (128-bit)
9834                and aggregates including complex numbers are aligned on
9835                128-bit boundaries.  The first eight 64-bit argument slots
9836                are associated one-to-one, with general registers r26
9837                through r19, and also with floating-point registers fr4
9838                through fr11.  Arguments larger than one word are always
9839                passed in general registers.
9840 
9841                Using a PARALLEL with a word mode register results in left
9842                justified data on a big-endian target.  */
9843 
9844             rtx loc[8];
9845             int i, offset = 0, ub = arg_size;
9846 
9847             /* Align the base register.  */
9848             gpr_reg_base -= alignment;
9849 
9850             ub = MIN (ub, max_arg_words - cum->words - alignment);
9851             for (i = 0; i < ub; i++)
9852               {
9853                 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9854                                                     gen_rtx_REG (DImode, gpr_reg_base),
9855                                                     GEN_INT (offset));
9856                 gpr_reg_base -= 1;
9857                 offset += 8;
9858               }
9859 
9860             return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9861           }
9862      }
9863   else
9864     {
9865       /* If the argument is larger than a word, then we know precisely
9866            which registers we must use.  */
9867       if (arg_size > 1)
9868           {
9869             if (cum->words)
9870               {
9871                 gpr_reg_base = 23;
9872                 fpr_reg_base = 38;
9873               }
9874             else
9875               {
9876                 gpr_reg_base = 25;
9877                 fpr_reg_base = 34;
9878               }
9879 
9880             /* Structures 5 to 8 bytes in size are passed in the general
9881                registers in the same manner as other non floating-point
9882                objects.  The data is right-justified and zero-extended
9883                to 64 bits.  This is opposite to the normal justification
9884                used on big endian targets and requires special treatment.
9885                We now define BLOCK_REG_PADDING to pad these objects.
9886                Aggregates, complex and vector types are passed in the same
9887                manner as structures.  */
9888             if (mode == BLKmode
9889                 || (type && (AGGREGATE_TYPE_P (type)
9890                                  || TREE_CODE (type) == COMPLEX_TYPE
9891                                  || TREE_CODE (type) == VECTOR_TYPE)))
9892               {
9893                 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9894                                                      gen_rtx_REG (DImode, gpr_reg_base),
9895                                                      const0_rtx);
9896                 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9897               }
9898           }
9899       else
9900         {
9901              /* We have a single word (32 bits).  A simple computation
9902                 will get us the register #s we need.  */
9903              gpr_reg_base = 26 - cum->words;
9904              fpr_reg_base = 32 + 2 * cum->words;
9905           }
9906     }
9907 
9908   /* Determine if the argument needs to be passed in both general and
9909      floating point registers.  */
9910   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9911        /* If we are doing soft-float with portable runtime, then there
9912             is no need to worry about FP regs.  */
9913        && !TARGET_SOFT_FLOAT
9914        /* The parameter must be some kind of scalar float, else we just
9915             pass it in integer registers.  */
9916        && GET_MODE_CLASS (mode) == MODE_FLOAT
9917        /* The target function must not have a prototype.  */
9918        && cum->nargs_prototype <= 0
9919        /* libcalls do not need to pass items in both FP and general
9920             registers.  */
9921        && type != NULL_TREE
9922        /* All this hair applies to "outgoing" args only.  This includes
9923             sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
9924        && !cum->incoming)
9925       /* Also pass outgoing floating arguments in both registers in indirect
9926            calls with the 32 bit ABI and the HP assembler since there is no
9927            way to the specify argument locations in static functions.  */
9928       || (!TARGET_64BIT
9929             && !TARGET_GAS
9930             && !cum->incoming
9931             && cum->indirect
9932             && GET_MODE_CLASS (mode) == MODE_FLOAT))
9933     {
9934       retval
9935           = gen_rtx_PARALLEL
9936               (mode,
9937                gen_rtvec (2,
9938                               gen_rtx_EXPR_LIST (VOIDmode,
9939                                                      gen_rtx_REG (mode, fpr_reg_base),
9940                                                      const0_rtx),
9941                               gen_rtx_EXPR_LIST (VOIDmode,
9942                                                      gen_rtx_REG (mode, gpr_reg_base),
9943                                                      const0_rtx)));
9944     }
9945   else
9946     {
9947       /* See if we should pass this parameter in a general register.  */
9948       if (TARGET_SOFT_FLOAT
9949             /* Indirect calls in the normal 32bit ABI require all arguments
9950                to be passed in general registers.  */
9951             || (!TARGET_PORTABLE_RUNTIME
9952                 && !TARGET_64BIT
9953                 && !TARGET_ELF32
9954                 && cum->indirect)
9955             /* If the parameter is not a scalar floating-point parameter,
9956                then it belongs in GPRs.  */
9957             || GET_MODE_CLASS (mode) != MODE_FLOAT
9958             /* Structure with single SFmode field belongs in GPR.  */
9959             || (type && AGGREGATE_TYPE_P (type)))
9960           retval = gen_rtx_REG (mode, gpr_reg_base);
9961       else
9962           retval = gen_rtx_REG (mode, fpr_reg_base);
9963     }
9964   return retval;
9965 }
9966 
9967 /* Arguments larger than one word are double word aligned.  */
9968 
9969 static unsigned int
pa_function_arg_boundary(machine_mode mode,const_tree type)9970 pa_function_arg_boundary (machine_mode mode, const_tree type)
9971 {
9972   bool singleword = (type
9973                          ? (integer_zerop (TYPE_SIZE (type))
9974                               || !TREE_CONSTANT (TYPE_SIZE (type))
9975                               || int_size_in_bytes (type) <= UNITS_PER_WORD)
9976                          : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9977 
9978   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9979 }
9980 
9981 /* If this arg would be passed totally in registers or totally on the stack,
9982    then this routine should return zero.  */
9983 
9984 static int
pa_arg_partial_bytes(cumulative_args_t cum_v,const function_arg_info & arg)9985 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
9986 {
9987   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9988   unsigned int max_arg_words = 8;
9989   unsigned int offset = 0;
9990   int arg_size;
9991 
9992   if (!TARGET_64BIT)
9993     return 0;
9994 
9995   arg_size = pa_function_arg_size (arg.mode, arg.type);
9996   if (arg_size > 1 && (cum->words & 1))
9997     offset = 1;
9998 
9999   if (cum->words + offset + arg_size <= max_arg_words)
10000     /* Arg fits fully into registers.  */
10001     return 0;
10002   else if (cum->words + offset >= max_arg_words)
10003     /* Arg fully on the stack.  */
10004     return 0;
10005   else
10006     /* Arg is split.  */
10007     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
10008 }
10009 
10010 
10011 /* A get_unnamed_section callback for switching to the text section.
10012 
10013    This function is only used with SOM.  Because we don't support
10014    named subspaces, we can only create a new subspace or switch back
10015    to the default text subspace.  */
10016 
10017 static void
som_output_text_section_asm_op(const char * data ATTRIBUTE_UNUSED)10018 som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED)
10019 {
10020   gcc_assert (TARGET_SOM);
10021   if (TARGET_GAS)
10022     {
10023       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
10024           {
10025             /* We only want to emit a .nsubspa directive once at the
10026                start of the function.  */
10027             cfun->machine->in_nsubspa = 1;
10028 
10029             /* Create a new subspace for the text.  This provides
10030                better stub placement and one-only functions.  */
10031             if (cfun->decl
10032                 && DECL_ONE_ONLY (cfun->decl)
10033                 && !DECL_WEAK (cfun->decl))
10034               {
10035                 output_section_asm_op ("\t.SPACE $TEXT$\n"
10036                                              "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
10037                                              "ACCESS=44,SORT=24,COMDAT");
10038                 return;
10039               }
10040           }
10041       else
10042           {
10043             /* There isn't a current function or the body of the current
10044                function has been completed.  So, we are changing to the
10045                text section to output debugging information.  Thus, we
10046                need to forget that we are in the text section so that
10047                varasm.cc will call us when text_section is selected again.  */
10048             gcc_assert (!cfun || !cfun->machine
10049                           || cfun->machine->in_nsubspa == 2);
10050             in_section = NULL;
10051           }
10052       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
10053       return;
10054     }
10055   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
10056 }
10057 
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.

   DATA is the directive text; it is handed unchanged to
   output_section_asm_op.  */

static void
som_output_comdat_data_section_asm_op (const char *data)
{
  /* Clear the record of the current section before emitting the
     directive.  NOTE(review): presumably so that selecting this section
     again re-emits the directive -- confirm against varasm.cc.  */
  in_section = NULL;
  output_section_asm_op (data);
}
10067 
10068 /* Implement TARGET_ASM_INIT_SECTIONS.  */
10069 
10070 static void
pa_som_asm_init_sections(void)10071 pa_som_asm_init_sections (void)
10072 {
10073   text_section
10074     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
10075 
10076   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
10077      is not being generated.  */
10078   som_readonly_data_section
10079     = get_unnamed_section (0, output_section_asm_op,
10080                                  "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
10081 
10082   /* When secondary definitions are not supported, SOM makes readonly
10083      data one-only by creating a new $LIT$ subspace in $TEXT$ with
10084      the comdat flag.  */
10085   som_one_only_readonly_data_section
10086     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
10087                                  "\t.SPACE $TEXT$\n"
10088                                  "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
10089                                  "ACCESS=0x2c,SORT=16,COMDAT");
10090 
10091 
10092   /* When secondary definitions are not supported, SOM makes data one-only
10093      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
10094   som_one_only_data_section
10095     = get_unnamed_section (SECTION_WRITE,
10096                                  som_output_comdat_data_section_asm_op,
10097                                  "\t.SPACE $PRIVATE$\n"
10098                                  "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
10099                                  "ACCESS=31,SORT=24,COMDAT");
10100 
10101   if (flag_tm)
10102     som_tm_clone_table_section
10103       = get_unnamed_section (0, output_section_asm_op,
10104                                    "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
10105 
10106   /* HPUX ld generates incorrect GOT entries for "T" fixups which
10107      reference data within the $TEXT$ space (for example constant
10108      strings in the $LIT$ subspace).
10109 
10110      The assemblers (GAS and HP as) both have problems with handling
10111      the difference of two symbols.  This is the other correct way to
10112      reference constant data during PIC code generation.
10113 
10114      Thus, we can't put constant data needing relocation in the $TEXT$
10115      space during PIC generation.
10116 
10117      Previously, we placed all constant data into the $DATA$ subspace
10118      when generating PIC code.  This reduces sharing, but it works
10119      correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
10120      This puts constant data not needing relocation into the $TEXT$ space.  */
10121   readonly_data_section = som_readonly_data_section;
10122 
10123   /* We must not have a reference to an external symbol defined in a
10124      shared library in a readonly section, else the SOM linker will
10125      complain.
10126 
10127      So, we force exception information into the data section.  */
10128   exception_section = data_section;
10129 }
10130 
10131 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
10132 
10133 static section *
pa_som_tm_clone_table_section(void)10134 pa_som_tm_clone_table_section (void)
10135 {
10136   return som_tm_clone_table_section;
10137 }
10138 
10139 /* On hpux10, the linker will give an error if we have a reference
10140    in the read-only data section to a symbol defined in a shared
10141    library.  Therefore, expressions that might require a reloc
10142    cannot be placed in the read-only data section.  */
10143 
10144 static section *
pa_select_section(tree exp,int reloc,unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)10145 pa_select_section (tree exp, int reloc,
10146                        unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
10147 {
10148   if (TREE_CODE (exp) == VAR_DECL
10149       && TREE_READONLY (exp)
10150       && !TREE_THIS_VOLATILE (exp)
10151       && DECL_INITIAL (exp)
10152       && (DECL_INITIAL (exp) == error_mark_node
10153           || TREE_CONSTANT (DECL_INITIAL (exp)))
10154       && !(reloc & pa_reloc_rw_mask ()))
10155     {
10156       if (TARGET_SOM
10157             && DECL_ONE_ONLY (exp)
10158             && !DECL_WEAK (exp))
10159           return som_one_only_readonly_data_section;
10160       else
10161           return readonly_data_section;
10162     }
10163   else if (CONSTANT_CLASS_P (exp)
10164              && !(reloc & pa_reloc_rw_mask ()))
10165     return readonly_data_section;
10166   else if (TARGET_SOM
10167              && TREE_CODE (exp) == VAR_DECL
10168              && DECL_ONE_ONLY (exp)
10169              && !DECL_WEAK (exp))
10170     return som_one_only_data_section;
10171   else
10172     return data_section;
10173 }
10174 
10175 /* Implement pa_elf_select_rtx_section.  If X is a function label operand
10176    and the function is in a COMDAT group, place the plabel reference in the
10177    .data.rel.ro.local section.  The linker ignores references to symbols in
10178    discarded sections from this section.  */
10179 
10180 static section *
pa_elf_select_rtx_section(machine_mode mode,rtx x,unsigned HOST_WIDE_INT align)10181 pa_elf_select_rtx_section (machine_mode mode, rtx x,
10182                                  unsigned HOST_WIDE_INT align)
10183 {
10184   if (function_label_operand (x, VOIDmode))
10185     {
10186       tree decl = SYMBOL_REF_DECL (x);
10187 
10188       if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
10189           return get_named_section (NULL, ".data.rel.ro.local", 1);
10190     }
10191 
10192   return default_elf_select_rtx_section (mode, x, align);
10193 }
10194 
10195 /* Implement pa_reloc_rw_mask.  */
10196 
10197 static int
pa_reloc_rw_mask(void)10198 pa_reloc_rw_mask (void)
10199 {
10200   if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
10201     return 3;
10202 
10203   /* HP linker does not support global relocs in readonly memory.  */
10204   return TARGET_SOM ? 2 : 0;
10205 }
10206 
10207 static void
pa_globalize_label(FILE * stream,const char * name)10208 pa_globalize_label (FILE *stream, const char *name)
10209 {
10210   /* We only handle DATA objects here, functions are globalized in
10211      ASM_DECLARE_FUNCTION_NAME.  */
10212   if (! FUNCTION_NAME_P (name))
10213   {
10214     fputs ("\t.EXPORT ", stream);
10215     assemble_name (stream, name);
10216     fputs (",DATA\n", stream);
10217   }
10218 }
10219 
10220 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10221 
10222 static rtx
pa_struct_value_rtx(tree fntype ATTRIBUTE_UNUSED,int incoming ATTRIBUTE_UNUSED)10223 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
10224                          int incoming ATTRIBUTE_UNUSED)
10225 {
10226   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
10227 }
10228 
10229 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
10230 
10231 bool
pa_return_in_memory(const_tree type,const_tree fntype ATTRIBUTE_UNUSED)10232 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10233 {
10234   /* SOM ABI says that objects larger than 64 bits are returned in memory.
10235      PA64 ABI says that objects larger than 128 bits are returned in memory.
10236      Note, int_size_in_bytes can return -1 if the size of the object is
10237      variable or larger than the maximum value that can be expressed as
10238      a HOST_WIDE_INT.   It can also return zero for an empty type.  The
10239      simplest way to handle variable and empty types is to pass them in
10240      memory.  This avoids problems in defining the boundaries of argument
10241      slots, allocating registers, etc.  */
10242   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
10243             || int_size_in_bytes (type) <= 0);
10244 }
10245 
10246 /* Structure to hold declaration and name of external symbols that are
10247    emitted by GCC.  We generate a vector of these symbols and output them
10248    at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10249    This avoids putting out names that are never really used.  */
10250 
10251 typedef struct GTY(()) extern_symbol
10252 {
10253   tree decl;          /* Declaration of the external symbol.  */
10254   const char *name;   /* Name handed to ASM_OUTPUT_EXTERNAL_REAL at file end.  */
10255 } extern_symbol;
10256 
10257 /* Define gc'd vector type for extern_symbol.  */
10258 
10259 /* Vector of extern_symbol records (stored by value, not as pointers).
   pa_hpux_asm_output_external appends to it; pa_file_end walks it and
   then frees it.  */
10260 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10261 
#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Record DECL, known to the assembler as NAME, as an external reference.
   Nothing is written to FILE here (it must be the assembler output
   file); the record is queued so that the name can be emitted at the
   end of the file if it turns out to be referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  gcc_assert (file == asm_out_file);

  extern_symbol entry;
  entry.decl = decl;
  entry.name = name;

  vec_safe_push (extern_symbols, entry);
}
#endif
10275 
10276 /* Output text required at the end of an assembler file.
10277    This includes deferred plabels and .import directives for
10278    all external symbols that were actually referenced.  */
10279 
10280 static void
pa_file_end(void)10281 pa_file_end (void)
10282 {
10283 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10284   unsigned int i;
10285   extern_symbol *p;
10286 
10287   if (!NO_DEFERRED_PROFILE_COUNTERS)
10288     output_deferred_profile_counters ();
10289 #endif
10290 
10291   output_deferred_plabels ();
10292 
10293 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10294   for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
10295     {
10296       tree decl = p->decl;
10297 
10298       if (!TREE_ASM_WRITTEN (decl)
10299             && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10300           ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10301     }
10302 
10303   vec_free (extern_symbols);
10304 #endif
10305 
10306   if (NEED_INDICATE_EXEC_STACK)
10307     file_end_indicate_exec_stack ();
10308 }
10309 
10310 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10311 
10312 static bool
pa_can_change_mode_class(machine_mode from,machine_mode to,reg_class_t rclass)10313 pa_can_change_mode_class (machine_mode from, machine_mode to,
10314                                 reg_class_t rclass)
10315 {
10316   if (from == to)
10317     return true;
10318 
10319   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10320     return true;
10321 
10322   /* Reject changes to/from modes with zero size.  */
10323   if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10324     return false;
10325 
10326   /* Reject changes to/from complex and vector modes.  */
10327   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10328       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10329     return false;
10330 
10331   /* There is no way to load QImode or HImode values directly from memory
10332      to a FP register.  SImode loads to the FP registers are not zero
10333      extended.  On the 64-bit target, this conflicts with the definition
10334      of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
10335      except for DImode to SImode on the 64-bit target.  It is handled by
10336      register renaming in pa_print_operand.  */
10337   if (MAYBE_FP_REG_CLASS_P (rclass))
10338     return TARGET_64BIT && from == DImode && to == SImode;
10339 
10340   /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10341      in specific sets of registers.  Thus, we cannot allow changing
10342      to a larger mode when it's larger than a word.  */
10343   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10344       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10345     return false;
10346 
10347   return true;
10348 }
10349 
10350 /* Implement TARGET_MODES_TIEABLE_P.
10351 
10352    We should return FALSE for QImode and HImode because these modes
10353    are not ok in the floating-point registers.  However, this prevents
10354    tieing these modes to SImode and DImode in the general registers.
10355    So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
10356    TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10357    in the floating-point registers.  */
10358 
10359 static bool
pa_modes_tieable_p(machine_mode mode1,machine_mode mode2)10360 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10361 {
10362   /* Don't tie modes in different classes.  */
10363   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10364     return false;
10365 
10366   return true;
10367 }
10368 
10369 
10370 /* Length in units of the trampoline instruction code: 24 for the
   64-bit runtime, 36 for 32-bit PA 2.0, and 48 for the remaining 32-bit
   targets.  These values match the 6, 9 and 12 instructions emitted by
   pa_asm_trampoline_template (assuming four-byte instructions), and
   pa_trampoline_init uses the value to bound the cache-flush range.  */
10371 
10372 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10373 
10374 
10375 /* Output assembler code for a block containing the constant parts
10376    of a trampoline, leaving space for the variable parts.
10377 
10378    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10379    and then branches to the specified routine.
10380 
10381    This code template is copied from text segment to stack location
10382    and then patched with pa_trampoline_init to contain valid values,
10383    and then entered as a subroutine.
10384 
10385    It is best to keep this as small as possible to avoid having to
10386    flush multiple lines in the cache.  */
10387 
10388 static void
pa_asm_trampoline_template(FILE * f)10389 pa_asm_trampoline_template (FILE *f)
10390 {
  /* Write the fixed trampoline template to F.  Three layouts exist:
     32-bit PA 2.0, other 32-bit targets, and the 64-bit runtime.  The
     zero-filled .word/.dword slots are the variable parts that
     pa_trampoline_init patches afterwards.  */
10391   if (!TARGET_64BIT)
10392     {
10393       if (TARGET_PA_20)
10394           {
            /* PA 2.0: nine instructions followed by three zero words.
               The loads at 48(%r20) and 52(%r20) correspond to the
               offsets where pa_trampoline_init stores the function
               address and the static chain value.  */
10395             fputs ("\tmfia    %r20\n", f);
10396             fputs ("\tldw               48(%r20),%r22\n", f);
10397             fputs ("\tcopy    %r22,%r21\n", f);
10398             fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10399             fputs ("\tdepwi   0,31,2,%r22\n", f);
10400             fputs ("\tldw               0(%r22),%r21\n", f);
10401             fputs ("\tldw               4(%r22),%r19\n", f);
10402             fputs ("\tbve               (%r21)\n", f);
10403             fputs ("\tldw               52(%r20),%r29\n", f);
10404             fputs ("\t.word   0\n", f);
10405             fputs ("\t.word   0\n", f);
10406             fputs ("\t.word   0\n", f);
10407           }
10408       else
10409           {
            /* Other 32-bit targets: twelve instructions.  The branch
               and deposit mnemonics differ between assembler dialect 0
               and the other dialect.  */
10410             if (ASSEMBLER_DIALECT == 0)
10411               {
10412                 fputs ("\tbl  .+8,%r20\n", f);
10413                 fputs ("\tdepi          0,31,2,%r20\n", f);
10414               }
10415             else
10416               {
10417                 fputs ("\tb,l .+8,%r20\n", f);
10418                 fputs ("\tdepwi         0,31,2,%r20\n", f);
10419               }
10420             fputs ("\tldw               40(%r20),%r22\n", f);
10421             fputs ("\tcopy    %r22,%r21\n", f);
10422             fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10423             if (ASSEMBLER_DIALECT == 0)
10424               fputs ("\tdepi  0,31,2,%r22\n", f);
10425             else
10426               fputs ("\tdepwi 0,31,2,%r22\n", f);
10427             fputs ("\tldw               0(%r22),%r21\n", f);
10428             fputs ("\tldw               4(%r22),%r19\n", f);
10429             fputs ("\tldsid   (%r21),%r1\n", f);
10430             fputs ("\tmtsp    %r1,%sr0\n", f);
10431             fputs ("\tbe                0(%sr0,%r21)\n", f);
10432             fputs ("\tldw               44(%r20),%r29\n", f);
10433           }
      /* Four trailing zero words shared by both 32-bit layouts; they
         appear to be the slots at offsets 48 through 60 that
         pa_trampoline_init fills in (assuming 4-byte instructions and
         words -- TODO confirm).  */
10434       fputs ("\t.word         0\n", f);
10435       fputs ("\t.word         0\n", f);
10436       fputs ("\t.word         0\n", f);
10437       fputs ("\t.word         0\n", f);
10438     }
10439   else
10440     {
      /* 64-bit runtime: four zero doublewords come first (the
         pa_trampoline_init header comment places the plabel at the
         beginning of the trampoline), then six instructions, then the
         function pointer and static link slots.  */
10441       fputs ("\t.dword 0\n", f);
10442       fputs ("\t.dword 0\n", f);
10443       fputs ("\t.dword 0\n", f);
10444       fputs ("\t.dword 0\n", f);
10445       fputs ("\tmfia          %r31\n", f);
10446       fputs ("\tldd 24(%r31),%r27\n", f);
10447       fputs ("\tldd 32(%r31),%r31\n", f);
10448       fputs ("\tldd 16(%r27),%r1\n", f);
10449       fputs ("\tbve (%r1)\n", f);
10450       fputs ("\tldd 24(%r27),%r27\n", f);
10451       fputs ("\t.dword 0  ; fptr\n", f);
10452       fputs ("\t.dword 0  ; static link\n", f);
10453     }
10454 }
10455 
10456 /* Emit RTL insns to initialize the variable parts of a trampoline.
10457    FNADDR is an RTX for the address of the function's pure code.
10458    CXT is an RTX for the static chain value for the function.
10459 
10460    Move the function address to the trampoline template at offset 48.
10461    Move the static chain value to trampoline template at offset 52.
10462    Move the trampoline address to trampoline template at offset 56.
10463    Move r19 to trampoline template at offset 60.  The latter two
10464    words create a plabel for the indirect call to the trampoline.
10465 
10466    A similar sequence is used for the 64-bit port but the plabel is
10467    at the beginning of the trampoline.
10468 
10469    Finally, the cache entries for the trampoline code are flushed.
10470    This is necessary to ensure that the trampoline instruction sequence
10471    is written to memory prior to any attempts at prefetching the code
10472    sequence.  */
10473 
10474 static void
pa_trampoline_init(rtx m_tramp,tree fndecl,rtx chain_value)10475 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10476 {
10477   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10478   rtx start_addr = gen_reg_rtx (Pmode);
10479   rtx end_addr = gen_reg_rtx (Pmode);
10480   rtx line_length = gen_reg_rtx (Pmode);
10481   rtx r_tramp, tmp;
10482 
  /* Copy the fixed template into the trampoline memory M_TRAMP, then
     get the trampoline's address into a register for the address
     arithmetic below.  */
10483   emit_block_move (m_tramp, assemble_trampoline_template (),
10484                        GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10485   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10486 
10487   if (!TARGET_64BIT)
10488     {
      /* 32-bit layout: function address at offset 48, static chain
         value at offset 52.  */
10489       tmp = adjust_address (m_tramp, Pmode, 48);
10490       emit_move_insn (tmp, fnaddr);
10491       tmp = adjust_address (m_tramp, Pmode, 52);
10492       emit_move_insn (tmp, chain_value);
10493 
10494       /* Create a fat pointer for the trampoline.  */
      /* The pair is the trampoline address at offset 56 and the
         contents of register 19 at offset 60.  */
10495       tmp = adjust_address (m_tramp, Pmode, 56);
10496       emit_move_insn (tmp, r_tramp);
10497       tmp = adjust_address (m_tramp, Pmode, 60);
10498       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10499 
10500       /* fdc and fic only use registers for the address to flush,
10501            they do not accept integer displacements.  We align the
10502            start and end addresses to the beginning of their respective
10503            cache lines to minimize the number of lines flushed.  */
10504       emit_insn (gen_andsi3 (start_addr, r_tramp,
10505                                    GEN_INT (-MIN_CACHELINE_SIZE)));
10506       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10507                                                        TRAMPOLINE_CODE_SIZE-1));
10508       emit_insn (gen_andsi3 (end_addr, tmp,
10509                                    GEN_INT (-MIN_CACHELINE_SIZE)));
10510       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10511       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10512       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10513                                             gen_reg_rtx (Pmode),
10514                                             gen_reg_rtx (Pmode)));
10515     }
10516   else
10517     {
      /* 64-bit layout: function address at offset 56, static chain
         value at offset 64.  */
10518       tmp = adjust_address (m_tramp, Pmode, 56);
10519       emit_move_insn (tmp, fnaddr);
10520       tmp = adjust_address (m_tramp, Pmode, 64);
10521       emit_move_insn (tmp, chain_value);
10522 
10523       /* Create a fat pointer for the trampoline.  */
      /* Here the pair lives at offsets 16 and 24: the address 32 bytes
         into the trampoline, followed by the contents of register 27.  */
10524       tmp = adjust_address (m_tramp, Pmode, 16);
10525       emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10526                                                                           r_tramp, 32)));
10527       tmp = adjust_address (m_tramp, Pmode, 24);
10528       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10529 
10530       /* fdc and fic only use registers for the address to flush,
10531            they do not accept integer displacements.  We align the
10532            start and end addresses to the beginning of their respective
10533            cache lines to minimize the number of lines flushed.  */
      /* The flushed range starts at offset 32 rather than at the start
         of the trampoline; that is where the template's instructions
         begin, after its four leading .dword slots.  */
10534       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10535       emit_insn (gen_anddi3 (start_addr, tmp,
10536                                    GEN_INT (-MIN_CACHELINE_SIZE)));
10537       tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10538                                                        TRAMPOLINE_CODE_SIZE - 1));
10539       emit_insn (gen_anddi3 (end_addr, tmp,
10540                                    GEN_INT (-MIN_CACHELINE_SIZE)));
10541       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10542       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10543       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10544                                             gen_reg_rtx (Pmode),
10545                                             gen_reg_rtx (Pmode)));
10546     }
10547 
  /* Where the target provides it, also emit a library call to
     __enable_execute_stack, passing the trampoline address.  */
10548 #ifdef HAVE_ENABLE_EXECUTE_STACK
10549   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10550                          LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10551 #endif
10552 }
10553 
10554 /* Perform any machine-specific adjustment in the address of the trampoline.
10555    ADDR contains the address that was passed to pa_trampoline_init.
10556    Adjust the trampoline address to point to the plabel at offset 56.  */
10557 
10558 static rtx
pa_trampoline_adjust_address(rtx addr)10559 pa_trampoline_adjust_address (rtx addr)
10560 {
10561   if (!TARGET_64BIT)
10562     addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10563   return addr;
10564 }
10565 
10566 static rtx
pa_delegitimize_address(rtx orig_x)10567 pa_delegitimize_address (rtx orig_x)
10568 {
10569   rtx x = delegitimize_mem_from_attrs (orig_x);
10570 
10571   if (GET_CODE (x) == LO_SUM
10572       && GET_CODE (XEXP (x, 1)) == UNSPEC
10573       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10574     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10575   return x;
10576 }
10577 
10578 static rtx
pa_internal_arg_pointer(void)10579 pa_internal_arg_pointer (void)
10580 {
10581   /* The argument pointer and the hard frame pointer are the same in
10582      the 32-bit runtime, so we don't need a copy.  */
10583   if (TARGET_64BIT)
10584     return copy_to_reg (virtual_incoming_args_rtx);
10585   else
10586     return virtual_incoming_args_rtx;
10587 }
10588 
10589 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10590    Frame pointer elimination is automatically handled.  */
10591 
10592 static bool
pa_can_eliminate(const int from,const int to)10593 pa_can_eliminate (const int from, const int to)
10594 {
10595   /* The argument cannot be eliminated in the 64-bit runtime.  */
10596   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10597     return false;
10598 
10599   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10600           ? ! frame_pointer_needed
10601           : true);
10602 }
10603 
10604 /* Define the offset between two registers, FROM to be eliminated and its
10605    replacement TO, at the start of a routine.  */
10606 HOST_WIDE_INT
pa_initial_elimination_offset(int from,int to)10607 pa_initial_elimination_offset (int from, int to)
10608 {
10609   HOST_WIDE_INT offset;
10610 
10611   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10612       && to == STACK_POINTER_REGNUM)
10613     offset = -pa_compute_frame_size (get_frame_size (), 0);
10614   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10615     offset = 0;
10616   else
10617     gcc_unreachable ();
10618 
10619   return offset;
10620 }
10621 
10622 static void
pa_conditional_register_usage(void)10623 pa_conditional_register_usage (void)
10624 {
10625   int i;
10626 
10627   if (!TARGET_64BIT && !TARGET_PA_11)
10628     {
10629       for (i = 56; i <= FP_REG_LAST; i++)
10630           fixed_regs[i] = call_used_regs[i] = 1;
10631       for (i = 33; i < 56; i += 2)
10632           fixed_regs[i] = call_used_regs[i] = 1;
10633     }
10634   if (TARGET_SOFT_FLOAT)
10635     {
10636       for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10637           fixed_regs[i] = call_used_regs[i] = 1;
10638     }
10639   if (flag_pic)
10640     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10641 }
10642 
10643 /* Target hook for c_mode_for_suffix.  */
10644 
10645 static machine_mode
pa_c_mode_for_suffix(char suffix)10646 pa_c_mode_for_suffix (char suffix)
10647 {
10648   if (HPUX_LONG_DOUBLE_LIBRARY)
10649     {
10650       if (suffix == 'q')
10651           return TFmode;
10652     }
10653 
10654   return VOIDmode;
10655 }
10656 
10657 /* Target hook for function_section.  */
10658 
10659 static section *
pa_function_section(tree decl,enum node_frequency freq,bool startup,bool exit)10660 pa_function_section (tree decl, enum node_frequency freq,
10661                          bool startup, bool exit)
10662 {
10663   /* Put functions in text section if target doesn't have named sections.  */
10664   if (!targetm_common.have_named_sections)
10665     return text_section;
10666 
10667   /* Force nested functions into the same section as the containing
10668      function.  */
10669   if (decl
10670       && DECL_SECTION_NAME (decl) == NULL
10671       && DECL_CONTEXT (decl) != NULL_TREE
10672       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10673       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10674     return function_section (DECL_CONTEXT (decl));
10675 
10676   /* Otherwise, use the default function section.  */
10677   return default_function_section (decl, freq, startup, exit);
10678 }
10679 
10680 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10681 
10682    In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
10683    that need more than three instructions to load prior to reload.  This
10684    limit is somewhat arbitrary.  It takes three instructions to load a
10685    CONST_INT from memory but two are memory accesses.  It may be better
10686    to increase the allowed range for CONST_INTS.  We may also be able
10687    to handle CONST_DOUBLES.  */
10688 
10689 static bool
pa_legitimate_constant_p(machine_mode mode,rtx x)10690 pa_legitimate_constant_p (machine_mode mode, rtx x)
10691 {
10692   if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10693     return false;
10694 
10695   if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10696     return false;
10697 
10698   /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10699      legitimate constants.  The other variants can't be handled by
10700      the move patterns after reload starts.  */
10701   if (tls_referenced_p (x))
10702     return false;
10703 
10704   if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10705     return false;
10706 
10707   if (TARGET_64BIT
10708       && HOST_BITS_PER_WIDE_INT > 32
10709       && GET_CODE (x) == CONST_INT
10710       && !reload_in_progress
10711       && !reload_completed
10712       && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10713       && !pa_cint_ok_for_move (UINTVAL (x)))
10714     return false;
10715 
10716   if (function_label_operand (x, mode))
10717     return false;
10718 
10719   return true;
10720 }
10721 
10722 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10723 
10724 static unsigned int
pa_section_type_flags(tree decl,const char * name,int reloc)10725 pa_section_type_flags (tree decl, const char *name, int reloc)
10726 {
10727   unsigned int flags;
10728 
10729   flags = default_section_type_flags (decl, name, reloc);
10730 
10731   /* Function labels are placed in the constant pool.  This can
10732      cause a section conflict if decls are put in ".data.rel.ro"
10733      or ".data.rel.ro.local" using the __attribute__ construct.  */
10734   if (strcmp (name, ".data.rel.ro") == 0
10735       || strcmp (name, ".data.rel.ro.local") == 0)
10736     flags |= SECTION_WRITE | SECTION_RELRO;
10737 
10738   return flags;
10739 }
10740 
10741 /* pa_legitimate_address_p recognizes an RTL expression that is a
10742    valid memory address for an instruction.  The MODE argument is the
10743    machine mode for the MEM expression that wants to use this address.
10744 
10745    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10746    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10747    available with floating point loads and stores, and integer loads.
10748    We get better code by allowing indexed addresses in the initial
10749    RTL generation.
10750 
10751    The acceptance of indexed addresses as legitimate implies that we
10752    must provide patterns for doing indexed integer stores, or the move
10753    expanders must force the address of an indexed store to a register.
10754    We have adopted the latter approach.
10755 
10756    Another function of pa_legitimate_address_p is to ensure that
10757    the base register is a valid pointer for indexed instructions.
10758    On targets that have non-equivalent space registers, we have to
10759    know at the time of assembler output which register in a REG+REG
10760    pair is the base register.  The REG_POINTER flag is sometimes lost
10761    in reload and the following passes, so it can't be relied on during
10762    code generation.  Thus, we either have to canonicalize the order
10763    of the registers in REG+REG indexed addresses, or treat REG+REG
10764    addresses separately and provide patterns for both permutations.
10765 
10766    The latter approach requires several hundred additional lines of
10767    code in pa.md.  The downside to canonicalizing is that a PLUS
10768    in the wrong order can't combine to form a scaled indexed
10769    memory operand.  As we won't need to canonicalize the operands if
10770    the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10771 
10772    We initially break out scaled indexed addresses in canonical order
10773    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10774    scaled indexed addresses during RTL generation.  However, fold_rtx
10775    has its own opinion on how the operands of a PLUS should be ordered.
10776    If one of the operands is equivalent to a constant, it will make
10777    that operand the second operand.  As the base register is likely to
10778    be equivalent to a SYMBOL_REF, we have made it the second operand.
10779 
10780    pa_legitimate_address_p accepts REG+REG as legitimate when the
10781    operands are in the order INDEX+BASE on targets with non-equivalent
10782    space registers, and in any order on targets with equivalent space
10783    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10784 
10785    We treat a SYMBOL_REF as legitimate if it is part of the current
10786    function's constant-pool, because such addresses can actually be
10787    output as REG+SMALLINT.  */
10788 
10789 static bool
pa_legitimate_address_p(machine_mode mode,rtx x,bool strict)10790 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10791 {
  /* Case 1: a bare base register, or a pre/post increment/decrement of
     one.  STRICT selects the strict or non-strict base register
     check.  */
10792   if ((REG_P (x)
10793        && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10794                       : REG_OK_FOR_BASE_P (x)))
10795       || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10796              || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10797             && REG_P (XEXP (x, 0))
10798             && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10799                          : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10800     return true;
10801 
  /* Case 2: a sum.  One operand must be a valid base register; the
     other is then classified as a constant displacement, an unscaled
     register index, or a scaled (MULT) index.  */
10802   if (GET_CODE (x) == PLUS)
10803     {
10804       rtx base, index;
10805 
10806       /* For REG+REG, the base register should be in XEXP (x, 1),
10807            so check it first.  */
10808       if (REG_P (XEXP (x, 1))
10809             && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10810                          : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10811           base = XEXP (x, 1), index = XEXP (x, 0);
10812       else if (REG_P (XEXP (x, 0))
10813                  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10814                                 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10815           base = XEXP (x, 0), index = XEXP (x, 1);
10816       else
10817           return false;
10818 
      /* REG + constant displacement.  Displacements accepted by
         INT_5_BITS are always fine; larger ones are subject to the
         INT14_OK_STRICT restriction and base14_operand.  */
10819       if (GET_CODE (index) == CONST_INT)
10820           {
10821             if (INT_5_BITS (index))
10822               return true;
10823 
10824             /* When INT14_OK_STRICT is false, a secondary reload is needed
10825                to adjust the displacement of SImode and DImode floating point
10826                instructions but this may fail when the register also needs
10827                reloading.  So, we return false when STRICT is true.  We
10828                also reject long displacements for float mode addresses since
10829                the majority of accesses will use floating point instructions
10830                that don't support 14-bit offsets.  */
10831             if (!INT14_OK_STRICT
10832                 && (strict || !(reload_in_progress || reload_completed))
10833                 && mode != QImode
10834                 && mode != HImode)
10835               return false;
10836 
10837             return base14_operand (index, mode);
10838           }
10839 
      /* REG + REG with an unscaled register index.  */
10840       if (!TARGET_DISABLE_INDEXING
10841             /* Only accept the "canonical" INDEX+BASE operand order
10842                on targets with non-equivalent space registers.  */
10843             && (TARGET_NO_SPACE_REGS
10844                 ? REG_P (index)
10845                 : (base == XEXP (x, 1) && REG_P (index)
10846                      && (reload_completed
10847                          || (reload_in_progress && HARD_REGISTER_P (base))
10848                          || REG_POINTER (base))
10849                      && (reload_completed
10850                          || (reload_in_progress && HARD_REGISTER_P (index))
10851                          || !REG_POINTER (index))))
10852             && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10853             && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10854                          : REG_OK_FOR_INDEX_P (index))
10855             && borx_reg_operand (base, Pmode)
10856             && borx_reg_operand (index, Pmode))
10857           return true;
10858 
      /* REG + (REG * SCALE).  The scale must be a CONST_INT equal to
         the size of MODE, and the index register must be in Pmode.  */
10859       if (!TARGET_DISABLE_INDEXING
10860             && GET_CODE (index) == MULT
10861             /* Only accept base operands with the REG_POINTER flag prior to
10862                reload on targets with non-equivalent space registers.  */
10863             && (TARGET_NO_SPACE_REGS
10864                 || (base == XEXP (x, 1)
10865                       && (reload_completed
10866                           || (reload_in_progress && HARD_REGISTER_P (base))
10867                           || REG_POINTER (base))))
10868             && REG_P (XEXP (index, 0))
10869             && GET_MODE (XEXP (index, 0)) == Pmode
10870             && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10871             && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10872                          : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10873             && GET_CODE (XEXP (index, 1)) == CONST_INT
10874             && INTVAL (XEXP (index, 1))
10875                == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10876             && borx_reg_operand (base, Pmode))
10877           return true;
10878 
10879       return false;
10880     }
10881 
  /* Case 3: LO_SUM of a base register (looking through a SUBREG) and
     either an UNSPEC, for Pmode accesses, or a constant.  */
10882   if (GET_CODE (x) == LO_SUM)
10883     {
10884       rtx y = XEXP (x, 0);
10885 
10886       if (GET_CODE (y) == SUBREG)
10887           y = SUBREG_REG (y);
10888 
10889       if (REG_P (y)
10890             && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10891                          : REG_OK_FOR_BASE_P (y)))
10892           {
10893             /* Needed for -fPIC */
10894             if (mode == Pmode
10895                 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10896               return true;
10897 
            /* Same INT14_OK_STRICT restriction as for REG + constant
               displacement above.  */
10898             if (!INT14_OK_STRICT
10899                 && (strict || !(reload_in_progress || reload_completed))
10900                 && mode != QImode
10901                 && mode != HImode)
10902               return false;
10903 
10904             if (CONSTANT_P (XEXP (x, 1)))
10905               return true;
10906           }
10907       return false;
10908     }
10909 
  /* Case 4: a bare constant address accepted by INT_5_BITS.  */
10910   if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10911     return true;
10912 
10913   return false;
10914 }
10915 
10916 /* Look for machine dependent ways to make the invalid address AD a
10917    valid address.
10918 
10919    For the PA, transform:
10920 
10921         memory(X + <large int>)
10922 
10923    into:
10924 
10925         if (<large int> & mask) >= (mask + 1) / 2
10926           Y = (<large int> & ~mask) + mask + 1  Round up.
10927         else
10928           Y = (<large int> & ~mask)             Round down.
10929         Z = X + Y
10930         memory (Z + (<large int> - Y));
10931 
10932    This makes reload inheritance and reload_cse work better since Z
10933    can be reused.
10934 
10935    There may be more opportunities to improve code with this hook.  */
10936 
10937 rtx
pa_legitimize_reload_address(rtx ad,machine_mode mode,int opnum,int type,int ind_levels ATTRIBUTE_UNUSED)10938 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10939                                     int opnum, int type,
10940                                     int ind_levels ATTRIBUTE_UNUSED)
10941 {
10942   long offset, newoffset, mask;
10943   rtx new_rtx, temp = NULL_RTX;
10944 
10945   mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10946             && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10947 
10948   if (optimize && GET_CODE (ad) == PLUS)
10949     temp = simplify_binary_operation (PLUS, Pmode,
10950                                               XEXP (ad, 0), XEXP (ad, 1));
10951 
10952   new_rtx = temp ? temp : ad;
10953 
10954   if (optimize
10955       && GET_CODE (new_rtx) == PLUS
10956       && GET_CODE (XEXP (new_rtx, 0)) == REG
10957       && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10958     {
10959       offset = INTVAL (XEXP ((new_rtx), 1));
10960 
10961       /* Choose rounding direction.  Round up if we are >= halfway.  */
10962       if ((offset & mask) >= ((mask + 1) / 2))
10963           newoffset = (offset & ~mask) + mask + 1;
10964       else
10965           newoffset = offset & ~mask;
10966 
10967       /* Ensure that long displacements are aligned.  */
10968       if (mask == 0x3fff
10969             && (GET_MODE_CLASS (mode) == MODE_FLOAT
10970                 || (TARGET_64BIT && (mode) == DImode)))
10971           newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10972 
10973       if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10974           {
10975             temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10976                                      GEN_INT (newoffset));
10977             ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10978             push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10979                            BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10980                            opnum, (enum reload_type) type);
10981             return ad;
10982           }
10983     }
10984 
10985   return NULL_RTX;
10986 }
10987 
10988 /* Output address vector.  */
10989 
10990 void
pa_output_addr_vec(rtx lab,rtx body)10991 pa_output_addr_vec (rtx lab, rtx body)
10992 {
10993   int idx, vlen = XVECLEN (body, 0);
10994 
10995   if (!TARGET_SOM)
10996     fputs ("\t.align 4\n", asm_out_file);
10997   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10998   if (TARGET_GAS)
10999     fputs ("\t.begin_brtab\n", asm_out_file);
11000   for (idx = 0; idx < vlen; idx++)
11001     {
11002       ASM_OUTPUT_ADDR_VEC_ELT
11003           (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
11004     }
11005   if (TARGET_GAS)
11006     fputs ("\t.end_brtab\n", asm_out_file);
11007 }
11008 
11009 /* Output address difference vector.  */
11010 
11011 void
pa_output_addr_diff_vec(rtx lab,rtx body)11012 pa_output_addr_diff_vec (rtx lab, rtx body)
11013 {
11014   rtx base = XEXP (XEXP (body, 0), 0);
11015   int idx, vlen = XVECLEN (body, 1);
11016 
11017   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
11018   if (TARGET_GAS)
11019     fputs ("\t.begin_brtab\n", asm_out_file);
11020   for (idx = 0; idx < vlen; idx++)
11021     {
11022       ASM_OUTPUT_ADDR_DIFF_ELT
11023           (asm_out_file,
11024            body,
11025            CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
11026            CODE_LABEL_NUMBER (base));
11027     }
11028   if (TARGET_GAS)
11029     fputs ("\t.end_brtab\n", asm_out_file);
11030 }
11031 
11032 /* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
11033    arguments passed by hidden reference in the 32-bit HP runtime.  Users
11034    can override this behavior for better compatibility with openmp at the
11035    risk of library incompatibilities.  Arguments are always passed by value
11036    in the 64-bit HP runtime.  */
11037 
11038 static bool
pa_callee_copies(cumulative_args_t,const function_arg_info &)11039 pa_callee_copies (cumulative_args_t, const function_arg_info &)
11040 {
11041   return !TARGET_CALLER_COPIES;
11042 }
11043 
11044 /* Implement TARGET_HARD_REGNO_NREGS.  */
11045 
11046 static unsigned int
pa_hard_regno_nregs(unsigned int regno ATTRIBUTE_UNUSED,machine_mode mode)11047 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
11048 {
11049   return PA_HARD_REGNO_NREGS (regno, mode);
11050 }
11051 
11052 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
11053 
11054 static bool
pa_hard_regno_mode_ok(unsigned int regno,machine_mode mode)11055 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
11056 {
11057   return PA_HARD_REGNO_MODE_OK (regno, mode);
11058 }
11059 
11060 /* Implement TARGET_STARTING_FRAME_OFFSET.
11061 
11062    On the 32-bit ports, we reserve one slot for the previous frame
11063    pointer and one fill slot.  The fill slot is for compatibility
11064    with HP compiled programs.  On the 64-bit ports, we reserve one
11065    slot for the previous frame pointer.  */
11066 
11067 static HOST_WIDE_INT
pa_starting_frame_offset(void)11068 pa_starting_frame_offset (void)
11069 {
11070   return 8;
11071 }
11072 
11073 /* Figure out the size in words of the function argument.  */
11074 
11075 int
pa_function_arg_size(machine_mode mode,const_tree type)11076 pa_function_arg_size (machine_mode mode, const_tree type)
11077 {
11078   HOST_WIDE_INT size;
11079 
11080   size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
11081 
11082   /* The 64-bit runtime does not restrict the size of stack frames,
11083      but the gcc calling conventions limit argument sizes to 1G.  Our
11084      prologue/epilogue code limits frame sizes to just under 32 bits.
11085      1G is also the maximum frame size that can be handled by the HPUX
11086      unwind descriptor.  Since very large TYPE_SIZE_UNIT values can
11087      occur for (parallel:BLK []), we need to ignore large arguments
11088      passed by value.  */
11089   if (size >= (1 << (HOST_BITS_PER_INT - 2)))
11090     size = 0;
11091   return (int) CEIL (size, UNITS_PER_WORD);
11092 }
11093 
11094 #include "gt-pa.h"
11095