1 //===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 #include "AMDGPUInstrInfo.h"
15 #include "AMDGPUISelLowering.h" // For AMDGPUISD
16 #include "AMDGPURegisterInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600InstrInfo.h"
19 #include "SIDefines.h"
20 #include "SIISelLowering.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/FunctionLoweringInfo.h"
23 #include "llvm/CodeGen/PseudoSourceValue.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/CodeGen/SelectionDAGISel.h"
28 #include "llvm/IR/Function.h"
29
30 using namespace llvm;
31
32 //===----------------------------------------------------------------------===//
33 // Instruction Selector Implementation
34 //===----------------------------------------------------------------------===//
35
36 namespace {
37 /// AMDGPU specific code to select AMDGPU machine instructions for
38 /// SelectionDAG operations.
39 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
40 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
41 // make the right decision when generating code for different targets.
42 const AMDGPUSubtarget *Subtarget;
43 public:
44 AMDGPUDAGToDAGISel(TargetMachine &TM);
45 virtual ~AMDGPUDAGToDAGISel();
46 bool runOnMachineFunction(MachineFunction &MF) override;
47 SDNode *Select(SDNode *N) override;
48 const char *getPassName() const override;
49 void PostprocessISelDAG() override;
50
51 private:
52 bool isInlineImmediate(SDNode *N) const;
53 bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
54 const R600InstrInfo *TII);
55 bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
56 bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
57
58 // Complex pattern selectors
59 bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
60 bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
61 bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
62
63 static bool checkType(const Value *ptr, unsigned int addrspace);
64 static bool checkPrivateAddress(const MachineMemOperand *Op);
65
66 static bool isGlobalStore(const StoreSDNode *N);
67 static bool isFlatStore(const StoreSDNode *N);
68 static bool isPrivateStore(const StoreSDNode *N);
69 static bool isLocalStore(const StoreSDNode *N);
70 static bool isRegionStore(const StoreSDNode *N);
71
72 bool isCPLoad(const LoadSDNode *N) const;
73 bool isConstantLoad(const LoadSDNode *N, int cbID) const;
74 bool isGlobalLoad(const LoadSDNode *N) const;
75 bool isFlatLoad(const LoadSDNode *N) const;
76 bool isParamLoad(const LoadSDNode *N) const;
77 bool isPrivateLoad(const LoadSDNode *N) const;
78 bool isLocalLoad(const LoadSDNode *N) const;
79 bool isRegionLoad(const LoadSDNode *N) const;
80
81 SDNode *glueCopyToM0(SDNode *N) const;
82
83 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
84 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
85 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
86 SDValue& Offset);
87 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
88 bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
89 bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
90 unsigned OffsetBits) const;
91 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
92 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
93 SDValue &Offset1) const;
94 void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
95 SDValue &SOffset, SDValue &Offset, SDValue &Offen,
96 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
97 SDValue &TFE) const;
98 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
99 SDValue &SOffset, SDValue &Offset, SDValue &GLC,
100 SDValue &SLC, SDValue &TFE) const;
101 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
102 SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
103 SDValue &SLC) const;
104 bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
105 SDValue &SOffset, SDValue &ImmOffset) const;
106 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
107 SDValue &Offset, SDValue &GLC, SDValue &SLC,
108 SDValue &TFE) const;
109 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
110 SDValue &Offset, SDValue &GLC) const;
111 SDNode *SelectAddrSpaceCast(SDNode *N);
112 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
113 bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
114 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
115 SDValue &Clamp, SDValue &Omod) const;
116 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
117 SDValue &Clamp, SDValue &Omod) const;
118
119 bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
120 SDValue &Omod) const;
121 bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
122 SDValue &Clamp,
123 SDValue &Omod) const;
124
125 SDNode *SelectADD_SUB_I64(SDNode *N);
126 SDNode *SelectDIV_SCALE(SDNode *N);
127
128 SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
129 uint32_t Offset, uint32_t Width);
130 SDNode *SelectS_BFEFromShifts(SDNode *N);
131 SDNode *SelectS_BFE(SDNode *N);
132
133 // Include the pieces autogenerated from the target description.
134 #include "AMDGPUGenDAGISel.inc"
135 };
136 } // end anonymous namespace
137
138 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
139 // DAG, ready for instruction scheduling.
createAMDGPUISelDag(TargetMachine & TM)140 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
141 return new AMDGPUDAGToDAGISel(TM);
142 }
143
AMDGPUDAGToDAGISel(TargetMachine & TM)144 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
145 : SelectionDAGISel(TM) {}
146
runOnMachineFunction(MachineFunction & MF)147 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
148 Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
149 return SelectionDAGISel::runOnMachineFunction(MF);
150 }
151
~AMDGPUDAGToDAGISel()152 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
153 }
154
isInlineImmediate(SDNode * N) const155 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
156 const SITargetLowering *TL
157 = static_cast<const SITargetLowering *>(getTargetLowering());
158 return TL->analyzeImmediate(N) == 0;
159 }
160
161 /// \brief Determine the register class for \p OpNo
162 /// \returns The register class of the virtual register that will be used for
163 /// the given operand number \OpNo or NULL if the register class cannot be
164 /// determined.
getOperandRegClass(SDNode * N,unsigned OpNo) const165 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
166 unsigned OpNo) const {
167 if (!N->isMachineOpcode())
168 return nullptr;
169
170 switch (N->getMachineOpcode()) {
171 default: {
172 const MCInstrDesc &Desc =
173 Subtarget->getInstrInfo()->get(N->getMachineOpcode());
174 unsigned OpIdx = Desc.getNumDefs() + OpNo;
175 if (OpIdx >= Desc.getNumOperands())
176 return nullptr;
177 int RegClass = Desc.OpInfo[OpIdx].RegClass;
178 if (RegClass == -1)
179 return nullptr;
180
181 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
182 }
183 case AMDGPU::REG_SEQUENCE: {
184 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
185 const TargetRegisterClass *SuperRC =
186 Subtarget->getRegisterInfo()->getRegClass(RCID);
187
188 SDValue SubRegOp = N->getOperand(OpNo + 1);
189 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
190 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
191 SubRegIdx);
192 }
193 }
194 }
195
SelectADDRParam(SDValue Addr,SDValue & R1,SDValue & R2)196 bool AMDGPUDAGToDAGISel::SelectADDRParam(
197 SDValue Addr, SDValue& R1, SDValue& R2) {
198
199 if (Addr.getOpcode() == ISD::FrameIndex) {
200 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
201 R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
202 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
203 } else {
204 R1 = Addr;
205 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
206 }
207 } else if (Addr.getOpcode() == ISD::ADD) {
208 R1 = Addr.getOperand(0);
209 R2 = Addr.getOperand(1);
210 } else {
211 R1 = Addr;
212 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
213 }
214 return true;
215 }
216
SelectADDR(SDValue Addr,SDValue & R1,SDValue & R2)217 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
218 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
219 Addr.getOpcode() == ISD::TargetGlobalAddress) {
220 return false;
221 }
222 return SelectADDRParam(Addr, R1, R2);
223 }
224
225
SelectADDR64(SDValue Addr,SDValue & R1,SDValue & R2)226 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
227 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
228 Addr.getOpcode() == ISD::TargetGlobalAddress) {
229 return false;
230 }
231
232 if (Addr.getOpcode() == ISD::FrameIndex) {
233 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
234 R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
235 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
236 } else {
237 R1 = Addr;
238 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
239 }
240 } else if (Addr.getOpcode() == ISD::ADD) {
241 R1 = Addr.getOperand(0);
242 R2 = Addr.getOperand(1);
243 } else {
244 R1 = Addr;
245 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
246 }
247 return true;
248 }
249
glueCopyToM0(SDNode * N) const250 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
251 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
252 !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
253 AMDGPUAS::LOCAL_ADDRESS))
254 return N;
255
256 const SITargetLowering& Lowering =
257 *static_cast<const SITargetLowering*>(getTargetLowering());
258
259 // Write max value to m0 before each load operation
260
261 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
262 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
263
264 SDValue Glue = M0.getValue(1);
265
266 SmallVector <SDValue, 8> Ops;
267 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
268 Ops.push_back(N->getOperand(i));
269 }
270 Ops.push_back(Glue);
271 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
272
273 return N;
274 }
275
Select(SDNode * N)276 SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
277 unsigned int Opc = N->getOpcode();
278 if (N->isMachineOpcode()) {
279 N->setNodeId(-1);
280 return nullptr; // Already selected.
281 }
282
283 if (isa<AtomicSDNode>(N))
284 N = glueCopyToM0(N);
285
286 switch (Opc) {
287 default: break;
288 // We are selecting i64 ADD here instead of custom lower it during
289 // DAG legalization, so we can fold some i64 ADDs used for address
290 // calculation into the LOAD and STORE instructions.
291 case ISD::ADD:
292 case ISD::SUB: {
293 if (N->getValueType(0) != MVT::i64 ||
294 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
295 break;
296
297 return SelectADD_SUB_I64(N);
298 }
299 case ISD::SCALAR_TO_VECTOR:
300 case AMDGPUISD::BUILD_VERTICAL_VECTOR:
301 case ISD::BUILD_VECTOR: {
302 unsigned RegClassID;
303 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
304 EVT VT = N->getValueType(0);
305 unsigned NumVectorElts = VT.getVectorNumElements();
306 EVT EltVT = VT.getVectorElementType();
307 assert(EltVT.bitsEq(MVT::i32));
308 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
309 bool UseVReg = true;
310 for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
311 U != E; ++U) {
312 if (!U->isMachineOpcode()) {
313 continue;
314 }
315 const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
316 if (!RC) {
317 continue;
318 }
319 if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
320 UseVReg = false;
321 }
322 }
323 switch(NumVectorElts) {
324 case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
325 AMDGPU::SReg_32RegClassID;
326 break;
327 case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
328 AMDGPU::SReg_64RegClassID;
329 break;
330 case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
331 AMDGPU::SReg_128RegClassID;
332 break;
333 case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
334 AMDGPU::SReg_256RegClassID;
335 break;
336 case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
337 AMDGPU::SReg_512RegClassID;
338 break;
339 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
340 }
341 } else {
342 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
343 // that adds a 128 bits reg copy when going through TwoAddressInstructions
344 // pass. We want to avoid 128 bits copies as much as possible because they
345 // can't be bundled by our scheduler.
346 switch(NumVectorElts) {
347 case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
348 case 4:
349 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
350 RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
351 else
352 RegClassID = AMDGPU::R600_Reg128RegClassID;
353 break;
354 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
355 }
356 }
357
358 SDLoc DL(N);
359 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
360
361 if (NumVectorElts == 1) {
362 return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
363 N->getOperand(0), RegClass);
364 }
365
366 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
367 "supported yet");
368 // 16 = Max Num Vector Elements
369 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
370 // 1 = Vector Register Class
371 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
372
373 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
374 bool IsRegSeq = true;
375 unsigned NOps = N->getNumOperands();
376 for (unsigned i = 0; i < NOps; i++) {
377 // XXX: Why is this here?
378 if (isa<RegisterSDNode>(N->getOperand(i))) {
379 IsRegSeq = false;
380 break;
381 }
382 RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
383 RegSeqArgs[1 + (2 * i) + 1] =
384 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
385 MVT::i32);
386 }
387
388 if (NOps != NumVectorElts) {
389 // Fill in the missing undef elements if this was a scalar_to_vector.
390 assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
391
392 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
393 DL, EltVT);
394 for (unsigned i = NOps; i < NumVectorElts; ++i) {
395 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
396 RegSeqArgs[1 + (2 * i) + 1] =
397 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
398 }
399 }
400
401 if (!IsRegSeq)
402 break;
403 return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
404 RegSeqArgs);
405 }
406 case ISD::BUILD_PAIR: {
407 SDValue RC, SubReg0, SubReg1;
408 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
409 break;
410 }
411 SDLoc DL(N);
412 if (N->getValueType(0) == MVT::i128) {
413 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
414 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
415 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
416 } else if (N->getValueType(0) == MVT::i64) {
417 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
418 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
419 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
420 } else {
421 llvm_unreachable("Unhandled value type for BUILD_PAIR");
422 }
423 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
424 N->getOperand(1), SubReg1 };
425 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
426 DL, N->getValueType(0), Ops);
427 }
428
429 case ISD::Constant:
430 case ISD::ConstantFP: {
431 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
432 N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
433 break;
434
435 uint64_t Imm;
436 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
437 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
438 else {
439 ConstantSDNode *C = cast<ConstantSDNode>(N);
440 Imm = C->getZExtValue();
441 }
442
443 SDLoc DL(N);
444 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
445 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
446 MVT::i32));
447 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
448 CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
449 const SDValue Ops[] = {
450 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
451 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
452 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
453 };
454
455 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
456 N->getValueType(0), Ops);
457 }
458
459 case ISD::LOAD: {
460 LoadSDNode *LD = cast<LoadSDNode>(N);
461 SDLoc SL(N);
462 EVT VT = N->getValueType(0);
463
464 if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
465 N = glueCopyToM0(N);
466 break;
467 }
468
469 // To simplify the TableGen patters, we replace all i64 loads with
470 // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
471 // during DAG legalization, however, so places (ExpandUnalignedLoad)
472 // in the DAG legalizer assume that if i64 is legal, so doing this
473 // promotion early can cause problems.
474
475 SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
476 LD->getBasePtr(), LD->getMemOperand());
477 SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
478 MVT::i64, NewLoad);
479 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
480 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
481 SDNode *Load = glueCopyToM0(NewLoad.getNode());
482 SelectCode(Load);
483 N = BitCast.getNode();
484 break;
485 }
486
487 case ISD::STORE: {
488 // Handle i64 stores here for the same reason mentioned above for loads.
489 StoreSDNode *ST = cast<StoreSDNode>(N);
490 SDValue Value = ST->getValue();
491 if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
492
493 SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
494 MVT::v2i32, Value);
495 SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
496 ST->getBasePtr(), ST->getMemOperand());
497
498 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
499
500 if (NewValue.getOpcode() == ISD::BITCAST) {
501 Select(NewStore.getNode());
502 return SelectCode(NewValue.getNode());
503 }
504
505 // getNode() may fold the bitcast if its input was another bitcast. If that
506 // happens we should only select the new store.
507 N = NewStore.getNode();
508 }
509
510 N = glueCopyToM0(N);
511 break;
512 }
513
514 case AMDGPUISD::REGISTER_LOAD: {
515 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
516 break;
517 SDValue Addr, Offset;
518
519 SDLoc DL(N);
520 SelectADDRIndirect(N->getOperand(1), Addr, Offset);
521 const SDValue Ops[] = {
522 Addr,
523 Offset,
524 CurDAG->getTargetConstant(0, DL, MVT::i32),
525 N->getOperand(0),
526 };
527 return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
528 CurDAG->getVTList(MVT::i32, MVT::i64,
529 MVT::Other),
530 Ops);
531 }
532 case AMDGPUISD::REGISTER_STORE: {
533 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
534 break;
535 SDValue Addr, Offset;
536 SelectADDRIndirect(N->getOperand(2), Addr, Offset);
537 SDLoc DL(N);
538 const SDValue Ops[] = {
539 N->getOperand(1),
540 Addr,
541 Offset,
542 CurDAG->getTargetConstant(0, DL, MVT::i32),
543 N->getOperand(0),
544 };
545 return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
546 CurDAG->getVTList(MVT::Other),
547 Ops);
548 }
549
550 case AMDGPUISD::BFE_I32:
551 case AMDGPUISD::BFE_U32: {
552 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
553 break;
554
555 // There is a scalar version available, but unlike the vector version which
556 // has a separate operand for the offset and width, the scalar version packs
557 // the width and offset into a single operand. Try to move to the scalar
558 // version if the offsets are constant, so that we can try to keep extended
559 // loads of kernel arguments in SGPRs.
560
561 // TODO: Technically we could try to pattern match scalar bitshifts of
562 // dynamic values, but it's probably not useful.
563 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
564 if (!Offset)
565 break;
566
567 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
568 if (!Width)
569 break;
570
571 bool Signed = Opc == AMDGPUISD::BFE_I32;
572
573 uint32_t OffsetVal = Offset->getZExtValue();
574 uint32_t WidthVal = Width->getZExtValue();
575
576 return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
577 N->getOperand(0), OffsetVal, WidthVal);
578
579 }
580 case AMDGPUISD::DIV_SCALE: {
581 return SelectDIV_SCALE(N);
582 }
583 case ISD::CopyToReg: {
584 const SITargetLowering& Lowering =
585 *static_cast<const SITargetLowering*>(getTargetLowering());
586 Lowering.legalizeTargetIndependentNode(N, *CurDAG);
587 break;
588 }
589 case ISD::ADDRSPACECAST:
590 return SelectAddrSpaceCast(N);
591 case ISD::AND:
592 case ISD::SRL:
593 case ISD::SRA:
594 if (N->getValueType(0) != MVT::i32 ||
595 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
596 break;
597
598 return SelectS_BFE(N);
599 }
600
601 return SelectCode(N);
602 }
603
604
checkType(const Value * Ptr,unsigned AS)605 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
606 assert(AS != 0 && "Use checkPrivateAddress instead.");
607 if (!Ptr)
608 return false;
609
610 return Ptr->getType()->getPointerAddressSpace() == AS;
611 }
612
checkPrivateAddress(const MachineMemOperand * Op)613 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
614 if (Op->getPseudoValue())
615 return true;
616
617 if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
618 return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
619
620 return false;
621 }
622
isGlobalStore(const StoreSDNode * N)623 bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
624 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
625 }
626
isPrivateStore(const StoreSDNode * N)627 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
628 const Value *MemVal = N->getMemOperand()->getValue();
629 return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
630 !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
631 !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
632 }
633
isLocalStore(const StoreSDNode * N)634 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
635 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
636 }
637
isFlatStore(const StoreSDNode * N)638 bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
639 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
640 }
641
isRegionStore(const StoreSDNode * N)642 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
643 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
644 }
645
isConstantLoad(const LoadSDNode * N,int CbId) const646 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
647 const Value *MemVal = N->getMemOperand()->getValue();
648 if (CbId == -1)
649 return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
650
651 return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
652 }
653
isGlobalLoad(const LoadSDNode * N) const654 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
655 if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
656 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
657 N->getMemoryVT().bitsLT(MVT::i32))
658 return true;
659
660 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
661 }
662
isParamLoad(const LoadSDNode * N) const663 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
664 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
665 }
666
isLocalLoad(const LoadSDNode * N) const667 bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
668 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
669 }
670
isFlatLoad(const LoadSDNode * N) const671 bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
672 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
673 }
674
isRegionLoad(const LoadSDNode * N) const675 bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
676 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
677 }
678
isCPLoad(const LoadSDNode * N) const679 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
680 MachineMemOperand *MMO = N->getMemOperand();
681 if (checkPrivateAddress(N->getMemOperand())) {
682 if (MMO) {
683 const PseudoSourceValue *PSV = MMO->getPseudoValue();
684 if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
685 return true;
686 }
687 }
688 }
689 return false;
690 }
691
isPrivateLoad(const LoadSDNode * N) const692 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
693 if (checkPrivateAddress(N->getMemOperand())) {
694 // Check to make sure we are not a constant pool load or a constant load
695 // that is marked as a private load
696 if (isCPLoad(N) || isConstantLoad(N, -1)) {
697 return false;
698 }
699 }
700
701 const Value *MemVal = N->getMemOperand()->getValue();
702 if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
703 !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
704 !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
705 !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
706 !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
707 !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
708 !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
709 return true;
710 }
711 return false;
712 }
713
getPassName() const714 const char *AMDGPUDAGToDAGISel::getPassName() const {
715 return "AMDGPU DAG->DAG Pattern Instruction Selection";
716 }
717
718 #ifdef DEBUGTMP
719 #undef INT64_C
720 #endif
721 #undef DEBUGTMP
722
723 //===----------------------------------------------------------------------===//
724 // Complex Patterns
725 //===----------------------------------------------------------------------===//
726
SelectGlobalValueConstantOffset(SDValue Addr,SDValue & IntPtr)727 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
728 SDValue& IntPtr) {
729 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
730 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
731 true);
732 return true;
733 }
734 return false;
735 }
736
SelectGlobalValueVariableOffset(SDValue Addr,SDValue & BaseReg,SDValue & Offset)737 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
738 SDValue& BaseReg, SDValue &Offset) {
739 if (!isa<ConstantSDNode>(Addr)) {
740 BaseReg = Addr;
741 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
742 return true;
743 }
744 return false;
745 }
746
SelectADDRVTX_READ(SDValue Addr,SDValue & Base,SDValue & Offset)747 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
748 SDValue &Offset) {
749 ConstantSDNode *IMMOffset;
750
751 if (Addr.getOpcode() == ISD::ADD
752 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
753 && isInt<16>(IMMOffset->getZExtValue())) {
754
755 Base = Addr.getOperand(0);
756 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
757 MVT::i32);
758 return true;
759 // If the pointer address is constant, we can move it to the offset field.
760 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
761 && isInt<16>(IMMOffset->getZExtValue())) {
762 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
763 SDLoc(CurDAG->getEntryNode()),
764 AMDGPU::ZERO, MVT::i32);
765 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
766 MVT::i32);
767 return true;
768 }
769
770 // Default case, no offset
771 Base = Addr;
772 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
773 return true;
774 }
775
SelectADDRIndirect(SDValue Addr,SDValue & Base,SDValue & Offset)776 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
777 SDValue &Offset) {
778 ConstantSDNode *C;
779 SDLoc DL(Addr);
780
781 if ((C = dyn_cast<ConstantSDNode>(Addr))) {
782 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
783 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
784 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
785 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
786 Base = Addr.getOperand(0);
787 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
788 } else {
789 Base = Addr;
790 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
791 }
792
793 return true;
794 }
795
SelectADD_SUB_I64(SDNode * N)796 SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
797 SDLoc DL(N);
798 SDValue LHS = N->getOperand(0);
799 SDValue RHS = N->getOperand(1);
800
801 bool IsAdd = (N->getOpcode() == ISD::ADD);
802
803 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
804 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
805
806 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
807 DL, MVT::i32, LHS, Sub0);
808 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
809 DL, MVT::i32, LHS, Sub1);
810
811 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
812 DL, MVT::i32, RHS, Sub0);
813 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
814 DL, MVT::i32, RHS, Sub1);
815
816 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
817 SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
818
819
820 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
821 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
822
823 SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
824 SDValue Carry(AddLo, 1);
825 SDNode *AddHi
826 = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
827 SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
828
829 SDValue Args[5] = {
830 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
831 SDValue(AddLo,0),
832 Sub0,
833 SDValue(AddHi,0),
834 Sub1,
835 };
836 return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
837 }
838
839 // We need to handle this here because tablegen doesn't support matching
840 // instructions with multiple outputs.
SelectDIV_SCALE(SDNode * N)841 SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
842 SDLoc SL(N);
843 EVT VT = N->getValueType(0);
844
845 assert(VT == MVT::f32 || VT == MVT::f64);
846
847 unsigned Opc
848 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
849
850 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
851 SDValue Ops[8];
852
853 SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
854 SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
855 SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
856 return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
857 }
858
isDSOffsetLegal(const SDValue & Base,unsigned Offset,unsigned OffsetBits) const859 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
860 unsigned OffsetBits) const {
861 if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
862 (OffsetBits == 8 && !isUInt<8>(Offset)))
863 return false;
864
865 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
866 Subtarget->unsafeDSOffsetFoldingEnabled())
867 return true;
868
869 // On Southern Islands instruction with a negative base value and an offset
870 // don't seem to work.
871 return CurDAG->SignBitIsZero(Base);
872 }
873
SelectDS1Addr1Offset(SDValue Addr,SDValue & Base,SDValue & Offset) const874 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
875 SDValue &Offset) const {
876 if (CurDAG->isBaseWithConstantOffset(Addr)) {
877 SDValue N0 = Addr.getOperand(0);
878 SDValue N1 = Addr.getOperand(1);
879 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
880 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
881 // (add n0, c0)
882 Base = N0;
883 Offset = N1;
884 return true;
885 }
886 }
887
888 SDLoc DL(Addr);
889
890 // If we have a constant address, prefer to put the constant into the
891 // offset. This can save moves to load the constant address since multiple
892 // operations can share the zero base address register, and enables merging
893 // into read2 / write2 instructions.
894 if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
895 if (isUInt<16>(CAddr->getZExtValue())) {
896 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
897 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
898 DL, MVT::i32, Zero);
899 Base = SDValue(MovZero, 0);
900 Offset = Addr;
901 return true;
902 }
903 }
904
905 // default case
906 Base = Addr;
907 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
908 return true;
909 }
910
SelectDS64Bit4ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const911 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
912 SDValue &Offset0,
913 SDValue &Offset1) const {
914 SDLoc DL(Addr);
915
916 if (CurDAG->isBaseWithConstantOffset(Addr)) {
917 SDValue N0 = Addr.getOperand(0);
918 SDValue N1 = Addr.getOperand(1);
919 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
920 unsigned DWordOffset0 = C1->getZExtValue() / 4;
921 unsigned DWordOffset1 = DWordOffset0 + 1;
922 // (add n0, c0)
923 if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
924 Base = N0;
925 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
926 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
927 return true;
928 }
929 }
930
931 if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
932 unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
933 unsigned DWordOffset1 = DWordOffset0 + 1;
934 assert(4 * DWordOffset0 == CAddr->getZExtValue());
935
936 if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
937 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
938 MachineSDNode *MovZero
939 = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
940 DL, MVT::i32, Zero);
941 Base = SDValue(MovZero, 0);
942 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
943 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
944 return true;
945 }
946 }
947
948 // default case
949 Base = Addr;
950 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
951 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
952 return true;
953 }
954
isLegalMUBUFImmOffset(const ConstantSDNode * Imm)955 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
956 return isUInt<12>(Imm->getZExtValue());
957 }
958
SelectMUBUF(SDValue Addr,SDValue & Ptr,SDValue & VAddr,SDValue & SOffset,SDValue & Offset,SDValue & Offen,SDValue & Idxen,SDValue & Addr64,SDValue & GLC,SDValue & SLC,SDValue & TFE) const959 void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
960 SDValue &VAddr, SDValue &SOffset,
961 SDValue &Offset, SDValue &Offen,
962 SDValue &Idxen, SDValue &Addr64,
963 SDValue &GLC, SDValue &SLC,
964 SDValue &TFE) const {
965 SDLoc DL(Addr);
966
967 GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
968 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
969 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
970
971 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
972 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
973 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
974 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
975
976 if (CurDAG->isBaseWithConstantOffset(Addr)) {
977 SDValue N0 = Addr.getOperand(0);
978 SDValue N1 = Addr.getOperand(1);
979 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
980
981 if (N0.getOpcode() == ISD::ADD) {
982 // (add (add N2, N3), C1) -> addr64
983 SDValue N2 = N0.getOperand(0);
984 SDValue N3 = N0.getOperand(1);
985 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
986 Ptr = N2;
987 VAddr = N3;
988 } else {
989
990 // (add N0, C1) -> offset
991 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
992 Ptr = N0;
993 }
994
995 if (isLegalMUBUFImmOffset(C1)) {
996 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
997 return;
998 } else if (isUInt<32>(C1->getZExtValue())) {
999 // Illegal offset, store it in soffset.
1000 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1001 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1002 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1003 0);
1004 return;
1005 }
1006 }
1007
1008 if (Addr.getOpcode() == ISD::ADD) {
1009 // (add N0, N1) -> addr64
1010 SDValue N0 = Addr.getOperand(0);
1011 SDValue N1 = Addr.getOperand(1);
1012 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1013 Ptr = N0;
1014 VAddr = N1;
1015 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1016 return;
1017 }
1018
1019 // default case -> offset
1020 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1021 Ptr = Addr;
1022 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1023
1024 }
1025
SelectMUBUFAddr64(SDValue Addr,SDValue & SRsrc,SDValue & VAddr,SDValue & SOffset,SDValue & Offset,SDValue & GLC,SDValue & SLC,SDValue & TFE) const1026 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1027 SDValue &VAddr, SDValue &SOffset,
1028 SDValue &Offset, SDValue &GLC,
1029 SDValue &SLC, SDValue &TFE) const {
1030 SDValue Ptr, Offen, Idxen, Addr64;
1031
1032 // addr64 bit was removed for volcanic islands.
1033 if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1034 return false;
1035
1036 SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1037 GLC, SLC, TFE);
1038
1039 ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1040 if (C->getSExtValue()) {
1041 SDLoc DL(Addr);
1042
1043 const SITargetLowering& Lowering =
1044 *static_cast<const SITargetLowering*>(getTargetLowering());
1045
1046 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1047 return true;
1048 }
1049
1050 return false;
1051 }
1052
SelectMUBUFAddr64(SDValue Addr,SDValue & SRsrc,SDValue & VAddr,SDValue & SOffset,SDValue & Offset,SDValue & SLC) const1053 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1054 SDValue &VAddr, SDValue &SOffset,
1055 SDValue &Offset,
1056 SDValue &SLC) const {
1057 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1058 SDValue GLC, TFE;
1059
1060 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1061 }
1062
SelectMUBUFScratch(SDValue Addr,SDValue & Rsrc,SDValue & VAddr,SDValue & SOffset,SDValue & ImmOffset) const1063 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
1064 SDValue &VAddr, SDValue &SOffset,
1065 SDValue &ImmOffset) const {
1066
1067 SDLoc DL(Addr);
1068 MachineFunction &MF = CurDAG->getMachineFunction();
1069 const SIRegisterInfo *TRI =
1070 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
1071 MachineRegisterInfo &MRI = MF.getRegInfo();
1072 const SITargetLowering& Lowering =
1073 *static_cast<const SITargetLowering*>(getTargetLowering());
1074
1075 unsigned ScratchOffsetReg =
1076 TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
1077 Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
1078 ScratchOffsetReg, MVT::i32);
1079 SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
1080 SDValue ScratchRsrcDword0 =
1081 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
1082
1083 SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
1084 SDValue ScratchRsrcDword1 =
1085 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
1086
1087 const SDValue RsrcOps[] = {
1088 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
1089 ScratchRsrcDword0,
1090 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
1091 ScratchRsrcDword1,
1092 CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
1093 };
1094 SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1095 MVT::v2i32, RsrcOps), 0);
1096 Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
1097 SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
1098 MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
1099
1100 // (add n0, c1)
1101 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1102 SDValue N0 = Addr.getOperand(0);
1103 SDValue N1 = Addr.getOperand(1);
1104 // Offsets in vaddr must be positive.
1105 if (CurDAG->SignBitIsZero(N0)) {
1106 ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1107 if (isLegalMUBUFImmOffset(C1)) {
1108 VAddr = N0;
1109 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1110 return true;
1111 }
1112 }
1113 }
1114
1115 // (node)
1116 VAddr = Addr;
1117 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1118 return true;
1119 }
1120
SelectMUBUFOffset(SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset,SDValue & GLC,SDValue & SLC,SDValue & TFE) const1121 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1122 SDValue &SOffset, SDValue &Offset,
1123 SDValue &GLC, SDValue &SLC,
1124 SDValue &TFE) const {
1125 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1126 const SIInstrInfo *TII =
1127 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1128
1129 SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1130 GLC, SLC, TFE);
1131
1132 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1133 !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1134 !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1135 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1136 APInt::getAllOnesValue(32).getZExtValue(); // Size
1137 SDLoc DL(Addr);
1138
1139 const SITargetLowering& Lowering =
1140 *static_cast<const SITargetLowering*>(getTargetLowering());
1141
1142 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1143 return true;
1144 }
1145 return false;
1146 }
1147
SelectMUBUFOffset(SDValue Addr,SDValue & SRsrc,SDValue & Soffset,SDValue & Offset,SDValue & GLC) const1148 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1149 SDValue &Soffset, SDValue &Offset,
1150 SDValue &GLC) const {
1151 SDValue SLC, TFE;
1152
1153 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1154 }
1155
1156 // FIXME: This is incorrect and only enough to be able to compile.
SelectAddrSpaceCast(SDNode * N)1157 SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
1158 AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
1159 SDLoc DL(N);
1160
1161 assert(Subtarget->hasFlatAddressSpace() &&
1162 "addrspacecast only supported with flat address space!");
1163
1164 assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
1165 ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
1166 "Cannot cast address space to / from constant address!");
1167
1168 assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
1169 ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
1170 "Can only cast to / from flat address space!");
1171
1172 // The flat instructions read the address as the index of the VGPR holding the
1173 // address, so casting should just be reinterpreting the base VGPR, so just
1174 // insert trunc / bitcast / zext.
1175
1176 SDValue Src = ASC->getOperand(0);
1177 EVT DestVT = ASC->getValueType(0);
1178 EVT SrcVT = Src.getValueType();
1179
1180 unsigned SrcSize = SrcVT.getSizeInBits();
1181 unsigned DestSize = DestVT.getSizeInBits();
1182
1183 if (SrcSize > DestSize) {
1184 assert(SrcSize == 64 && DestSize == 32);
1185 return CurDAG->getMachineNode(
1186 TargetOpcode::EXTRACT_SUBREG,
1187 DL,
1188 DestVT,
1189 Src,
1190 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
1191 }
1192
1193
1194 if (DestSize > SrcSize) {
1195 assert(SrcSize == 32 && DestSize == 64);
1196
1197 // FIXME: This is probably wrong, we should never be defining
1198 // a register class with both VGPRs and SGPRs
1199 SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
1200 MVT::i32);
1201
1202 const SDValue Ops[] = {
1203 RC,
1204 Src,
1205 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
1206 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1207 CurDAG->getConstant(0, DL, MVT::i32)), 0),
1208 CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
1209 };
1210
1211 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
1212 DL, N->getValueType(0), Ops);
1213 }
1214
1215 assert(SrcSize == 64 && DestSize == 64);
1216 return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
1217 }
1218
getS_BFE(unsigned Opcode,SDLoc DL,SDValue Val,uint32_t Offset,uint32_t Width)1219 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
1220 uint32_t Offset, uint32_t Width) {
1221 // Transformation function, pack the offset and width of a BFE into
1222 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1223 // source, bits [5:0] contain the offset and bits [22:16] the width.
1224 uint32_t PackedVal = Offset | (Width << 16);
1225 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1226
1227 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1228 }
1229
SelectS_BFEFromShifts(SDNode * N)1230 SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1231 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1232 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1233 // Predicate: 0 < b <= c < 32
1234
1235 const SDValue &Shl = N->getOperand(0);
1236 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1237 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1238
1239 if (B && C) {
1240 uint32_t BVal = B->getZExtValue();
1241 uint32_t CVal = C->getZExtValue();
1242
1243 if (0 < BVal && BVal <= CVal && CVal < 32) {
1244 bool Signed = N->getOpcode() == ISD::SRA;
1245 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1246
1247 return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
1248 CVal - BVal, 32 - CVal);
1249 }
1250 }
1251 return SelectCode(N);
1252 }
1253
SelectS_BFE(SDNode * N)1254 SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1255 switch (N->getOpcode()) {
1256 case ISD::AND:
1257 if (N->getOperand(0).getOpcode() == ISD::SRL) {
1258 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1259 // Predicate: isMask(mask)
1260 const SDValue &Srl = N->getOperand(0);
1261 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1262 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1263
1264 if (Shift && Mask) {
1265 uint32_t ShiftVal = Shift->getZExtValue();
1266 uint32_t MaskVal = Mask->getZExtValue();
1267
1268 if (isMask_32(MaskVal)) {
1269 uint32_t WidthVal = countPopulation(MaskVal);
1270
1271 return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
1272 ShiftVal, WidthVal);
1273 }
1274 }
1275 }
1276 break;
1277 case ISD::SRL:
1278 if (N->getOperand(0).getOpcode() == ISD::AND) {
1279 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1280 // Predicate: isMask(mask >> b)
1281 const SDValue &And = N->getOperand(0);
1282 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1283 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1284
1285 if (Shift && Mask) {
1286 uint32_t ShiftVal = Shift->getZExtValue();
1287 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1288
1289 if (isMask_32(MaskVal)) {
1290 uint32_t WidthVal = countPopulation(MaskVal);
1291
1292 return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
1293 ShiftVal, WidthVal);
1294 }
1295 }
1296 } else if (N->getOperand(0).getOpcode() == ISD::SHL)
1297 return SelectS_BFEFromShifts(N);
1298 break;
1299 case ISD::SRA:
1300 if (N->getOperand(0).getOpcode() == ISD::SHL)
1301 return SelectS_BFEFromShifts(N);
1302 break;
1303 }
1304
1305 return SelectCode(N);
1306 }
1307
SelectVOP3Mods(SDValue In,SDValue & Src,SDValue & SrcMods) const1308 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1309 SDValue &SrcMods) const {
1310
1311 unsigned Mods = 0;
1312
1313 Src = In;
1314
1315 if (Src.getOpcode() == ISD::FNEG) {
1316 Mods |= SISrcMods::NEG;
1317 Src = Src.getOperand(0);
1318 }
1319
1320 if (Src.getOpcode() == ISD::FABS) {
1321 Mods |= SISrcMods::ABS;
1322 Src = Src.getOperand(0);
1323 }
1324
1325 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1326
1327 return true;
1328 }
1329
SelectVOP3NoMods(SDValue In,SDValue & Src,SDValue & SrcMods) const1330 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1331 SDValue &SrcMods) const {
1332 bool Res = SelectVOP3Mods(In, Src, SrcMods);
1333 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1334 }
1335
SelectVOP3Mods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const1336 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1337 SDValue &SrcMods, SDValue &Clamp,
1338 SDValue &Omod) const {
1339 SDLoc DL(In);
1340 // FIXME: Handle Clamp and Omod
1341 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1342 Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1343
1344 return SelectVOP3Mods(In, Src, SrcMods);
1345 }
1346
SelectVOP3NoMods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const1347 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1348 SDValue &SrcMods, SDValue &Clamp,
1349 SDValue &Omod) const {
1350 bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1351
1352 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1353 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1354 cast<ConstantSDNode>(Omod)->isNullValue();
1355 }
1356
SelectVOP3Mods0Clamp(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Omod) const1357 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1358 SDValue &SrcMods,
1359 SDValue &Omod) const {
1360 // FIXME: Handle Omod
1361 Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1362
1363 return SelectVOP3Mods(In, Src, SrcMods);
1364 }
1365
SelectVOP3Mods0Clamp0OMod(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const1366 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1367 SDValue &SrcMods,
1368 SDValue &Clamp,
1369 SDValue &Omod) const {
1370 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1371 return SelectVOP3Mods(In, Src, SrcMods);
1372 }
1373
PostprocessISelDAG()1374 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
1375 const AMDGPUTargetLowering& Lowering =
1376 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
1377 bool IsModified = false;
1378 do {
1379 IsModified = false;
1380 // Go over all selected nodes and try to fold them a bit more
1381 for (SDNode &Node : CurDAG->allnodes()) {
1382 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
1383 if (!MachineNode)
1384 continue;
1385
1386 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
1387 if (ResNode != &Node) {
1388 ReplaceUses(&Node, ResNode);
1389 IsModified = true;
1390 }
1391 }
1392 CurDAG->RemoveDeadNodes();
1393 } while (IsModified);
1394 }
1395