diff --git lib/Target/AMDGPU/AMDGPU.td lib/Target/AMDGPU/AMDGPU.td
index 2030ee0..40f4741 100644
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -32,6 +32,7 @@ def AMDGPU : Target {
 // Include AMDGPU TD files
 include "R600Schedule.td"
+include "SISchedule.td"
 include "Processors.td"
 include "AMDGPUInstrInfo.td"
 include "AMDGPUIntrinsics.td"
diff --git lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index c8b080f..4978113 100644
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,6 +17,8 @@
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPU.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -42,6 +44,91 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   }
   SetupMachineFunction(MF);
   OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+  if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+    EmitProgramInfo(MF);
+  }
   EmitFunctionBody();
   return false;
 }
+
+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
+  unsigned MaxSGPR = 0;
+  unsigned MaxVGPR = 0;
+  bool VCCUsed = false;
+  const SIRegisterInfo * RI =
+      static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+      MachineInstr &MI = *I;
+
+      unsigned numOperands = MI.getNumOperands();
+      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+        MachineOperand & MO = MI.getOperand(op_idx);
+        unsigned maxUsed;
+        unsigned width = 0;
+        bool isSGPR = false;
+        unsigned reg;
+        unsigned hwReg;
+        if (!MO.isReg()) {
+          continue;
+        }
+        reg = MO.getReg();
+        if (reg == AMDGPU::VCC) {
+          VCCUsed = true;
+          continue;
+        }
+        switch (reg) {
+        default: break;
+        case AMDGPU::EXEC:
+        case AMDGPU::SI_LITERAL_CONSTANT:
+        case AMDGPU::SREG_LIT_0:
+        case AMDGPU::M0:
+          continue;
+        }
+
+        if (AMDGPU::SReg_32RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 1;
+        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 1;
+        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 2;
+        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 2;
+        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 4;
+        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 4;
+        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 8;
+        } else {
+          assert(!"Unknown register class");
+        }
+        hwReg = RI->getEncodingValue(reg);
+        maxUsed = hwReg + width - 1;
+        if (isSGPR) {
+          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
+        } else {
+          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
+        }
+      }
+    }
+  }
+  if (VCCUsed) {
+    MaxSGPR += 2;
+  }
+  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+  OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
+  OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
+  OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
+}
diff --git lib/Target/AMDGPU/AMDGPUInstructions.td lib/Target/AMDGPU/AMDGPUInstructions.td
index c0249e2..443c6d4 100644
--- lib/Target/AMDGPU/AMDGPUInstructions.td
+++ lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -180,3 +180,6 @@ class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
 >;
 
 include "R600Instructions.td"
+
+include "SIInstrInfo.td"
+
diff --git lib/Target/AMDGPU/AMDGPUIntrinsics.td lib/Target/AMDGPU/AMDGPUIntrinsics.td
index 680065a..1f2428a 100644
--- lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -59,3 +59,4 @@ let TargetPrefix = "TGSI", isTarget = 1 in {
   def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
 }
+include "SIIntrinsics.td"
diff --git lib/Target/AMDGPU/AMDGPURegisterInfo.td lib/Target/AMDGPU/AMDGPURegisterInfo.td
index d9c72ed..8181e02 100644
--- lib/Target/AMDGPU/AMDGPURegisterInfo.td
+++ lib/Target/AMDGPU/AMDGPURegisterInfo.td
@@ -19,3 +19,4 @@ let Namespace = "AMDGPU" in {
 }
 
 include "R600RegisterInfo.td"
+include "SIRegisterInfo.td"
diff --git lib/Target/AMDGPU/AMDGPUTargetMachine.cpp lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0fd5f30..dd4b733 100644
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -16,6 +16,8 @@
 #include "AMDGPU.h"
 #include "R600ISelLowering.h"
 #include "R600InstrInfo.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -57,6 +59,9 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
   if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
     InstrInfo = new R600InstrInfo(*this);
     TLInfo = new R600TargetLowering(*this);
+  } else {
+    InstrInfo = new SIInstrInfo(*this);
+    TLInfo = new SITargetLowering(*this);
   }
 }
 
@@ -102,6 +107,9 @@ bool AMDGPUPassConfig::addInstSelector() {
 
 bool AMDGPUPassConfig::addPreRegAlloc() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+  if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+    addPass(createSIAssignInterpRegsPass(*TM));
+  }
   addPass(createAMDGPUConvertToISAPass(*TM));
   return false;
 }
@@ -124,6 +132,9 @@ bool AMDGPUPassConfig::addPreEmitPass() {
   if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
     addPass(createR600ExpandSpecialInstrsPass(*TM));
     addPass(&FinalizeMachineBundlesID);
+  } else {
+    addPass(createSILowerLiteralConstantsPass(*TM));
+    addPass(createSILowerFlowControlPass(*TM));
   }
 
   return false;
diff --git lib/Target/AMDGPU/AMDILCFGStructurizer.cpp lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 5ff115e..01a5d89 100644
--- lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -2718,6 +2718,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
     switch(oldOpcode) {
   case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
   ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
+  case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
   default: assert(0 && "internal error");
   };
@@ -2728,6 +2729,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
     switch(oldOpcode) {
   case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
   ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
+  case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
   default: assert(0 && "internal error");
   };
@@ -2781,6 +2783,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
   case AMDGPU::JUMP:
     return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
   ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
+  case AMDGPU::SI_IF_NZ:
+  case AMDGPU::SI_IF_Z:
     break;
   default:
     return false;
diff --git lib/Target/AMDGPU/CMakeLists.txt lib/Target/AMDGPU/CMakeLists.txt
index bd53757..6bb7ba0 100644
--- lib/Target/AMDGPU/CMakeLists.txt
+++ lib/Target/AMDGPU/CMakeLists.txt
@@ -37,6 +37,13 @@ add_llvm_target(AMDGPUCodeGen
   R600ISelLowering.cpp
   R600MachineFunctionInfo.cpp
   R600RegisterInfo.cpp
+  SIAssignInterpRegs.cpp
+  SIInstrInfo.cpp
+  SIISelLowering.cpp
+  SILowerLiteralConstants.cpp
+  SILowerFlowControl.cpp
+  SIMachineFunctionInfo.cpp
+  SIRegisterInfo.cpp
   )
 
 add_dependencies(LLVMAMDGPUCodeGen intrinsics_gen)
diff --git lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 0d472dc..87f6372 100644
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -74,7 +74,11 @@ static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
                                                 const MCRegisterInfo &MRI,
                                                 const MCSubtargetInfo &STI,
                                                 MCContext &Ctx) {
-  return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
+  if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
+    return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
+  } else {
+    return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
+  }
 }
 
 static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
diff --git lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 7cb4f97..9d921df 100644
--- lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMAMDGPUDesc
   AMDGPUMCTargetDesc.cpp
   AMDGPUMCAsmInfo.cpp
   R600MCCodeEmitter.cpp
+  SIMCCodeEmitter.cpp
   )
 
 add_dependencies(LLVMAMDGPUDesc AMDGPUCommonTableGen)
diff --git lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
new file mode 100644
index 0000000..f907a53
--- /dev/null
+++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -0,0 +1,291 @@
+//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The SI code emitter produces machine code that can be executed directly on
+// the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
+#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
+
+// These must be kept in sync with SIInstructions.td and also the
+// InstrEncodingInfo array in SIInstrInfo.cpp.
+//
+// NOTE: This enum is only used to identify the encoding type within LLVM;
+// the actual encoding type that is part of the instruction format is
+// different.
+namespace SIInstrEncodingType {
+  enum Encoding {
+    EXP = 0,
+    LDS = 1,
+    MIMG = 2,
+    MTBUF = 3,
+    MUBUF = 4,
+    SMRD = 5,
+    SOP1 = 6,
+    SOP2 = 7,
+    SOPC = 8,
+    SOPK = 9,
+    SOPP = 10,
+    VINTRP = 11,
+    VOP1 = 12,
+    VOP2 = 13,
+    VOP3 = 14,
+    VOPC = 15
+  };
+}
+
+using namespace llvm;
+
+namespace {
+class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
+  SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
+  void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
+  const MCInstrInfo &MCII;
+  const MCRegisterInfo &MRI;
+  const MCSubtargetInfo &STI;
+  MCContext &Ctx;
+
+public:
+  SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+                  const MCSubtargetInfo &sti, MCContext &ctx)
+    : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+
+  ~SIMCCodeEmitter() { }
+
+  /// EncodeInstruction - Encode the instruction and write it to the OS.
+  virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                                 SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getMachineOpValue - Return the encoding for an MCOperand.
+  virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                                     SmallVectorImpl<MCFixup> &Fixups) const;
+
+public:
+
+  /// GPRAlign - Encode a sequence of registers with the correct alignment.
+  unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
+
+  /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used.
+  virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
+                                   SmallVectorImpl<MCFixup> &Fixup) const;
+
+  /// GPR4AlignEncode - Encoding for when 4 consecutive registers are used.
+  virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
+                                   SmallVectorImpl<MCFixup> &Fixup) const;
+
+  /// SMRDmemriEncode - Encoding for SMRD indexed loads.
+  virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+                                   SmallVectorImpl<MCFixup> &Fixup) const;
+
+  /// VOPPostEncode - Post-Encoder method for VOP instructions.
+  virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
+
+private:
+
+  /// getEncodingType - Return the SIInstrEncodingType for this instruction.
+  unsigned getEncodingType(const MCInst &MI) const;
+
+  /// getEncodingBytes - Get the size in bytes of this instruction's encoding.
+  unsigned getEncodingBytes(const MCInst &MI) const;
+
+  /// getRegBinaryCode - Returns the hardware encoding for a register.
+  unsigned getRegBinaryCode(unsigned reg) const;
+
+  /// getHWRegNum - Generated function that returns the hardware encoding for
+  /// a register.
+  unsigned getHWRegNum(unsigned reg) const;
+
+};
+
+} // End anonymous namespace
+
+MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
+                                           const MCRegisterInfo &MRI,
+                                           const MCSubtargetInfo &STI,
+                                           MCContext &Ctx) {
+  return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                                        SmallVectorImpl<MCFixup> &Fixups) const {
+  uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
+  unsigned bytes = getEncodingBytes(MI);
+  for (unsigned i = 0; i < bytes; i++) {
+    OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
+  }
+}
+
+uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+                                            const MCOperand &MO,
+                                            SmallVectorImpl<MCFixup> &Fixups) const {
+  if (MO.isReg()) {
+    return getRegBinaryCode(MO.getReg());
+  } else if (MO.isImm()) {
+    return MO.getImm();
+  } else if (MO.isFPImm()) {
+    // XXX: Not all instructions can use inline literals
+    // XXX: We should make sure this is a 32-bit constant
+    union {
+      float F;
+      uint32_t I;
+    } Imm;
+    Imm.F = MO.getFPImm();
+    return Imm.I;
+  } else {
+    llvm_unreachable("Encoding of this operand type is not supported yet.");
+  }
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Operand Encodings
+//===----------------------------------------------------------------------===//
+
+unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
+                                   unsigned shift) const {
+  unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
+  return regCode >> shift;
+}
+
+unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
+                                          unsigned OpNo,
+                                          SmallVectorImpl<MCFixup> &Fixup) const {
+  return GPRAlign(MI, OpNo, 1);
+}
+
+unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
+                                          unsigned OpNo,
+                                          SmallVectorImpl<MCFixup> &Fixup) const {
+  return GPRAlign(MI, OpNo, 2);
+}
+
+#define SMRD_OFFSET_MASK 0xff
+#define SMRD_IMM_SHIFT 8
+#define SMRD_SBASE_MASK 0x3f
+#define SMRD_SBASE_SHIFT 9
+/// SMRDmemriEncode - This function is responsible for encoding the offset
+/// and the base ptr for SMRD instructions.  It should return a bit string in
+/// this format:
+///
+/// OFFSET = bits{7-0}
+/// IMM    = bits{8}
+/// SBASE  = bits{14-9}
+///
+uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+                                          SmallVectorImpl<MCFixup> &Fixup) const {
+  uint32_t Encoding;
+
+  const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
+
+  //XXX: Use this function for SMRD loads with register offsets
+  assert(OffsetOp.isImm());
+
+  Encoding =
+      (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
+      | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
+      | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
+      ;
+
+  return Encoding;
+}
+
+//===----------------------------------------------------------------------===//
+// Post Encoder Callbacks
+//===----------------------------------------------------------------------===//
+
+uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const {
+  unsigned encodingType = getEncodingType(MI);
+  unsigned numSrcOps;
+  unsigned vgprBitOffset;
+
+  if (encodingType == SIInstrEncodingType::VOP3) {
+    numSrcOps = 3;
+    vgprBitOffset = 32;
+  } else {
+    numSrcOps = 1;
+    vgprBitOffset = 0;
+  }
+
+  // Add one to skip over the destination reg operand.
+  for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
+    const MCOperand &MO = MI.getOperand(opIdx);
+    if (MO.isReg()) {
+      unsigned reg = MI.getOperand(opIdx).getReg();
+      if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
+          AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
+        Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
+      }
+    } else if (MO.isFPImm()) {
+      union {
+        float f;
+        uint32_t i;
+      } Imm;
+      // XXX: Not all instructions can use inline literals
+      // XXX: We should make sure this is a 32-bit constant
+      Imm.f = MO.getFPImm();
+      Value |= ((uint64_t)Imm.i) << 32;
+    }
+  }
+  return Value;
+}
+
+//===----------------------------------------------------------------------===//
+// Encoding helper functions
+//===----------------------------------------------------------------------===//
+
+unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
+  return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
+}
+
+unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
+
+  // These instructions aren't real instructions with an encoding type, so
+  // we need to manually specify their size.
+  switch (MI.getOpcode()) {
+  default: break;
+  case AMDGPU::SI_LOAD_LITERAL_I32:
+  case AMDGPU::SI_LOAD_LITERAL_F32:
+    return 4;
+  }
+
+  unsigned encoding_type = getEncodingType(MI);
+  switch (encoding_type) {
+  case SIInstrEncodingType::EXP:
+  case SIInstrEncodingType::LDS:
+  case SIInstrEncodingType::MUBUF:
+  case SIInstrEncodingType::MTBUF:
+  case SIInstrEncodingType::MIMG:
+  case SIInstrEncodingType::VOP3:
+    return 8;
+  default:
+    return 4;
+  }
+}
+
+
+unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
+  switch (reg) {
+  case AMDGPU::M0: return 124;
+  case AMDGPU::SREG_LIT_0: return 128;
+  case AMDGPU::SI_LITERAL_CONSTANT: return 255;
+  default: return MRI.getEncodingValue(reg);
+  }
+}
+
diff --git lib/Target/AMDGPU/Processors.td lib/Target/AMDGPU/Processors.td
index 55816c6..3dc1ecd 100644
--- lib/Target/AMDGPU/Processors.td
+++ lib/Target/AMDGPU/Processors.td
@@ -25,4 +25,5 @@ def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
 def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
diff --git lib/Target/AMDGPU/SIAssignInterpRegs.cpp lib/Target/AMDGPU/SIAssignInterpRegs.cpp
new file mode 100644
index 0000000..1fc0a87
--- /dev/null
+++ lib/Target/AMDGPU/SIAssignInterpRegs.cpp
@@ -0,0 +1,151 @@
+//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass maps the pseudo interpolation registers to the correct physical
+// registers. Prior to executing a fragment shader, the GPU loads interpolation
+// parameters into physical registers. The specific physical register that each
+// interpolation parameter ends up in depends on the type of the interpolation
+// parameter as well as how many interpolation parameters are used by the
+// shader.
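+//
+// Illustrative example (the values follow from the table and loops below): a
+// pixel shader that reads only PERSP_CENTER_I/J and FRONT_FACE ends up with
+// PERSP_CENTER_I in VGPR0, PERSP_CENTER_J in VGPR1 and FRONT_FACE in VGPR2,
+// with bits 1 and 12 set in SPIPSInputAddr.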
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include "AMDGPU.h"
+#include "AMDIL.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SIAssignInterpRegsPass : public MachineFunctionPass {
+
+private:
+  static char ID;
+  TargetMachine &TM;
+
+  void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
+                 unsigned physReg, unsigned virtReg);
+
+public:
+  SIAssignInterpRegsPass(TargetMachine &tm) :
+    MachineFunctionPass(ID), TM(tm) { }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const { return "SI Assign interpolation registers"; }
+};
+
+} // End anonymous namespace
+
+char SIAssignInterpRegsPass::ID = 0;
+
+#define INTERP_VALUES 16
+#define REQUIRED_VALUE_MAX_INDEX 7
+
+struct InterpInfo {
+  bool Enabled;
+  unsigned Regs[3];
+  unsigned RegCount;
+};
+
+
+FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
+  return new SIAssignInterpRegsPass(tm);
+}
+
+bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
+{
+
+  struct InterpInfo InterpUse[INTERP_VALUES] = {
+    {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
+    {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
+    {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
+    {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
+    {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
+    {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
+    {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
+    {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
+    {false, {AMDGPU::POS_X_FLOAT}, 1},
+    {false, {AMDGPU::POS_Y_FLOAT}, 1},
+    {false, {AMDGPU::POS_Z_FLOAT}, 1},
+    {false, {AMDGPU::POS_W_FLOAT}, 1},
+    {false, {AMDGPU::FRONT_FACE}, 1},
+    {false, {AMDGPU::ANCILLARY}, 1},
+    {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
+    {false, {AMDGPU::POS_FIXED_PT}, 1}
+  };
+
+  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+  // This pass is only needed for pixel shaders.
+  if (MFI->ShaderType != ShaderType::PIXEL) {
+    return false;
+  }
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  bool ForceEnable = true;
+
+  // First pass, mark the interpolation values that are used.
+  for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
+    for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
+         RegIdx++) {
+      InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
+          !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
+      if (InterpUse[InterpIdx].Enabled &&
+          InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
+        ForceEnable = false;
+      }
+    }
+  }
+
+  // At least one interpolation mode must be enabled or else the GPU will hang.
+  if (ForceEnable) {
+    InterpUse[0].Enabled = true;
+  }
+
+  unsigned UsedVgprs = 0;
+
+  // Second pass, replace with VGPRs.
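+  // (Sketch of what follows: each enabled pseudo register is rewritten to a
+  // fresh virtual register that addLiveIn() ties to the next free physical
+  // VGPR, so enabled parameters are packed into VGPR0, VGPR1, ... in
+  // declaration order.)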
+  for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
+    if (!InterpUse[InterpIdx].Enabled) {
+      continue;
+    }
+    MFI->SPIPSInputAddr |= (1 << InterpIdx);
+
+    for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
+         RegIdx++, UsedVgprs++) {
+      unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
+      unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+      MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
+      addLiveIn(&MF, MRI, NewReg, VirtReg);
+    }
+  }
+
+  return false;
+}
+
+void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
+                                       MachineRegisterInfo & MRI,
+                                       unsigned physReg, unsigned virtReg)
+{
+  const TargetInstrInfo * TII = TM.getInstrInfo();
+  if (!MRI.isLiveIn(physReg)) {
+    MRI.addLiveIn(physReg, virtReg);
+    MF->front().addLiveIn(physReg);
+    BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
+            TII->get(TargetOpcode::COPY), virtReg)
+        .addReg(physReg);
+  } else {
+    MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
+  }
+}
diff --git lib/Target/AMDGPU/SIISelLowering.cpp lib/Target/AMDGPU/SIISelLowering.cpp
new file mode 100644
index 0000000..45f180f
--- /dev/null
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -0,0 +1,449 @@
+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is
+// mostly EmitInstrWithCustomInserter().
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIISelLowering.h"
+#include "AMDIL.h"
+#include "AMDILIntrinsicInfo.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+SITargetLowering::SITargetLowering(TargetMachine &TM) :
+    AMDGPUTargetLowering(TM),
+    TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
+{
+  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+  addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
+  addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
+  addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
+  addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
+  addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
+
+  addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
+  addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
+
+  computeRegisterProperties();
+
+  setOperationAction(ISD::AND, MVT::i1, Custom);
+
+  setOperationAction(ISD::ADD, MVT::i64, Legal);
+  setOperationAction(ISD::ADD, MVT::i32, Legal);
+
+  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+  // We need to custom lower loads from the USER_SGPR address space, so we can
+  // add the SGPRs as livein registers.
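+  // (For instance, an i32 load from USER_SGPR address 4 is rewritten by
+  // LowerLOAD() below into a live-in copy of SGPR4 instead of a memory
+  // access.)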
+  setOperationAction(ISD::LOAD, MVT::i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::i64, Custom);
+
+  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+  setTargetDAGCombine(ISD::SELECT_CC);
+
+  setTargetDAGCombine(ISD::SETCC);
+}
+
+MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
+    MachineInstr * MI, MachineBasicBlock * BB) const
+{
+  const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
+  MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
+  MachineBasicBlock::iterator I = MI;
+
+  if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
+    AppendS_WAITCNT(MI, *BB, llvm::next(I));
+    return BB;
+  }
+
+  switch (MI->getOpcode()) {
+  default:
+    return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+  case AMDGPU::BRANCH: return BB;
+  case AMDGPU::CLAMP_SI:
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+        .addOperand(MI->getOperand(0))
+        .addOperand(MI->getOperand(1))
+        // VSRC1-2 are unused, but we still need to fill all the
+        // operand slots, so we just reuse the VSRC0 operand
+        .addOperand(MI->getOperand(1))
+        .addOperand(MI->getOperand(1))
+        .addImm(0) // ABS
+        .addImm(1) // CLAMP
+        .addImm(0) // OMOD
+        .addImm(0); // NEG
+    MI->eraseFromParent();
+    break;
+
+  case AMDGPU::FABS_SI:
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+        .addOperand(MI->getOperand(0))
+        .addOperand(MI->getOperand(1))
+        // VSRC1-2 are unused, but we still need to fill all the
+        // operand slots, so we just reuse the VSRC0 operand
+        .addOperand(MI->getOperand(1))
+        .addOperand(MI->getOperand(1))
+        .addImm(1) // ABS
+        .addImm(0) // CLAMP
+        .addImm(0) // OMOD
+        .addImm(0); // NEG
+    MI->eraseFromParent();
+    break;
+
+  case AMDGPU::FNEG_SI:
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+        .addOperand(MI->getOperand(0))
+        .addOperand(MI->getOperand(1))
+        // VSRC1-2 are unused, but we still need to fill all the
+        // operand slots, so we just reuse the VSRC0 operand
+        .addOperand(MI->getOperand(1))
+        .addOperand(MI->getOperand(1))
+        .addImm(0) // ABS
+        .addImm(0) // CLAMP
+        .addImm(0) // OMOD
+        .addImm(1); // NEG
+    MI->eraseFromParent();
+    break;
+  case AMDGPU::SHADER_TYPE:
+    BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
+        MI->getOperand(0).getImm();
+    MI->eraseFromParent();
+    break;
+
+  case AMDGPU::SI_INTERP:
+    LowerSI_INTERP(MI, *BB, I, MRI);
+    break;
+  case AMDGPU::SI_INTERP_CONST:
+    LowerSI_INTERP_CONST(MI, *BB, I, MRI);
+    break;
+  case AMDGPU::SI_KIL:
+    LowerSI_KIL(MI, *BB, I, MRI);
+    break;
+  case AMDGPU::SI_WQM:
+    LowerSI_WQM(MI, *BB, I, MRI);
+    break;
+  case AMDGPU::SI_V_CNDLT:
+    LowerSI_V_CNDLT(MI, *BB, I, MRI);
+    break;
+  }
+  return BB;
+}
+
+void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
+    MachineBasicBlock::iterator I) const
+{
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
+      .addImm(0);
+}
+
+
+void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
+    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
+{
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
+      .addReg(AMDGPU::EXEC);
+
+  MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
+    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
+{
+  unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+  unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
+  MachineOperand dst = MI->getOperand(0);
+  MachineOperand iReg = MI->getOperand(1);
+  MachineOperand jReg = MI->getOperand(2);
+  MachineOperand attr_chan = MI->getOperand(3);
+  MachineOperand attr = MI->getOperand(4);
+  MachineOperand params = MI->getOperand(5);
+
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
+      .addOperand(params);
+
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
+      .addOperand(iReg)
+      .addOperand(attr_chan)
+      .addOperand(attr)
+      .addReg(M0);
+
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
+      .addOperand(dst)
+      .addReg(tmp)
+      .addOperand(jReg)
+      .addOperand(attr_chan)
+      .addOperand(attr)
+      .addReg(M0);
+
+  MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
+    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
+    MachineRegisterInfo &MRI) const
+{
+  MachineOperand dst = MI->getOperand(0);
+  MachineOperand attr_chan = MI->getOperand(1);
+  MachineOperand attr = MI->getOperand(2);
+  MachineOperand params = MI->getOperand(3);
+  unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
+
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
+      .addOperand(params);
+
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
+      .addOperand(dst)
+      .addOperand(attr_chan)
+      .addOperand(attr)
+      .addReg(M0);
+
+  MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
+    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
+{
+  // Clear this pixel from the exec mask if the operand is negative
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
+          AMDGPU::VCC)
+      .addReg(AMDGPU::SREG_LIT_0)
+      .addOperand(MI->getOperand(0));
+
+  MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
+    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
+{
+  unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+  BuildMI(BB, I, BB.findDebugLoc(I),
+          TII->get(AMDGPU::V_CMP_GT_F32_e32),
+          VCC)
+      .addReg(AMDGPU::SREG_LIT_0)
+      .addOperand(MI->getOperand(1));
+
+  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32))
+      .addOperand(MI->getOperand(0))
+      .addOperand(MI->getOperand(3))
+      .addOperand(MI->getOperand(2))
+      .addReg(VCC);
+
+  MI->eraseFromParent();
+}
+
+EVT SITargetLowering::getSetCCResultType(EVT VT) const
+{
+  return MVT::i1;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+  switch (Op.getOpcode()) {
+  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+  case ISD::LOAD: return LowerLOAD(Op, DAG);
+  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+  case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntrinsicID =
+        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+    EVT VT = Op.getValueType();
+    switch (IntrinsicID) {
+    case AMDGPUIntrinsic::SI_vs_load_buffer_index:
+      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
+                                  AMDGPU::VGPR0, VT);
+    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+    }
+    break;
+  }
+  }
+  return SDValue();
+}
+
+/// Loweri1ContextSwitch - The function is for lowering i1 operations on the
+/// VCC register. In the VALU context, VCC is a one bit register, but in the
+/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
+/// the SALU can perform operations on the VCC register, we need to promote
+/// the operand types from i1 to i64 in order for tablegen to be able to match
+/// this operation to the correct SALU instruction. We do this promotion by
+/// wrapping the operands in a CopyToReg node.
+///
+SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
+                                               SelectionDAG &DAG,
+                                               unsigned VCCNode) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+
+  SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
+                               DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
+                                           Op.getOperand(0)),
+                               DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
+                                           Op.getOperand(1)));
+
+  return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
+}
+
+SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Chain = Op.getOperand(0);
+  SDValue CC = Op.getOperand(1);
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue JumpT = Op.getOperand(4);
+  SDValue CmpValue;
+  SDValue Result;
+  CmpValue = DAG.getNode(
+      ISD::SETCC,
+      Op.getDebugLoc(),
+      MVT::i1,
+      LHS, RHS,
+      CC);
+
+  Result = DAG.getNode(
+      AMDGPUISD::BRANCH_COND,
+      CmpValue.getDebugLoc(),
+      MVT::Other, Chain,
+      JumpT, CmpValue);
+  return Result;
+}
+
+SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT VT = Op.getValueType();
+  LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
+
+  assert(Ptr);
+
+  unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
+
+  // We only need to lower USER_SGPR address space loads
+  if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
+    return SDValue();
+  }
+
+  // Loads from the USER_SGPR address space can only have constant value
+  // pointers.
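+  // (Worked example: an i64 load from constant address 2 has
+  // TypeDwordWidth == 2, so SGPRIndex == 1 and the value comes from the
+  // second register of the SReg_64 class.)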
+  ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
+  assert(BasePtr);
+
+  unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
+  const TargetRegisterClass * dstClass;
+  switch (TypeDwordWidth) {
+  default:
+    assert(!"USER_SGPR value size not implemented");
+    return SDValue();
+  case 1:
+    dstClass = &AMDGPU::SReg_32RegClass;
+    break;
+  case 2:
+    dstClass = &AMDGPU::SReg_64RegClass;
+    break;
+  }
+  uint64_t Index = BasePtr->getZExtValue();
+  assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
+  unsigned SGPRIndex = Index / TypeDwordWidth;
+  unsigned Reg = dstClass->getRegister(SGPRIndex);
+
+  DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
+                                                         VT));
+  return SDValue();
+}
+
+SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue True = Op.getOperand(2);
+  SDValue False = Op.getOperand(3);
+  SDValue CC = Op.getOperand(4);
+  EVT VT = Op.getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+
+  SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
+  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
+                                            DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  DebugLoc DL = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::SELECT_CC: {
+    N->dump();
+    ConstantSDNode *True, *False;
+    // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
+    if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+        && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+        && True->isAllOnesValue()
+        && False->isNullValue()
+        && VT == MVT::i1) {
+      return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
+                         N->getOperand(1), N->getOperand(4));
+
+    }
+    break;
+  }
+  case ISD::SETCC: {
+    SDValue Arg0 = N->getOperand(0);
+    SDValue Arg1 = N->getOperand(1);
+    SDValue CC = N->getOperand(2);
+    ConstantSDNode * C = NULL;
+    ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
+
+    // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
+    if (VT == MVT::i1
+        && Arg0.getOpcode() == ISD::SIGN_EXTEND
+        && Arg0.getOperand(0).getValueType() == MVT::i1
+        && (C = dyn_cast<ConstantSDNode>(Arg1))
+        && C->isNullValue()
+        && CCOp == ISD::SETNE) {
+      return SimplifySetCC(VT, Arg0.getOperand(0),
+                           DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
+    }
+    break;
+  }
+  }
+  return SDValue();
+}
+
+#define NODE_NAME_CASE(node) case SIISD::node: return #node;
+
+const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+  switch (Opcode) {
+  default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
+  NODE_NAME_CASE(VCC_AND)
+  NODE_NAME_CASE(VCC_BITCAST)
+  }
+}
diff --git lib/Target/AMDGPU/SIISelLowering.h lib/Target/AMDGPU/SIISelLowering.h
new file mode 100644
index 0000000..4407bf0
--- /dev/null
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -0,0 +1,63 @@
+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SIISELLOWERING_H
+#define SIISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+#include "SIInstrInfo.h"
+
+namespace llvm {
+
+class SITargetLowering : public AMDGPUTargetLowering
+{
+  const SIInstrInfo * TII;
+
+  /// AppendS_WAITCNT - Memory reads and writes are synchronized using the
+  /// S_WAITCNT instruction. This function takes the most conservative
+  /// approach and inserts an S_WAITCNT instruction after every read and
+  /// write.
+  void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
+                       MachineBasicBlock::iterator I) const;
+  void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
+                    MachineBasicBlock::iterator I, unsigned Opcode) const;
+  void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
+                      MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+  void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
+                            MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
+  void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
+                   MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+  void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
+                   MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+  void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
+                       MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+
+  SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
+                               unsigned VCCNode) const;
+  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+
+public:
+  SITargetLowering(TargetMachine &tm);
+  virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
+                                                          MachineBasicBlock * BB) const;
+  virtual EVT getSetCCResultType(EVT VT) const;
+  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  virtual const char* getTargetNodeName(unsigned Opcode) const;
+};
+
+} // End namespace llvm
+
+#endif //SIISELLOWERING_H
diff --git lib/Target/AMDGPU/SIInstrFormats.td lib/Target/AMDGPU/SIInstrFormats.td
new file mode 100644
index 0000000..97d54ac
--- /dev/null
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -0,0 +1,146 @@
+//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction format definitions.
+//
+// Instructions with _32 take 32-bit operands.
+// Instructions with _64 take 64-bit operands.
+//
+// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
+// encoding is the standard encoding, but instructions that make use of
+// any of the instruction modifiers must use the 64-bit encoding.
+//
+// Instructions with _e32 use the 32-bit encoding.
+// Instructions with _e64 use the 64-bit encoding.
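+//
+// For example, the VOP1_32 multiclass below emits a hypothetical V_FOO_F32
+// as both V_FOO_F32_e32 (plain VOP1 encoding) and V_FOO_F32_e64 (VOP3
+// encoding with the extra ABS/CLAMP/OMOD/NEG modifier operands).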
+//
+//===----------------------------------------------------------------------===//
+
+class VOP3b_2IN <bits<9> op, string opName, RegisterClass dstClass,
+                 RegisterClass src0Class, RegisterClass src1Class,
+                 list<dag> pattern>
+  : VOP3b ;
+
+
+class VOP3_1_32 <bits<9> op, string opName, list<dag> pattern>
+  : VOP3b_2IN ;
+
+class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
+  : VOP3 ;
+
+class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
+  : VOP3 ;
+
+
+class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
+  : SOP1 ;
+
+class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
+  : SOP1 ;
+
+class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
+  : SOP2 ;
+
+class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
+  : SOP2 ;
+
+class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
+  : SOP2 ;
+
+class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
+                   string opName, list<dag> pattern> :
+  VOP1 <
+    op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
+  >;
+
+multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
+  def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
+  def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+    opName, []
+  >;
+}
+
+multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
+
+  def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+
+  def _e64 : VOP3_64 <
+    {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+    opName, []
+  >;
+}
+
+class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
+                   string opName, list<dag> pattern> :
+  VOP2 <
+    op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
+  >;
+
+multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
+
+  def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
+
+  def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+    opName, []
+  >;
+}
+
+multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
+  def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+
+  def _e64 : VOP3_64 <
+    {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+    opName, []
+  >;
+}
+
+class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
+  : SOPK ;
+
+class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
+  : SOPK ;
+
+class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
+                   string opName, list<dag> pattern> :
+  VOPC <
+    op, (ins arc:$src0, vrc:$src1), opName, pattern
+  >;
+
+multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> {
+
+  def _e32 : VOPC_Helper <
+    {op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+    VReg_32, AllReg_32, opName, pattern
+  >;
+
+  def _e64 : VOP3_1_32 <
+    op,
+    opName, pattern
+  >;
+}
+
+multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
+
+  def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+
+  def _e64 : VOP3_64 <
+    {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+    opName, []
+  >;
+}
+
+class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
+  : SOPC ;
+
+class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
+  : SOPC ;
+
diff --git lib/Target/AMDGPU/SIInstrInfo.cpp lib/Target/AMDGPU/SIInstrInfo.cpp
new file mode 100644
index 0000000..bed9e77
--- /dev/null
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -0,0 +1,92 @@
+//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Implementation of TargetInstrInfo.
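+//
+// Note that copyPhysReg() below selects the move opcode from the destination
+// register class: S_MOV_B64 for SGPR pairs, V_MOV_B32_e32 for 32-bit VGPR
+// destinations (whose source may be an SGPR), and S_MOV_B32 otherwise;
+// VGPR-to-SGPR copies are deliberately not handled.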
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIInstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
+  : AMDGPUInstrInfo(tm),
+    RI(tm, *this)
+    { }
+
+const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const
+{
+  return RI;
+}
+
+void
+SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MI, DebugLoc DL,
+                         unsigned DestReg, unsigned SrcReg,
+                         bool KillSrc) const
+{
+  // If we are trying to copy to or from SCC, there is a bug somewhere else in
+  // the backend. While it may be theoretically possible to do this, it should
+  // never be necessary.
+  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
+
+  if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
+    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+  } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
+    assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
+           AMDGPU::SReg_32RegClass.contains(SrcReg));
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+  } else {
+    assert(AMDGPU::SReg_32RegClass.contains(DestReg));
+    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+  }
+}
+
+MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+                                           int64_t Imm) const
+{
+  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
+  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
+  MachineInstrBuilder(MI).addImm(Imm);
+
+  return MI;
+
+}
+
+bool SIInstrInfo::isMov(unsigned Opcode) const
+{
+  switch(Opcode) {
+  default: return false;
+  case AMDGPU::S_MOV_B32:
+  case AMDGPU::S_MOV_B64:
+  case AMDGPU::V_MOV_B32_e32:
+  case AMDGPU::V_MOV_B32_e64:
+  case AMDGPU::V_MOV_IMM_F32:
+  case AMDGPU::V_MOV_IMM_I32:
+  case AMDGPU::S_MOV_IMM_I32:
+    return true;
+  }
+}
+
+bool
+SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+  return RC != &AMDGPU::EXECRegRegClass;
+}
diff --git lib/Target/AMDGPU/SIInstrInfo.h lib/Target/AMDGPU/SIInstrInfo.h
new file mode 100644
index 0000000..d20c733
--- /dev/null
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -0,0 +1,62 @@
+//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface definition for SIInstrInfo.
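+//
+// TSFlags layout reminder: bits 3-0 hold the SIInstrEncodingType value and
+// bit 4 is SIInstrFlags::NEED_WAIT, so an instruction that must be followed
+// by S_WAITCNT stores (EncodingType | (1 << 4)).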
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIINSTRINFO_H
+#define SIINSTRINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+namespace llvm {
+
+class SIInstrInfo : public AMDGPUInstrInfo {
+private:
+  const SIRegisterInfo RI;
+
+public:
+  explicit SIInstrInfo(AMDGPUTargetMachine &tm);
+
+  const SIRegisterInfo &getRegisterInfo() const;
+
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI, DebugLoc DL,
+                           unsigned DestReg, unsigned SrcReg,
+                           bool KillSrc) const;
+
+  /// getEncodingType - Returns the encoding type of this instruction.
+  unsigned getEncodingType(const MachineInstr &MI) const;
+
+  /// getEncodingBytes - Returns the size of this instruction's encoding in
+  /// number of bytes.
+  unsigned getEncodingBytes(const MachineInstr &MI) const;
+
+  virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+                                        int64_t Imm) const;
+
+  virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0; }
+
+  virtual bool isMov(unsigned Opcode) const;
+
+  virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+};
+
+} // End namespace llvm
+
+namespace SIInstrFlags {
+  enum Flags {
+    // First 4 bits are the instruction encoding
+    NEED_WAIT = 1 << 4
+  };
+}
+
+#endif //SIINSTRINFO_H
diff --git lib/Target/AMDGPU/SIInstrInfo.td lib/Target/AMDGPU/SIInstrInfo.td
new file mode 100644
index 0000000..ea8a33f
--- /dev/null
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -0,0 +1,589 @@
+//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI DAG Profiles
+//===----------------------------------------------------------------------===//
+def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
+  SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
+]>;
+
+//===----------------------------------------------------------------------===//
+// SI DAG Nodes
+//===----------------------------------------------------------------------===//
+
+// and operation on 64-bit wide vcc
+def SIsreg1_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
+  [SDNPCommutative, SDNPAssociative]
+>;
+
+// Special bitcast node for sharing VCC register between VALU and SALU
+def SIsreg1_bitcast : SDNode<"SIISD::VCC_BITCAST",
+  SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
+>;
+
+// and operation on 64-bit wide vcc
+def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
+  [SDNPCommutative, SDNPAssociative]
+>;
+
+// Special bitcast node for sharing VCC register between VALU and SALU
+def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
+  SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
+>;
+
+class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
+    AMDGPUInst <outs, ins, asm, pattern> {
+
+  field bits<4> EncodingType = 0;
+  field bits<1> NeedWait = 0;
+
+  let TSFlags{3-0} = EncodingType;
+  let TSFlags{4} = NeedWait;
+
+}
+
+class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
+    InstSI <outs, ins, asm, pattern> {
+
+  field bits<32> Inst;
+}
+
+class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
+    InstSI <outs, ins, asm, pattern> {
+
+  field bits<64> Inst;
+}
+
+class SIOperand <ValueType vt, dag opInfo> : Operand <vt> {
+  let EncoderMethod = "encodeOperand";
+  let MIOperandInfo = opInfo;
+}
+
+def IMM16bit : ImmLeaf <
+  i16,
+  [{return isInt<16>(Imm);}]
+>;
+
+def IMM8bit : ImmLeaf <
+  i32,
+  [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
+>;
+
+def IMM12bit : ImmLeaf <
+  i16,
+  [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
+>;
+
+def IMM32bitIn64bit : ImmLeaf <
+  i64,
+  [{return isInt<32>(Imm);}]
+>;
+
+class GPR4Align <RegisterClass rc> : Operand {
+  let EncoderMethod = "GPR4AlignEncode";
+  let MIOperandInfo = (ops rc:$reg);
+}
+
+class GPR2Align <RegisterClass rc> : Operand {
+  let EncoderMethod = "GPR2AlignEncode";
+  let MIOperandInfo = (ops rc:$reg);
+}
+
+def SMRDmemrr : Operand {
+  let MIOperandInfo = (ops SReg_64, SReg_32);
+  let EncoderMethod = "GPR2AlignEncode";
+}
+
+def SMRDmemri : Operand {
+  let MIOperandInfo = (ops SReg_64, i32imm);
+  let EncoderMethod = "SMRDmemriEncode";
+}
+
+def ADDR_Reg : ComplexPattern;
+def ADDR_Offset8 : ComplexPattern;
+
+let Uses = [EXEC] in {
+
+def EXP : Enc64<
+  (outs),
+  (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
+       VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+  "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+  [] > {
+
+  bits<4> EN;
+  bits<6> TGT;
+  bits<1> COMPR;
+  bits<1> DONE;
+  bits<1> VM;
+  bits<8> VSRC0;
+  bits<8> VSRC1;
+  bits<8> VSRC2;
+  bits<8> VSRC3;
+
+  let Inst{3-0} = EN;
+  let Inst{9-4} = TGT;
+  let Inst{10} = COMPR;
+  let Inst{11} = DONE;
+  let Inst{12} = VM;
+  let Inst{31-26} = 0x3e;
+  let Inst{39-32} = VSRC0;
+  let Inst{47-40} = VSRC1;
+  let Inst{55-48} = VSRC2;
+  let Inst{63-56} = VSRC3;
+  let EncodingType = 0; //SIInstrEncodingType::EXP
+
+  let NeedWait = 1;
+  let usesCustomInserter = 1;
+}
+
+class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+    Enc64 <outs, ins, asm, pattern> {
+
+  bits<8> VDATA;
+  bits<4> DMASK;
+  bits<1> UNORM;
+  bits<1> GLC;
+  bits<1> DA;
+  bits<1> R128;
+  bits<1> TFE;
+  bits<1> LWE;
+  bits<1> SLC;
+  bits<8> VADDR;
+  bits<5> SRSRC;
+  bits<5> SSAMP;
+
+  let Inst{11-8} = DMASK;
+  let Inst{12} = UNORM;
+  let Inst{13} = GLC;
+  let Inst{14} = DA;
+  let Inst{15} = R128;
+  let Inst{16} = TFE;
+  let Inst{17} = LWE;
+  let Inst{24-18} = op;
+  let Inst{25} = SLC;
+  let Inst{31-26} = 0x3c;
+  let Inst{39-32} = VADDR;
+  let Inst{47-40} = VDATA;
+  let Inst{52-48} = SRSRC;
+  let Inst{57-53} = SSAMP;
+
+  let EncodingType = 2; //SIInstrEncodingType::MIMG
+
+  let NeedWait = 1;
+  let usesCustomInserter = 1;
+}
+
+class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
+    Enc64 <outs, ins, asm, pattern> {
+
+  bits<8> VDATA;
+  bits<12> OFFSET;
+  bits<1> OFFEN;
+  bits<1> IDXEN;
+  bits<1> GLC;
+  bits<1> ADDR64;
+  bits<4> DFMT;
+  bits<3> NFMT;
+  bits<8> VADDR;
+  bits<5> SRSRC;
+  bits<1> SLC;
+  bits<1> TFE;
+  bits<8> SOFFSET;
+
+  let Inst{11-0} = OFFSET;
+  let Inst{12} = OFFEN;
+  let Inst{13} = IDXEN;
+  let Inst{14} = GLC;
+  let Inst{15} = ADDR64;
+  let Inst{18-16} = op;
+  let Inst{22-19} = DFMT;
+  let Inst{25-23} = NFMT;
+  let Inst{31-26} = 0x3a; //encoding
+  let Inst{39-32} = VADDR;
+  let Inst{47-40} = VDATA;
+  let Inst{52-48} = SRSRC;
+  let Inst{54} = SLC;
+  let Inst{55} = TFE;
+  let Inst{63-56} = SOFFSET;
+  let EncodingType = 3; //SIInstrEncodingType::MTBUF
+
+  let NeedWait = 1;
+  let usesCustomInserter = 1;
+  let neverHasSideEffects = 1;
+}
+
+class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+    Enc64 <outs, ins, asm, pattern> {
+
+  bits<8> VDATA;
+  bits<12> OFFSET;
+  bits<1> OFFEN;
+  bits<1> IDXEN;
+  bits<1> GLC;
+  bits<1> ADDR64;
+  bits<1> LDS;
+  bits<8> VADDR;
+  bits<5> SRSRC;
+  bits<1> SLC;
+  bits<1> TFE;
+  bits<8> SOFFSET;
+
+  let Inst{11-0} = OFFSET;
+  let Inst{12} = OFFEN;
+  let Inst{13} = IDXEN;
+  let Inst{14} = GLC;
+  let Inst{15} = ADDR64;
+  let Inst{16} = LDS;
+  let Inst{24-18} = op;
+  let Inst{31-26} = 0x38; //encoding
+  let Inst{39-32} = VADDR;
+  let Inst{47-40} = VDATA;
let Inst{52-48} = SRSRC; + let Inst{54} = SLC; + let Inst{55} = TFE; + let Inst{63-56} = SOFFSET; + let EncodingType = 4; //SIInstrEncodingType::MUBUF + + let NeedWait = 1; + let usesCustomInserter = 1; + let neverHasSideEffects = 1; +} + +} // End Uses = [EXEC] + +class SMRD op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<7> SDST; + bits<15> PTR; + bits<8> OFFSET = PTR{7-0}; + bits<1> IMM = PTR{8}; + bits<6> SBASE = PTR{14-9}; + + let Inst{7-0} = OFFSET; + let Inst{8} = IMM; + let Inst{14-9} = SBASE; + let Inst{21-15} = SDST; + let Inst{26-22} = op; + let Inst{31-27} = 0x18; //encoding + let EncodingType = 5; //SIInstrEncodingType::SMRD + + let NeedWait = 1; + let usesCustomInserter = 1; +} + +class SOP1 op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<7> SDST; + bits<8> SSRC0; + + let Inst{7-0} = SSRC0; + let Inst{15-8} = op; + let Inst{22-16} = SDST; + let Inst{31-23} = 0x17d; //encoding; + let EncodingType = 6; //SIInstrEncodingType::SOP1 + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class SOP2 op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<7> SDST; + bits<8> SSRC0; + bits<8> SSRC1; + + let Inst{7-0} = SSRC0; + let Inst{15-8} = SSRC1; + let Inst{22-16} = SDST; + let Inst{29-23} = op; + let Inst{31-30} = 0x2; // encoding + let EncodingType = 7; // SIInstrEncodingType::SOP2 + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class SOPC op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<8> SSRC0; + bits<8> SSRC1; + + let Inst{7-0} = SSRC0; + let Inst{15-8} = SSRC1; + let Inst{22-16} = op; + let Inst{31-23} = 0x17e; + let EncodingType = 8; // SIInstrEncodingType::SOPC + + let DisableEncoding = "$dst"; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class SOPK op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits <7> SDST; + bits <16> SIMM16; + + let Inst{15-0} = SIMM16; + let Inst{22-16} = SDST; + let Inst{27-23} = op; + let Inst{31-28} = 0xb; //encoding + let EncodingType = 9; // SIInstrEncodingType::SOPK + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class SOPP op, dag ins, string asm, list pattern> : Enc32 < + (outs), + ins, + asm, + pattern > { + + bits <16> SIMM16; + + let Inst{15-0} = SIMM16; + let Inst{22-16} = op; + let Inst{31-23} = 0x17f; // encoding + let EncodingType = 10; // SIInstrEncodingType::SOPP + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +let Uses = [EXEC] in { + +class VINTRP op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<8> VDST; + bits<8> VSRC; + bits<2> ATTRCHAN; + bits<6> ATTR; + + let Inst{7-0} = VSRC; + let Inst{9-8} = ATTRCHAN; + let Inst{15-10} = ATTR; + let Inst{17-16} = op; + let Inst{25-18} = VDST; + let Inst{31-26} = 0x32; // encoding + let EncodingType = 11; // SIInstrEncodingType::VINTRP + + let neverHasSideEffects = 1; + let mayLoad = 1; + let mayStore = 0; +} + +class VOP1 op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<8> VDST; + bits<9> SRC0; + + let Inst{8-0} = SRC0; + let Inst{16-9} = op; + let Inst{24-17} = VDST; + let Inst{31-25} = 0x3f; //encoding + + let EncodingType = 12; // SIInstrEncodingType::VOP1 + let PostEncoderMethod = "VOPPostEncode"; + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class VOP2 op, dag outs, dag ins, string asm, list pattern> : + Enc32 { + + bits<8> VDST; + bits<9> SRC0; + bits<8> VSRC1; + + let Inst{8-0} = SRC0; + let Inst{16-9} = 
+class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
+    Enc32 <outs, ins, asm, pattern> {
+
+  bits<8> VDST;
+  bits<9> SRC0;
+  bits<8> VSRC1;
+
+  let Inst{8-0} = SRC0;
+  let Inst{16-9} = VSRC1;
+  let Inst{24-17} = VDST;
+  let Inst{30-25} = op;
+  let Inst{31} = 0x0; // encoding
+
+  let EncodingType = 13; // SIInstrEncodingType::VOP2
+  let PostEncoderMethod = "VOPPostEncode";
+
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+}
+
+class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+    Enc64 <outs, ins, asm, pattern> {
+
+  bits<8> VDST;
+  bits<9> SRC0;
+  bits<9> SRC1;
+  bits<9> SRC2;
+  bits<3> ABS;
+  bits<1> CLAMP;
+  bits<2> OMOD;
+  bits<3> NEG;
+
+  let Inst{7-0} = VDST;
+  let Inst{10-8} = ABS;
+  let Inst{11} = CLAMP;
+  let Inst{25-17} = op;
+  let Inst{31-26} = 0x34; // encoding
+  let Inst{40-32} = SRC0;
+  let Inst{49-41} = SRC1;
+  let Inst{58-50} = SRC2;
+  let Inst{60-59} = OMOD;
+  let Inst{63-61} = NEG;
+
+  let EncodingType = 14; // SIInstrEncodingType::VOP3
+  let PostEncoderMethod = "VOPPostEncode";
+
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+}
+
+class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+    Enc64 <outs, ins, asm, pattern> {
+
+  bits<8> VDST;
+  bits<9> SRC0;
+  bits<9> SRC1;
+  bits<9> SRC2;
+  bits<7> SDST;
+  bits<2> OMOD;
+  bits<3> NEG;
+
+  let Inst{7-0} = VDST;
+  let Inst{14-8} = SDST;
+  let Inst{25-17} = op;
+  let Inst{31-26} = 0x34; // encoding
+  let Inst{40-32} = SRC0;
+  let Inst{49-41} = SRC1;
+  let Inst{58-50} = SRC2;
+  let Inst{60-59} = OMOD;
+  let Inst{63-61} = NEG;
+
+  let EncodingType = 14; // SIInstrEncodingType::VOP3
+  let PostEncoderMethod = "VOPPostEncode";
+
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+}
+
+class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
+    Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
+
+  bits<9> SRC0;
+  bits<8> VSRC1;
+
+  let Inst{8-0} = SRC0;
+  let Inst{16-9} = VSRC1;
+  let Inst{24-17} = op;
+  let Inst{31-25} = 0x3e;
+
+  let EncodingType = 15; // SIInstrEncodingType::VOPC
+  let PostEncoderMethod = "VOPPostEncode";
+  let DisableEncoding = "$dst";
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+}
+
+} // End Uses = [EXEC]
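
NOTE (editorial): the EncodingType values assigned in the classes above evidently
mirror a C++-side SIInstrEncodingType enum consulted by the code emitter. A
minimal sketch of that mapping, reconstructed only from the comments in this
patch — the 0-3 entries are an assumption to fill the numbering; only 4-15
actually appear above:

  namespace SIInstrEncodingType {
    enum Encoding {
      EXP = 0, LDS = 1, MIMG = 2, MTBUF = 3,     // assumed
      MUBUF = 4, SMRD = 5, SOP1 = 6, SOP2 = 7,   // from the comments above
      SOPC = 8, SOPK = 9, SOPP = 10, VINTRP = 11,
      VOP1 = 12, VOP2 = 13, VOP3 = 14, VOPC = 15
    };
  }
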
+class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
+  op,
+  (outs VReg_128:$vdata),
+  (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+       i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
+       GPR4Align:$srsrc, GPR4Align:$ssamp),
+  asm,
+  []> {
+  let mayLoad = 1;
+  let mayStore = 0;
+}
+
+class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
+  op,
+  (outs regClass:$dst),
+  (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+       i1imm:$lds, VReg_32:$vaddr, GPR4Align:$srsrc, i1imm:$slc,
+       i1imm:$tfe, SReg_32:$soffset),
+  asm,
+  []> {
+  let mayLoad = 1;
+  let mayStore = 0;
+}
+
+class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+  op,
+  (outs regClass:$dst),
+  (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+       i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align:$srsrc,
+       i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
+  asm,
+  []> {
+  let mayLoad = 1;
+  let mayStore = 0;
+}
+
+class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+  op,
+  (outs),
+  (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
+       i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
+       GPR4Align:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
+  asm,
+  []> {
+  let mayStore = 1;
+  let mayLoad = 0;
+}
+
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
+                        ValueType vt> {
+  def _IMM : SMRD <
+    op,
+    (outs dstClass:$dst),
+    (ins SMRDmemri:$src0),
+    asm,
+    [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
+  >;
+
+  def _SGPR : SMRD <
+    op,
+    (outs dstClass:$dst),
+    (ins SMRDmemrr:$src0),
+    asm,
+    [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
+  >;
+}
+
+multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
+  defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
+  defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
+}
+
+include "SIInstrFormats.td"
+include "SIInstructions.td"
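
NOTE (editorial): to make the nested multiclass expansion above concrete — a
defm instantiation such as the S_LOAD_DWORD one later in this patch
concatenates the outer defm name with the inner defm and def suffixes,
yielding four records:

  // defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>;
  // expands to:
  //   S_LOAD_DWORD_F32_IMM    (8-bit immediate offset form)
  //   S_LOAD_DWORD_F32_SGPR   (SGPR offset form)
  //   S_LOAD_DWORD_I32_IMM
  //   S_LOAD_DWORD_I32_SGPR
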
diff --git lib/Target/AMDGPU/SIInstructions.td lib/Target/AMDGPU/SIInstructions.td
new file mode 100644
index 0000000..cb94381
--- /dev/null
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -0,0 +1,1256 @@
+//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def isSI : Predicate<"Subtarget.device()"
+                     "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
+
+let Predicates = [isSI] in {
+
+let neverHasSideEffects = 1 in {
+def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
+def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
+def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
+def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
+def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
+def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
+def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
+def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
+def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
+} // End neverHasSideEffects = 1
+////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
+////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
+////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
+////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
+////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
+////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
+////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
+////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
+//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
+//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
+def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
+//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
+//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
+//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
+////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
+////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
+////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
+////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
+def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
+def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
+def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
+def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
+def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
+def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
+def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
+////def S_ANDN2_SAVEEXEC_B64 : SOP1_ANDN2 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
+////def S_ORN2_SAVEEXEC_B64 : SOP1_ORN2 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
+def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
+def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
+def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
+def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
+def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
+def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
+def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
+def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
+//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
+def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
+def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
+def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
+def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
+
+/*
+This instruction is disabled for now until we can figure out how to teach
+the instruction selector to correctly use the S_CMP* vs V_CMP*
+instructions.
+
+When this instruction is enabled the code generator sometimes produces this
+invalid sequence:
+
+SCC = S_CMPK_EQ_I32 SGPR0, imm
+VCC = COPY SCC
+VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
+
+def S_CMPK_EQ_I32 : SOPK <
+  0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
+  "S_CMPK_EQ_I32",
+  [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
+>;
+*/
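
NOTE (editorial): for contrast with the invalid sequence quoted in the comment
above, the selection path this file does support keeps the condition in a
64-bit lane mask rather than in SCC, via the V_CMP_* / V_CNDMASK patterns
defined below; an illustrative sketch (register assignments hypothetical):

  // SGPR2_SGPR3 = V_CMP_EQ_I32_e64 SGPR0, VGPR1   ; per-lane condition mask
  // VGPR0 = V_CNDMASK_B32_e64 VGPR0, VGPR1, SGPR2_SGPR3
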
+def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
+def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
+def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
+def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
+def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
+def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
+def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
+def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
+def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
+def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
+def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
+def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
+def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
+def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
+def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
+//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
+//def EXP : EXP_ <0x00000000, "EXP", []>;
+
+defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>;
+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", []>;
+def : Pat <
+  (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT)),
+  (V_CMP_LT_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", []>;
+def : Pat <
+  (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)),
+  (V_CMP_EQ_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", []>;
+def : Pat <
+  (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE)),
+  (V_CMP_LE_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", []>;
+def : Pat <
+  (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT)),
+  (V_CMP_GT_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", []>;
+def : Pat <
+  (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
+  (V_CMP_LG_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", []>;
+def : Pat <
+  (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE)),
+  (V_CMP_GE_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>;
+defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>;
+defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>;
+defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>;
+defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>;
+defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>;
+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", []>;
+def : Pat <
+  (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
+  (V_CMP_NEQ_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>;
+defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>;
+
+//Side effect is writing to EXEC
+let hasSideEffects = 1 in {
+
+defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>;
+defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>;
+defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>;
+defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>;
+defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>;
+defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>;
+defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>;
+defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>;
+defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>;
+defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>;
+defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>;
+defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>;
+defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>;
+defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>;
+defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>;
+defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>;
+
+} // End hasSideEffects = 1
+
+defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>;
+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>;
+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>;
+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>;
+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>;
+defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>;
+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>;
+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>;
+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>;
+defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>;
+defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>;
+defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>;
+defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>;
+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>;
+defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>;
+defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>;
+
+//Side effect is writing to EXEC
+let hasSideEffects = 1 in {
+
+defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>;
+defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>;
+defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>;
+defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>;
+defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>;
+defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>;
+defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>;
+defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>;
+defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>;
+defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>;
+defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>;
+defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>;
+defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>;
+defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>;
+defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>;
+defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>;
+
+} // End hasSideEffects = 1
+
+defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>;
+defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>;
+defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>;
+defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>;
+defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>;
+defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>;
+defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>;
+defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>;
+defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>;
+defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>;
+defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>;
+defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>;
+defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>;
+defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>;
+defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>;
+defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>;
+defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>;
+defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>;
+defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>;
+defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>;
+defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>;
+defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>;
+defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>;
+defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>;
+defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>;
+defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>;
+defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>;
+defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>;
+defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>;
+defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>;
+defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>;
+defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>;
+defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>;
+defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>;
+defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>;
+defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>;
+defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>;
+defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>;
+defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>;
+defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>;
+defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>;
+defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>;
+defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>;
+defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>;
+defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>;
+defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>;
+defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>;
+defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>;
+defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>;
+defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>;
+defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>;
+defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>;
+defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>;
+defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>;
+defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>;
+defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>;
+defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>;
+defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>;
+defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>;
+defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>;
+defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>;
+defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>;
+defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>;
+defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>;
+defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>;
+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", []>;
+def : Pat <
+  (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LT)),
+  (V_CMP_LT_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", []>;
+def : Pat <
+  (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)),
+  (V_CMP_EQ_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", []>;
+def : Pat <
+  (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LE)),
+  (V_CMP_LE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", []>;
+def : Pat <
+  (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GT)),
+  (V_CMP_GT_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", []>;
+def : Pat <
+  (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
+  (V_CMP_NE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", []>;
+def : Pat <
+  (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GE)),
+  (V_CMP_GE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>;
+
+let hasSideEffects = 1 in {
+
+defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>;
+defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>;
+defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>;
+defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>;
+defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>;
+defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>;
+defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>;
+defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>;
+
+} // End hasSideEffects
+
+defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>;
+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>;
+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>;
+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>;
+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>;
+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>;
+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>;
+defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>;
+
+let hasSideEffects = 1 in {
+
+defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>;
+defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>;
+defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>;
+defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>;
+defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>;
+defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>;
+defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>;
+defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>;
+
+} // End hasSideEffects
+
+defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>;
+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>;
+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>;
+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>;
+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>;
+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>;
+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>;
+defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>;
+
+let hasSideEffects = 1 in {
+
+defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>;
+defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>;
+defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>;
+defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>;
+defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>;
+defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>;
+defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>;
+defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>;
+
+} // End hasSideEffects
+
+defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>;
+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>;
+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>;
+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>;
+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>;
+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>;
+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>;
+defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>;
+defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>;
+defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>;
+defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>;
+defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>;
+defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>;
+defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>;
+defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>;
+defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>;
+defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>;
+defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>;
+defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>;
+defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>;
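
NOTE (editorial): judging by the V_CMP_*_e64 names used in the setcc patterns
above, the VOPC_32/VOPC_64 multiclasses presumably emit each compare in two
encodings; a sketch of the assumed expansion:

  // defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", []>;
  // is assumed to produce:
  //   V_CMP_LT_F32_e32  -- compact VOPC encoding, result written to VCC
  //   V_CMP_LT_F32_e64  -- VOP3 encoding, result written to a chosen SGPR pair
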
+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
+def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
+//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>;
+//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
+//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
+//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
+//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>;
+//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>;
+//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>;
+//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
+//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
+//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
+//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
+//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
+//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
+//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
+//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
+//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
+//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
+//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
+//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
+//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
+//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
+def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
+//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
+//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
+//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
+
+defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>;
+
+//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>;
+//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
+//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
+//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
+//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>;
+//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
+//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
+
+//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
+//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
+//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
+//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
+def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">;
+//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
+//def IMAGE_SAMPLE_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_D", 0x00000022>;
+//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
+//def IMAGE_SAMPLE_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_L", 0x00000024>;
+//def IMAGE_SAMPLE_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_B", 0x00000025>;
+//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
+//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
+//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>;
+//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
+//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
+//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
+//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>;
+//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>;
+//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
+//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
+//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
+//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
+//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
+//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
+//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
+//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
+//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
+//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
+//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
+//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
+//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
+//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
+//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
+//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
+//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
+//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
+//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
+//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
+//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
+//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
+//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
+//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
+//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
+//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
+//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
+//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
+//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
+//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
+//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
+//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
+//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
+//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
+//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
+//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
+//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
+//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
+//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
+//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
+//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
+//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
+//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
+//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
+//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
+//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
+//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
+//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
+//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
+//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
+//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
+//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
+
+let neverHasSideEffects = 1 in {
+defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
+} // End neverHasSideEffects
+defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
+//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
+//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
+defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
+  [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))]
+>;
+//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
+//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
+  [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))]
+>;
+defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
+////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
+//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
+//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
+//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
+//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
+//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
+//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
+//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
+//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
+//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
+defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
+  [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))]
+>;
+defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
+defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>;
+defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
+  [(set VReg_32:$dst, (frint AllReg_32:$src0))]
+>;
+defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
+  [(set VReg_32:$dst, (ffloor AllReg_32:$src0))]
+>;
+defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
+  [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))]
+>;
+defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
+defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>;
+defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
+defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
+defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
+  [(set VReg_32:$dst, (int_AMDGPU_rcp AllReg_32:$src0))]
+>;
+defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
+defm V_RSQ_LEGACY_F32 : VOP1_32 <
+  0x0000002d, "V_RSQ_LEGACY_F32",
+  [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))]
+>;
+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
+defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
+defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
+defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>;
+defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>;
+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
+defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
+defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
+defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
+defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
+defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
+//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
+defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
+defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
+//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
+defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
+//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
+defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
+defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
+defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
+
+def V_INTERP_P1_F32 : VINTRP <
+  0x00000000,
+  (outs VReg_32:$dst),
+  (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+  "V_INTERP_P1_F32",
+  []> {
+  let DisableEncoding = "$m0";
+}
+
+def V_INTERP_P2_F32 : VINTRP <
+  0x00000001,
+  (outs VReg_32:$dst),
+  (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+  "V_INTERP_P2_F32",
+  []> {
+
+  let Constraints = "$src0 = $dst";
+  let DisableEncoding = "$src0,$m0";
+
+}
+
+def V_INTERP_MOV_F32 : VINTRP <
+  0x00000002,
+  (outs VReg_32:$dst),
+  (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+  "V_INTERP_MOV_F32",
+  []> {
+  let VSRC = 0;
+  let DisableEncoding = "$m0";
+}
+
+//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
+
+let isTerminator = 1 in {
+
+def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
+  [(IL_retflag)]> {
+  let SIMM16 = 0;
+  let isBarrier = 1;
+  let hasCtrlDep = 1;
+}
+
+let isBranch = 1 in {
+def S_BRANCH : SOPP <
+  0x00000002, (ins brtarget:$target), "S_BRANCH",
+  []
+>;
+
+let DisableEncoding = "$scc" in {
+def S_CBRANCH_SCC0 : SOPP <
+  0x00000004, (ins brtarget:$target, SCCReg:$scc),
+  "S_CBRANCH_SCC0", []
+>;
+def S_CBRANCH_SCC1 : SOPP <
+  0x00000005, (ins brtarget:$target, SCCReg:$scc),
+  "S_CBRANCH_SCC1",
+  []
+>;
+} // End DisableEncoding = "$scc"
+
+def S_CBRANCH_VCCZ : SOPP <
+  0x00000006, (ins brtarget:$target, VCCReg:$vcc),
+  "S_CBRANCH_VCCZ",
+  []
+>;
+def S_CBRANCH_VCCNZ : SOPP <
+  0x00000007, (ins brtarget:$target, VCCReg:$vcc),
+  "S_CBRANCH_VCCNZ",
+  []
+>;
+
+let DisableEncoding = "$exec" in {
+def S_CBRANCH_EXECZ : SOPP <
+  0x00000008, (ins brtarget:$target, EXECReg:$exec),
+  "S_CBRANCH_EXECZ",
+  []
+>;
+def S_CBRANCH_EXECNZ : SOPP <
+  0x00000009, (ins brtarget:$target, EXECReg:$exec),
+  "S_CBRANCH_EXECNZ",
+  []
+>;
+} // End DisableEncoding = "$exec"
+
+
+} // End isBranch = 1
+} // End isTerminator = 1
+
+//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>;
+let hasSideEffects = 1 in {
+def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
+  []
+>;
+} // End hasSideEffects
+//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
+//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
+//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
+//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
+//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
+//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
+//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
+//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+
+def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
+  (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32",
+  []
+> {
+  let DisableEncoding = "$vcc";
+}
+
+def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
+  (ins AllReg_32:$src0, AllReg_32:$src1, SReg_1:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
+  "V_CNDMASK_B32_e64",
+  [(set (i32 VReg_32:$dst), (select SReg_1:$src2, AllReg_32:$src1, AllReg_32:$src0))]
+>;
+
+//f32 pattern for V_CNDMASK_B32_e64
+def : Pat <
+  (f32 (select SReg_1:$src2, AllReg_32:$src1, AllReg_32:$src0)),
+  (V_CNDMASK_B32_e64 AllReg_32:$src0, AllReg_32:$src1, SReg_1:$src2)
+>;
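
NOTE (editorial): the separate anonymous f32 Pat above is needed because
TableGen selection patterns are typed; the i32 select pattern attached to
V_CNDMASK_B32_e64 does not also match f32 selects, even though the
instruction itself is type-agnostic. The same duplicate-pattern idiom recurs
below for S_CSELECT_B32:

  //   (i32 (select ...)) -- matched by the pattern on the instruction def
  //   (f32 (select ...)) -- matched by the extra anonymous Pat
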
+defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
+defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
+
+defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", []>;
+def : Pat <
+  (f32 (fadd AllReg_32:$src0, VReg_32:$src1)),
+  (V_ADD_F32_e32 AllReg_32:$src0, VReg_32:$src1)
+>;
+
+defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", []>;
+def : Pat <
+  (f32 (fsub AllReg_32:$src0, VReg_32:$src1)),
+  (V_SUB_F32_e32 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
+defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
+defm V_MUL_LEGACY_F32 : VOP2_32 <
+  0x00000007, "V_MUL_LEGACY_F32",
+  [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
+  [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))]
+>;
+//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>;
+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
+//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
+  [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
+  [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
+defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
+defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
+defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
+defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
+  [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
+  [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
+  [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
+defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
+defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
+defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
+//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
+//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
+//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
+let Defs = [VCC] in { // Carry-out goes to VCC
+defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32",
+  [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
+>;
+defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32",
+  [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
+>;
+} // End Defs = [VCC]
+defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>;
+defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>;
+defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>;
+defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>;
+defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
+  [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))]
+>;
+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+
+let neverHasSideEffects = 1 in {
+
+def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
+def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
+//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>;
+//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>;
+
+} // End neverHasSideEffects
+def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
+def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
+def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
+def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
+def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
+def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
+def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
+def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
+def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
+def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
+def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
+def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
+////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
+////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
+////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
+////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
+////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
+////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
+////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
+////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
+////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
+def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
+def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
+def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>;
+def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>;
+def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>;
+def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
+def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
+def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
+def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
+def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
+def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
+def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
+def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
+def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
+def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
+def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
+def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
+def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
+
+def S_CSELECT_B32 : SOP2 <
+  0x0000000a, (outs SReg_32:$dst),
+  (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
+  [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
+>;
+
+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
+
+// f32 pattern for S_CSELECT_B32
+def : Pat <
+  (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
+  (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
+>;
+
+def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
+
+def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
+  [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))]
+>;
+def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64",
+  [(set SReg_1:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))]
+>;
+def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
+def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
+def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
+////def S_ANDN2_B32 : SOP2_ANDN2 <0x00000014, "S_ANDN2_B32", []>;
+////def S_ANDN2_B64 : SOP2_ANDN2 <0x00000015, "S_ANDN2_B64", []>;
+////def S_ORN2_B32 : SOP2_ORN2 <0x00000016, "S_ORN2_B32", []>;
+////def S_ORN2_B64 : SOP2_ORN2 <0x00000017, "S_ORN2_B64", []>;
+def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
+def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
+def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
+def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
+def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
+def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
+def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
+def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
+def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
+def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
+def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
+def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
+def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
+def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
+def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
+def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+
+class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI <
+  (outs VReg_32:$dst),
+  (ins immType:$src0),
+  "V_MOV_IMM",
+  [(set VReg_32:$dst, (immNode:$src0))]
+>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
+def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
+
+def S_MOV_IMM_I32 : InstSI <
+  (outs SReg_32:$dst),
+  (ins i32imm:$src0),
+  "S_MOV_IMM_I32",
+  [(set SReg_32:$dst, (imm:$src0))]
+>;
+
+// i64 immediates aren't really supported in hardware, but LLVM will use the i64
+// type for indices on load and store instructions. The pattern for
+// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits,
+// which the hardware can handle.
+def S_MOV_IMM_I64 : InstSI <
+  (outs SReg_64:$dst),
+  (ins i64imm:$src0),
+  "S_MOV_IMM_I64 $dst, $src0",
+  [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))]
+>;
+
+} // End isCodeGenOnly, isPseudo = 1
+class SI_LOAD_LITERAL <Operand ImmType> :
+    Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> {
+
+  bits<32> imm;
+  let Inst{31-0} = imm;
+}
+
+def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL <i32imm>;
+def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL <f32imm>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+def SET_M0 : InstSI <
+  (outs SReg_32:$dst),
+  (ins i32imm:$src0),
+  "SET_M0",
+  [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
+>;
+
+def LOAD_CONST : AMDGPUShaderInst <
+  (outs GPRF32:$dst),
+  (ins i32imm:$src),
+  "LOAD_CONST $dst, $src",
+  [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+>;
+
+let usesCustomInserter = 1 in {
+
+def SI_V_CNDLT : InstSI <
+  (outs VReg_32:$dst),
+  (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
+  "SI_V_CNDLT $dst, $src0, $src1, $src2",
+  [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))]
+>;
+
+def SI_INTERP : InstSI <
+  (outs VReg_32:$dst),
+  (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
+  "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params",
+  []
+>;
+
+def SI_INTERP_CONST : InstSI <
+  (outs VReg_32:$dst),
+  (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
+  "SI_INTERP_CONST $dst, $attr_chan, $attr, $params",
+  [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan,
+                       imm:$attr, SReg_32:$params))]
+>;
+
+def SI_KIL : InstSI <
+  (outs),
+  (ins VReg_32:$src),
+  "SI_KIL $src",
+  [(int_AMDGPU_kill VReg_32:$src)]
+>;
+
+def SI_WQM : InstSI <
+  (outs),
+  (ins),
+  "SI_WQM",
+  [(int_SI_wqm)]
+>;
+
+} // end usesCustomInserter
+
+// SI Pseudo branch instructions. These are used by the CFG structurizer pass
+// and should be lowered to ISA instructions prior to codegen.
+
+let isBranch = 1, isTerminator = 1, mayLoad = 0, mayStore = 0,
+    hasSideEffects = 0 in {
+def SI_IF_NZ : InstSI <
+  (outs),
+  (ins brtarget:$target, SReg_1:$vcc),
+  "SI_BRANCH_NZ",
+  [(IL_brcond bb:$target, SReg_1:$vcc)]
+>;
+
+def SI_IF_Z : InstSI <
+  (outs),
+  (ins brtarget:$target, SReg_1:$vcc),
+  "SI_BRANCH_Z",
+  []
+>;
+} // end isBranch = 1, isTerminator = 1, mayLoad = 0, mayStore = 0,
+  // hasSideEffects = 0
+} // end isCodeGenOnly, isPseudo
+
+/* int_SI_vs_load_input */
+def : Pat<
+  (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
+                        VReg_32:$buf_idx_vgpr),
+  (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
+                           VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
+                           0, 0, (i32 SREG_LIT_0))
+>;
+
+/* int_SI_export */
+def : Pat <
+  (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
+                 VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+  (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
+       VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
+>;
+
+/* int_SI_sample */
+def : Pat <
+  (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
+  (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
+                SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+def CLAMP_SI : CLAMP <VReg_32>;
+def FABS_SI : FABS <VReg_32>;
+def FNEG_SI : FNEG <VReg_32>;
+
+def : Extract_Element <f32, v4f32, VReg_32, VReg_128, 0, sel_x>;
+def : Extract_Element <f32, v4f32, VReg_32, VReg_128, 1, sel_y>;
+def : Extract_Element <f32, v4f32, VReg_32, VReg_128, 2, sel_z>;
+def : Extract_Element <f32, v4f32, VReg_32, VReg_128, 3, sel_w>;
+
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
+
+def : Vector_Build <v4f32, VReg_32>;
+def : Vector_Build <v4i32, SReg_32>;
+
+def : BitConvert <i32, f32, SReg_32>;
+def : BitConvert <i32, f32, VReg_32>;
+
+def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <f32, i32, VReg_32>;
+
+def : Pat <
+  (i64 (SIsreg1_bitcast SReg_1:$vcc)),
+  (S_MOV_B64 (COPY_TO_REGCLASS SReg_1:$vcc, SReg_64))
+>;
+
+def : Pat <
+  (i1 (SIsreg1_bitcast SReg_64:$vcc)),
+  (COPY_TO_REGCLASS SReg_64:$vcc, SReg_1)
+>;
+
+def : Pat <
+  (i64 (SIvcc_bitcast VCCReg:$vcc)),
+  (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64))
+>;
+
+def : Pat <
+  (i1 (SIvcc_bitcast SReg_64:$vcc)),
+  (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
+>;
+
+/********** ===================== **********/
+/********** Interpolation Patterns **********/
+/********** ===================== **********/
+
+def : Pat <
+  (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
+  (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
+             imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+  (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
+  (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan,
+             imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+  (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params),
+  (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan,
+             imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+  (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
+  (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan,
+             imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+  (int_SI_fs_read_face),
+  (f32 FRONT_FACE)
+>;
+
+def : Pat <
+  (int_SI_fs_read_pos 0),
+  (f32 POS_X_FLOAT)
+>;
+
+def : Pat <
+  (int_SI_fs_read_pos 1),
+  (f32 POS_Y_FLOAT)
+>;
+
+def : Pat <
+  (int_SI_fs_read_pos 2),
+  (f32 POS_Z_FLOAT)
+>;
+
+def : Pat <
+  (int_SI_fs_read_pos 3),
+  (f32 POS_W_FLOAT)
+>;
+
+/********** ================== **********/
+/********** Intrinsic Patterns **********/
+/********** ================== **********/
+
+/* llvm.AMDGPU.pow */
+/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
+
+def : Pat <
+  (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1),
+  (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1))
+>;
+
+def : Pat <
+  (int_AMDGPU_kilp),
+  (SI_KIL (V_MOV_IMM_I32 0xbf800000))
+>;
+
+/********** ================== **********/
+/********** VOP3 Patterns **********/
+/********** ================== **********/
+
+def : Pat <(f32 (IL_mad AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2)),
+           (V_MAD_LEGACY_F32 AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2,
+            0, 0, 0, 0)>;
+
+} // End isSI predicate
diff --git lib/Target/AMDGPU/SIIntrinsics.td lib/Target/AMDGPU/SIIntrinsics.td
new file mode 100644
index 0000000..f8974d8
--- /dev/null
+++ lib/Target/AMDGPU/SIIntrinsics.td
@@ -0,0 +1,40 @@
+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+
+let TargetPrefix = "SI", isTarget = 1 in {
+
+  def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+  def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+  /* XXX: We may need a separate intrinsic here for loading integer values */
+  def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
+  def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
+  def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]>;
+  def int_SI_wqm : Intrinsic <[], [], []>;
+
+  def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
+
+  /* Interpolation Intrinsics */
+
+  def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
+  class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
+
+  def int_SI_fs_interp_linear_center : Interp;
+  def int_SI_fs_interp_linear_centroid : Interp;
+  def int_SI_fs_interp_persp_center : Interp;
+  def int_SI_fs_interp_persp_centroid : Interp;
+  def int_SI_fs_interp_constant : Interp;
+
+  def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
+  def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+}
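As a usage illustration only (the function and value names below are invented, in the style of the sanity.ll test at the end of this patch), a frontend reaches int_SI_sample from LLVM IR like so:

; Hypothetical example, matching the intrinsic signature defined above.
declare <4 x float> @llvm.SI.sample(i32, <4 x float>, <8 x i32>, <4 x i32>)

define <4 x float> @sample_example(<4 x float> %coord, <8 x i32> %rsrc, <4 x i32> %sampler) {
  ; writemask 15 requests all four result components
  %tex = call <4 x float> @llvm.SI.sample(i32 15, <4 x float> %coord, <8 x i32> %rsrc, <4 x i32> %sampler)
  ret <4 x float> %tex
}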
diff --git lib/Target/AMDGPU/SILowerFlowControl.cpp lib/Target/AMDGPU/SILowerFlowControl.cpp
new file mode 100644
index 0000000..b901688
--- /dev/null
+++ lib/Target/AMDGPU/SILowerFlowControl.cpp
@@ -0,0 +1,192 @@
+//===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF)
+// to predicated instructions.
+//
+// All flow control (except loops) is handled using predicated instructions and
+// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
+// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
+// by writing to the 64-bit EXEC register (each bit corresponds to a
+// single vector ALU). Typically, for predicates, a vector ALU will write
+// to its bit of the VCC register (like EXEC, VCC is 64 bits wide, one bit
+// for each Vector ALU) and then the Scalar ALU will AND the VCC register
+// with EXEC to update the predicates.
+//
+// For example:
+// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
+// SI_IF_NZ %VCC
+// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
+// ELSE
+// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
+// ENDIF
+//
+// becomes:
+//
+// %SGPR0 = S_MOV_B64 %EXEC          // Save the current exec mask
+// %EXEC = S_AND_B64 %VCC, %EXEC     // Update the exec mask
+// S_CBRANCH_EXECZ label0            // This instruction is an
+//                                   // optimization which allows us to
+//                                   // branch if all the bits of
+//                                   // EXEC are zero.
+// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
+//
+// label0:
+// %EXEC = S_NOT_B64 %EXEC           // Invert the exec mask for the
+//                                   // ELSE block.
+// %EXEC = S_AND_B64 %SGPR0, %EXEC
+// S_CBRANCH_EXECZ label1            // Use our branch optimization
+//                                   // instruction again.
+// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
+// label1:
+// %EXEC = S_MOV_B64 %SGPR0          // Restore the old EXEC value
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerFlowControlPass : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const TargetInstrInfo *TII;
+  std::vector<unsigned> PredicateStack;
+  std::vector<unsigned> UnusedRegisters;
+
+  void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+  void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+
+public:
+  SILowerFlowControlPass(TargetMachine &tm) :
+      MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const {
+    return "SI Lower flow control instructions";
+  }
+
+};
+
+} // End anonymous namespace
+
+char SILowerFlowControlPass::ID = 0;
+
+FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) {
+  return new SILowerFlowControlPass(tm);
+}
+
+bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) {
+
+  // Find all the unused registers that can be used for the predicate stack.
+  for (TargetRegisterClass::iterator I = AMDGPU::SReg_64RegClass.begin(),
+       S = AMDGPU::SReg_64RegClass.end();
+       I != S; ++I) {
+    unsigned Reg = *I;
+    if (!MF.getRegInfo().isPhysRegUsed(Reg)) {
+      UnusedRegisters.insert(UnusedRegisters.begin(), Reg);
+    }
+  }
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+         I != MBB.end(); I = Next) {
+      Next = llvm::next(I);
+      MachineInstr &MI = *I;
+      switch (MI.getOpcode()) {
+        default: break;
+        case AMDGPU::SI_IF_NZ:
+          pushExecMask(MBB, I);
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
+                  AMDGPU::EXEC)
+                  .addOperand(MI.getOperand(0)) // VCC
+                  .addReg(AMDGPU::EXEC);
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_XOR_B64),
+                  PredicateStack.back())
+                  .addReg(PredicateStack.back())
+                  .addReg(AMDGPU::EXEC);
+          MI.eraseFromParent();
+          break;
+        case AMDGPU::ELSE:
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+                  UnusedRegisters.back())
+                  .addReg(AMDGPU::EXEC);
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+                  AMDGPU::EXEC)
+                  .addReg(PredicateStack.back());
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+                  PredicateStack.back())
+                  .addReg(UnusedRegisters.back());
+          MI.eraseFromParent();
+          break;
+        case AMDGPU::ENDIF:
+          popExecMask(MBB, I);
+          if (MF.getInfo<SIMachineFunctionInfo>()->ShaderType == ShaderType::PIXEL &&
+              PredicateStack.empty()) {
+            // If the exec mask is non-zero, skip the next two instructions
+            BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+                    .addImm(3)
+                    .addReg(AMDGPU::EXEC);
+
+            // Exec mask is zero: Export to NULL target...
+            BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::EXP))
+                    .addImm(0)
+                    .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+                    .addImm(0)
+                    .addImm(1)
+                    .addImm(1)
+                    .addReg(AMDGPU::SREG_LIT_0)
+                    .addReg(AMDGPU::SREG_LIT_0)
+                    .addReg(AMDGPU::SREG_LIT_0)
+                    .addReg(AMDGPU::SREG_LIT_0);
+
+            // ... and terminate wavefront
+            BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
+          }
+          MI.eraseFromParent();
+          break;
+      }
+    }
+  }
+  return false;
+}
+
+void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator I) {
+
+  assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");
+  unsigned StackReg = UnusedRegisters.back();
+  UnusedRegisters.pop_back();
+  PredicateStack.push_back(StackReg);
+  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
+          StackReg)
+          .addReg(AMDGPU::EXEC);
+}
+
+void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator I) {
+  unsigned StackReg = PredicateStack.back();
+  PredicateStack.pop_back();
+  UnusedRegisters.push_back(StackReg);
+  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_OR_B64),
+          AMDGPU::EXEC)
+          .addReg(AMDGPU::EXEC)
+          .addReg(StackReg);
+}
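For nested IFs the pass keeps one saved mask per nesting level; a hand-worked trace of what the code above emits for a nested SI_IF_NZ/ENDIF pair (an editorial illustration, not compiler output; the SGPR pair names are arbitrary):

; outer SI_IF_NZ %VCC:
;   %SGPR0_SGPR1 = S_MOV_B64 %EXEC                 ; pushExecMask: save EXEC
;   %EXEC        = S_AND_B64 %VCC, %EXEC           ; disable lanes that failed the compare
;   %SGPR0_SGPR1 = S_XOR_B64 %SGPR0_SGPR1, %EXEC   ; keep only the lanes this IF disabled
; inner SI_IF_NZ %VCC:
;   %SGPR2_SGPR3 = S_MOV_B64 %EXEC
;   %EXEC        = S_AND_B64 %VCC, %EXEC
;   %SGPR2_SGPR3 = S_XOR_B64 %SGPR2_SGPR3, %EXEC
; inner ENDIF:
;   %EXEC        = S_OR_B64 %EXEC, %SGPR2_SGPR3    ; popExecMask: re-enable those lanes
; outer ENDIF:
;   %EXEC        = S_OR_B64 %EXEC, %SGPR0_SGPR1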
diff --git lib/Target/AMDGPU/SILowerLiteralConstants.cpp lib/Target/AMDGPU/SILowerLiteralConstants.cpp
new file mode 100644
index 0000000..6f4a0d6
--- /dev/null
+++ lib/Target/AMDGPU/SILowerLiteralConstants.cpp
@@ -0,0 +1,106 @@
+//===-- SILowerLiteralConstants.cpp - Lower instrs using literal constants--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass performs the following transformation on instructions with
+// literal constants:
+//
+// %VGPR0 = V_MOV_IMM_I32 1
+//
+// becomes:
+//
+// BUNDLE
+//   * %VGPR0 = V_MOV_B32_e32 SI_LITERAL_CONSTANT
+//   * SI_LOAD_LITERAL 1
+//
+// The resulting sequence matches exactly how the hardware handles immediate
+// operands, so this transformation greatly simplifies the code generator.
+//
+// Only the *_MOV_IMM_* instructions support immediate operands at the moment,
+// but when support for immediate operands is added to other instructions, they
+// will be lowered here as well.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerLiteralConstantsPass : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const TargetInstrInfo *TII;
+
+public:
+  SILowerLiteralConstantsPass(TargetMachine &tm) :
+      MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const {
+    return "SI Lower literal constants pass";
+  }
+};
+
+} // End anonymous namespace
+
+char SILowerLiteralConstantsPass::ID = 0;
+
+FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
+  return new SILowerLiteralConstantsPass(tm);
+}
+
+bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+         I != MBB.end(); I = Next) {
+      Next = llvm::next(I);
+      MachineInstr &MI = *I;
+      switch (MI.getOpcode()) {
+        default: break;
+        case AMDGPU::S_MOV_IMM_I32:
+        case AMDGPU::S_MOV_IMM_I64:
+        case AMDGPU::V_MOV_IMM_F32:
+        case AMDGPU::V_MOV_IMM_I32: {
+          unsigned MovOpcode;
+          unsigned LoadLiteralOpcode;
+          MachineOperand LiteralOp = MI.getOperand(1);
+          if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
+            MovOpcode = AMDGPU::V_MOV_B32_e32;
+          } else {
+            MovOpcode = AMDGPU::S_MOV_B32;
+          }
+          if (LiteralOp.isImm()) {
+            LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
+          } else {
+            LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
+          }
+          MachineInstr *First =
+            BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode),
+                    MI.getOperand(0).getReg())
+                    .addReg(AMDGPU::SI_LITERAL_CONSTANT);
+          MachineInstr *Last =
+            BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
+                    .addOperand(MI.getOperand(1));
+          Last->setIsInsideBundle();
+          llvm::finalizeBundle(MBB, First, Last);
+          MI.eraseFromParent();
+          break;
+        }
+      }
+    }
+  }
+  return false;
+}
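The scalar cases follow the same shape; a hand-worked trace for S_MOV_IMM_I32 (editorial illustration, using the opcodes the pass selects when the destination is not in VReg_32):

// Before:
//   %SGPR2 = S_MOV_IMM_I32 42
// After:
//   BUNDLE
//     * %SGPR2 = S_MOV_B32 SI_LITERAL_CONSTANT
//     * SI_LOAD_LITERAL_I32 42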
diff --git lib/Target/AMDGPU/SIMachineFunctionInfo.cpp lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
new file mode 100644
index 0000000..3cc1cd6
--- /dev/null
+++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -0,0 +1,19 @@
+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
+  : MachineFunctionInfo(),
+    SPIPSInputAddr(0),
+    ShaderType(0)
+  { }
diff --git lib/Target/AMDGPU/SIMachineFunctionInfo.h lib/Target/AMDGPU/SIMachineFunctionInfo.h
new file mode 100644
index 0000000..68097b4
--- /dev/null
+++ lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -0,0 +1,38 @@
+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config
+// register, which tells the hardware which interpolation parameters to load.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef _SIMACHINEFUNCTIONINFO_H_
+#define _SIMACHINEFUNCTIONINFO_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class SIMachineFunctionInfo : public MachineFunctionInfo {
+
+  private:
+
+  public:
+    SIMachineFunctionInfo(const MachineFunction &MF);
+    unsigned SPIPSInputAddr;
+    unsigned ShaderType;
+
+};
+
+} // End namespace llvm
+
+
+#endif //_SIMACHINEFUNCTIONINFO_H_
diff --git lib/Target/AMDGPU/SIRegisterInfo.cpp lib/Target/AMDGPU/SIRegisterInfo.cpp
new file mode 100644
index 0000000..3d6dc83
--- /dev/null
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -0,0 +1,50 @@
+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SI implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIRegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
+                               const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+  TM(tm),
+  TII(tii)
+  { }
+
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const
+{
+  BitVector Reserved(getNumRegs());
+  return Reserved;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
+{
+  switch (rc->getID()) {
+  case AMDGPU::GPRF32RegClassID:
+    return &AMDGPU::VReg_32RegClass;
+  default: return rc;
+  }
+}
+
+const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
+    MVT VT) const
+{
+  switch(VT.SimpleTy) {
+    default:
+    case MVT::i32: return &AMDGPU::VReg_32RegClass;
+  }
+}
diff --git lib/Target/AMDGPU/SIRegisterInfo.h lib/Target/AMDGPU/SIRegisterInfo.h
new file mode 100644
index 0000000..b571da9
--- /dev/null
+++ lib/Target/AMDGPU/SIRegisterInfo.h
@@ -0,0 +1,47 @@
+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface definition for SIRegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIREGISTERINFO_H_
+#define SIREGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct SIRegisterInfo : public AMDGPURegisterInfo
+{
+  AMDGPUTargetMachine &TM;
+  const TargetInstrInfo &TII;
+
+  SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+  virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+  /// getISARegClass - rc is an AMDIL reg class. This function returns the
+  /// SI register class that is equivalent to the given AMDIL register class.
+  virtual const TargetRegisterClass *
+    getISARegClass(const TargetRegisterClass * rc) const;
+
+  /// getCFGStructurizerRegClass - get the register class of the specified
+  /// type to use in the CFGStructurizer
+  virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+
+};
+
+} // End namespace llvm
+
+#endif // SIREGISTERINFO_H_
diff --git lib/Target/AMDGPU/SIRegisterInfo.td lib/Target/AMDGPU/SIRegisterInfo.td
new file mode 100644
index 0000000..a3d91ae
--- /dev/null
+++ lib/Target/AMDGPU/SIRegisterInfo.td
@@ -0,0 +1,167 @@
+
+let Namespace = "AMDGPU" in {
+  def low : SubRegIndex;
+  def high : SubRegIndex;
+
+  def sub0 : SubRegIndex;
+  def sub1 : SubRegIndex;
+  def sub2 : SubRegIndex;
+  def sub3 : SubRegIndex;
+  def sub4 : SubRegIndex;
+  def sub5 : SubRegIndex;
+  def sub6 : SubRegIndex;
+  def sub7 : SubRegIndex;
+}
+
+class SIReg <string n, bits<16> encoding = 0> : Register <n> {
+  let Namespace = "AMDGPU";
+  let HWEncoding = encoding;
+}
+
+class SI_64 <string n, list<Register> subregs, bits<16> encoding> :
+    RegisterWithSubRegs <n, subregs> {
+  let Namespace = "AMDGPU";
+  let SubRegIndices = [low, high];
+  let HWEncoding = encoding;
+}
+
+class SGPR_32 <bits<16> num, string name> : SIReg <name, num>;
+
+class VGPR_32 <bits<16> num, string name> : SIReg <name, num>;
+
+// Special Registers
+def VCC : SIReg<"VCC", 106>;
+def EXEC_LO : SIReg <"EXEC LO", 126>;
+def EXEC_HI : SIReg <"EXEC HI", 127>;
+def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>;
+def SCC : SIReg<"SCC", 253>;
+def SREG_LIT_0 : SIReg <"S LIT 0", 128>;
+def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT", 255>;
+def M0 : SIReg <"M0", 124>;
+
+// Interpolation registers
+def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
+def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
+def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
+def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
+def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
+def PERSP_CENTROID_J : SIReg <"PERSP_CENTROID_J">;
+def PERSP_I_W : SIReg <"PERSP_I_W">;
+def PERSP_J_W : SIReg <"PERSP_J_W">;
+def PERSP_1_W : SIReg <"PERSP_1_W">;
+def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
+def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
+def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
+def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
+def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
+def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
+def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
+def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
+def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
+def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
+def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
+def FRONT_FACE : SIReg <"FRONT_FACE">;
+def ANCILLARY : SIReg <"ANCILLARY">;
+def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
+def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
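A reference note (editorial; the numbers come from the SI ISA's scalar-operand encoding, not from this patch): the hard-coded HWEncoding values above are the hardware operand numbers, while the interpolation registers deliberately carry no encoding because they are placeholders rewritten to real VGPRs before emission.

//   106       -> VCC (VCC_LO in the ISA numbering)
//   124       -> M0
//   126, 127  -> EXEC_LO, EXEC_HI
//   128       -> inline constant 0 (SREG_LIT_0)
//   253       -> SCC
//   255       -> "a 32-bit literal dword follows" (SI_LITERAL_CONSTANT)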
+
+// SGPR 32-bit registers
+foreach Index = 0-103 in {
+  def SGPR#Index : SGPR_32 <Index, "SGPR"#Index>;
+}
+
+def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+                            (add (sequence "SGPR%u", 0, 103))>;
+
+// SGPR 64-bit registers
+def SGPR_64 : RegisterTuples<[low, high],
+                             [(add (decimate SGPR_32, 2)),
+                              (add (decimate (rotl SGPR_32, 1), 2))]>;
+
+// SGPR 128-bit registers
+def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
+                              [(add (decimate SGPR_32, 4)),
+                               (add (decimate (rotl SGPR_32, 1), 4)),
+                               (add (decimate (rotl SGPR_32, 2), 4)),
+                               (add (decimate (rotl SGPR_32, 3), 4))]>;
+
+// SGPR 256-bit registers
+def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+                              [(add (decimate SGPR_32, 8)),
+                               (add (decimate (rotl SGPR_32, 1), 8)),
+                               (add (decimate (rotl SGPR_32, 2), 8)),
+                               (add (decimate (rotl SGPR_32, 3), 8)),
+                               (add (decimate (rotl SGPR_32, 4), 8)),
+                               (add (decimate (rotl SGPR_32, 5), 8)),
+                               (add (decimate (rotl SGPR_32, 6), 8)),
+                               (add (decimate (rotl SGPR_32, 7), 8))]>;
+
+// VGPR 32-bit registers
+foreach Index = 0-255 in {
+  def VGPR#Index : VGPR_32 <Index, "VGPR"#Index>;
+}
+
+def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+                            (add (sequence "VGPR%u", 0, 255))>;
+
+// VGPR 64-bit registers
+def VGPR_64 : RegisterTuples<[low, high],
+                             [(add (decimate VGPR_32, 2)),
+                              (add (decimate (rotl VGPR_32, 1), 2))]>;
+
+// VGPR 128-bit registers
+def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
+                              [(add (decimate VGPR_32, 4)),
+                               (add (decimate (rotl VGPR_32, 1), 4)),
+                               (add (decimate (rotl VGPR_32, 2), 4)),
+                               (add (decimate (rotl VGPR_32, 3), 4))]>;
+
+// Register class for all scalar registers (SGPRs + Special Registers)
+def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+  (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
+>;
+
+def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>;
+
+def SReg_1 : RegisterClass<"AMDGPU", [i1], 1, (add VCC, SGPR_64, EXEC)>;
+
+def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
+
+def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
+
+// Register class for all vector registers (VGPRs + Interpolation Registers)
+def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+  (add VGPR_32,
+       PERSP_SAMPLE_I, PERSP_SAMPLE_J,
+       PERSP_CENTER_I, PERSP_CENTER_J,
+       PERSP_CENTROID_I, PERSP_CENTROID_J,
+       PERSP_I_W, PERSP_J_W, PERSP_1_W,
+       LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
+       LINEAR_CENTER_I, LINEAR_CENTER_J,
+       LINEAR_CENTROID_I, LINEAR_CENTROID_J,
+       LINE_STIPPLE_TEX_COORD,
+       POS_X_FLOAT,
+       POS_Y_FLOAT,
+       POS_Z_FLOAT,
+       POS_W_FLOAT,
+       FRONT_FACE,
+       ANCILLARY,
+       SAMPLE_COVERAGE,
+       POS_FIXED_PT
+  )
+>;
+
+def VReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add VGPR_64)>;
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128, (add VGPR_128)>;
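To make the decimate/rotl idiom concrete, the SGPR_64 tuple definition above expands as follows (a worked illustration, not generated TableGen output):

// (decimate SGPR_32, 2)           -> SGPR0, SGPR2, SGPR4, ...   (low halves)
// (decimate (rotl SGPR_32, 1), 2) -> SGPR1, SGPR3, SGPR5, ...   (high halves)
// zipped index-by-index into 64-bit pairs:
//   SGPR0_SGPR1, SGPR2_SGPR3, SGPR4_SGPR5, ...
// The 128-bit and 256-bit tuples follow the same pattern with strides 4 and 8.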
+
+// AllReg_* - A set of all scalar and vector registers of a given width.
+def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>;
+
+def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64, (add SReg_64, VReg_64)>;
+
+// Special register classes for predicates and the M0 register
+def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
+def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
+def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
+def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
+
diff --git lib/Target/AMDGPU/SISchedule.td lib/Target/AMDGPU/SISchedule.td
new file mode 100644
index 0000000..28b65b8
--- /dev/null
+++ lib/Target/AMDGPU/SISchedule.td
@@ -0,0 +1,15 @@
+//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: This is just a placeholder for now.
+//
+//===----------------------------------------------------------------------===//
+
+
+def SI_Itin : ProcessorItineraries <[], [], []>;
diff --git test/CodeGen/SI/sanity.ll test/CodeGen/SI/sanity.ll
new file mode 100644
index 0000000..62cdcf5
--- /dev/null
+++ test/CodeGen/SI/sanity.ll
@@ -0,0 +1,37 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+; CHECK: S_ENDPGM
+
+define void @main() {
+main_body:
+  call void @llvm.AMDGPU.shader.type(i32 1)
+  %0 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*)
+  %1 = getelementptr <4 x i32> addrspace(2)* %0, i32 0
+  %2 = load <4 x i32> addrspace(2)* %1
+  %3 = call i32 @llvm.SI.vs.load.buffer.index()
+  %4 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %2, i32 0, i32 %3)
+  %5 = extractelement <4 x float> %4, i32 0
+  %6 = extractelement <4 x float> %4, i32 1
+  %7 = extractelement <4 x float> %4, i32 2
+  %8 = extractelement <4 x float> %4, i32 3
+  %9 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*)
+  %10 = getelementptr <4 x i32> addrspace(2)* %9, i32 1
+  %11 = load <4 x i32> addrspace(2)* %10
+  %12 = call i32 @llvm.SI.vs.load.buffer.index()
+  %13 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %11, i32 0, i32 %12)
+  %14 = extractelement <4 x float> %13, i32 0
+  %15 = extractelement <4 x float> %13, i32 1
+  %16 = extractelement <4 x float> %13, i32 2
+  %17 = extractelement <4 x float> %13, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %5, float %6, float %7, float %8)
+  ret void
+}
+
+declare void @llvm.AMDGPU.shader.type(i32)
+
+declare i32 @llvm.SI.vs.load.buffer.index() readnone
+
+declare <4 x float> @llvm.SI.vs.load.input(<4 x i32>, i32, i32)
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)