Skip to content

Commit 8c6120d

Browse files
committed
[AIE2P] revert subreg coalescing block
1 parent 5b4a5ee commit 8c6120d

File tree

7 files changed

+83
-128
lines changed

7 files changed

+83
-128
lines changed

llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -618,34 +618,3 @@ bool AIE2PRegisterInfo::isFifoPhysReg(const Register Reg) const {
618618
return Reg.isPhysical() && (AIE2P::FIFO512RegClass.contains(Reg) ||
619619
AIE2P::FIFO1024RegClass.contains(Reg));
620620
}
621-
622-
bool AIE2PRegisterInfo::shouldCoalesce(
623-
MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
624-
const TargetRegisterClass *DstRC, unsigned DstSubReg,
625-
const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
626-
627-
const unsigned SrcSize = getRegSizeInBits(*SrcRC);
628-
const unsigned DstSize = getRegSizeInBits(*DstRC);
629-
MachineFunction *MF = MI->getMF();
630-
const AIEBaseInstrInfo *TII =
631-
static_cast<const AIEBaseInstrInfo *>(MF->getSubtarget().getInstrInfo());
632-
const unsigned BasicVectorSize = TII->getBasicVecRegSize();
633-
// Should not coalesce when copying from a narrower source into a wider destination.
634-
if (!EnableCoalescingForWideCopy && SrcSize < DstSize &&
635-
(SrcSize >= BasicVectorSize || DstSize >= BasicVectorSize)) {
636-
MachineBasicBlock *MBB = MI->getParent();
637-
LiveInterval &LI = LIS.getInterval(MI->getOperand(1).getReg());
638-
const MachineInstr *FirstMI =
639-
LI.empty() ? nullptr : LIS.getInstructionFromIndex(LI.beginIndex());
640-
const MachineInstr *LastMI =
641-
LI.empty() ? nullptr : LIS.getInstructionFromIndex(LI.endIndex());
642-
// Coalescing inside the same basic block was found beneficial. So, check that
643-
// the LiveInterval is not just local to MBB.
644-
if (!FirstMI || FirstMI->getParent() != MBB || !LastMI ||
645-
LastMI->getParent() != MBB)
646-
return false;
647-
}
648-
649-
return TargetRegisterInfo::shouldCoalesce(MI, SrcRC, SubReg, DstRC, DstSubReg,
650-
NewRC, LIS);
651-
}

llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,6 @@ struct AIE2PRegisterInfo : public AIE2PGenRegisterInfo {
105105
bool isFifoPhysReg(const Register Reg) const override;
106106

107107
bool isSimplifiableReservedReg(MCRegister PhysReg) const override;
108-
109-
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
110-
unsigned SubReg, const TargetRegisterClass *DstRC,
111-
unsigned DstSubReg, const TargetRegisterClass *NewRC,
112-
LiveIntervals &LIS) const override;
113108
};
114109
} // namespace llvm
115110

llvm/test/CodeGen/AIE/aie2p/end-to-end/gelu-templated.ll

Lines changed: 37 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -17,56 +17,54 @@
1717
define void @gelu_fn(ptr noalias %ifm, ptr noalias %ofm, ptr nonnull align 64 dereferenceable(64) %params) {
1818
; CHECK-LABEL: gelu_fn:
1919
; CHECK: // %bb.0: // %entry
20-
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; nopb ; nopxm
20+
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; nopb ; nopxm ; nops
2121
; CHECK-NEXT: movxm r0, #16544
2222
; CHECK-NEXT: vbcst.16 x6, r0
2323
; CHECK-NEXT: lda r1, [p2, #0]; movxm r0, #17280
2424
; CHECK-NEXT: mova r0, #60; vbcst.16 x2, r0
25-
; CHECK-NEXT: vadd.f dm3, dm1, dm0, r0
25+
; CHECK-NEXT: vadd.f dm1, dm1, dm0, r0
2626
; CHECK-NEXT: vconv.fp32.bf16 cml0, x6
2727
; CHECK-NEXT: nop
28+
; CHECK-NEXT: vlda.conv.fp32.bf16 cml2, [p0], #64; movxm r2, #15821
29+
; CHECK-NEXT: movx r4, #1
30+
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; movx r2, #255; vbcst.16 x4, r2
31+
; CHECK-NEXT: vconv.bf16.fp32 x8, cml1; lshl r2, r1, r4; vbcst.16 x0, r2
32+
; CHECK-NEXT: vlda.conv.fp32.bf16 cml2, [p0], #64; movx r2, #828; mov m0, r2; vadd.f dm2, dm2, dm0, r0
33+
; CHECK-NEXT: vmul.f dm3, x8, x2, r2
34+
; CHECK-NEXT: vadd.f dm1, dm1, dm0, r0
35+
; CHECK-NEXT: nop
36+
; CHECK-NEXT: vadd.f dm2, dm2, dm0, r0
37+
; CHECK-NEXT: nop
38+
; CHECK-NEXT: vconv.bf16.fp32 x10, cml2
39+
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; vconv.bf16.fp32 x8, cml3
40+
; CHECK-NEXT: vconv.bf16.fp32 x1, cml1; vmul.f dm3, x10, x2, r2
41+
; CHECK-NEXT: vmul.f dm4, x8, x4, r2
42+
; CHECK-NEXT: vconv.bf16.fp32 x7, cml2; vmul.f dm3, x1, x2, r2
43+
; CHECK-NEXT: vadd.f dm1, dm1, dm0, r0
44+
; CHECK-NEXT: vmul.f dm3, x7, x2, r2
2845
; CHECK-NEXT: vlda.conv.fp32.bf16 cml2, [p0], #64
29-
; CHECK-NEXT: movxm r2, #15821
30-
; CHECK-NEXT: mova r2, #255; movx r4, #1; vbcst.16 x4, r2
31-
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; vconv.bf16.fp32 x8, cml3; lshl r2, r1, r4; vbcst.16 x0, r2
32-
; CHECK-NEXT: mova r2, #828; mov m0, r2; vadd.f dm3, dm2, dm0, r0
33-
; CHECK-NEXT: vlda.conv.fp32.bf16 cml2, [p0], #64; vmul.f dm2, x8, x2, r2
34-
; CHECK-NEXT: nop
35-
; CHECK-NEXT: vadd.f dm3, dm1, dm0, r0
36-
; CHECK-NEXT: nop
37-
; CHECK-NEXT: vadd.f dm3, dm2, dm0, r0
3846
; CHECK-NEXT: vconv.bf16.fp32 x10, cml3
39-
; CHECK-NEXT: vconv.bf16.fp32 x8, cml2
40-
; CHECK-NEXT: vmul.f dm1, x10, x2, r2
41-
; CHECK-NEXT: vconv.bf16.fp32 x1, cml3
42-
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; vmul.f dm4, x8, x4, r2
43-
; CHECK-NEXT: vconv.bf16.fp32 x7, cml3; vmul.f dm2, x1, x2, r2
44-
; CHECK-NEXT: nop
45-
; CHECK-NEXT: vmul.f dm3, x7, x2, r2
46-
; CHECK-NEXT: vconv.bf16.fp32 x10, cml1; vadd.f dm1, dm1, dm0, r0
47-
; CHECK-NEXT: nop
48-
; CHECK-NEXT: vlda.conv.fp32.bf16 cml2, [p0], #64; vconv.bf16.fp32 x8, cml4; movx r3, #0; vmul.f dm4, x10, x4, r2
49-
; CHECK-NEXT: vconv.bf16.fp32 x5, cml2; mov s0, r3
50-
; CHECK-NEXT: vfloor.s32.bf16 x1, wl8, s0
51-
; CHECK-NEXT: vconv.bf16.fp32 x5, cml3; vmul.f dm4, x5, x4, r2
52-
; CHECK-NEXT: vconv.bf16.fp32 x7, cml1; movxm ls, #.LBB0_1; vadd.f dm2, dm2, dm0, r0
53-
; CHECK-NEXT: mova r4, #-5; nopb ; vfloor.s32.bf16 x3, wh8, s0; movxm le, #.L_LEnd0; vmul.f dm3, x5, x4, r2
54-
; CHECK-NEXT: mova r1, #2; nopb ; vconv.bf16.fp32 x10, cml4; lshl r4, r1, r4; vbcst.16 x6, r3; vmul.f dm4, x7, x2, r2
55-
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; vshuffle x1, x1, x3, r1
56-
; CHECK-NEXT: vfloor.s32.bf16 x9, wl10, s0; vmin_ge.16 x3, r16, x1, x0, vaddsign1
57-
; CHECK-NEXT: vfloor.s32.bf16 x3, wh10, s0; add.nc lc, r4, #-7
58-
; CHECK-NEXT: nopa ; nopb ; vconv.bf16.fp32 x8, cml4; nopx ; vmax_lt.16 x11, r16, x3, x6, vaddsign1; nopv
59-
; CHECK-NEXT: padda [p1], m0; nopb ; nops ; nopxm ; nopv
47+
; CHECK-NEXT: vconv.bf16.fp32 x8, cml4
48+
; CHECK-NEXT: vconv.bf16.fp32 x5, cml3; vmul.f dm4, x10, x4, r2
49+
; CHECK-NEXT: vconv.bf16.fp32 x7, cml1; vadd.f dm2, dm2, dm0, r0
50+
; CHECK-NEXT: mova r3, #0; vconv.bf16.fp32 x5, cml3; vmul.f dm4, x5, x4, r2
51+
; CHECK-NEXT: mov s0, r3; vmul.f dm3, x7, x2, r2
52+
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; nopb ; vfloor.s32.bf16 x1, wl8, s0; movxm ls, #.LBB0_1; vmul.f dm4, x5, x4, r2
53+
; CHECK-NEXT: mova r4, #-5; vfloor.s32.bf16 x3, wh8, s0; movxm le, #.L_LEnd0
54+
; CHECK-NEXT: vconv.bf16.fp32 x10, cml4; lshl r4, r1, r4; vbcst.16 x6, r3
55+
; CHECK-NEXT: mova r1, #2; add.nc lc, r4, #-7
56+
; CHECK-NEXT: nopa ; nopb ; vfloor.s32.bf16 x9, wl10, s0; nopx ; vshuffle x1, x1, x3, r1; nopv
57+
; CHECK-NEXT: nopa ; nopb ; vconv.bf16.fp32 x8, cml4; nopx ; vmin_ge.16 x3, r16, x1, x0, vaddsign1; nopv
58+
; CHECK-NEXT: padda [p1], m0; nopb ; vfloor.s32.bf16 x3, wh10, s0; nopx ; vmax_lt.16 x11, r16, x3, x6, vaddsign1; nopv
6059
; CHECK-NEXT: .LBB0_1: // %for.body
6160
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
62-
; CHECK-NEXT: nopa ; nopb ; vconv.bf16.fp32 x10, cml2; nopxm ; nopv
63-
; CHECK-NEXT: nopa ; nopb ; nops ; nopxm ; vadd.f dm2, dm4, dm0, r0
64-
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; nopb ; vconv.bf16.fp32 x7, cml4; nopx ; vmov cml4, cml1; vmul.f dm4, x10, x2, r2
65-
; CHECK-NEXT: nopa ; nopb ; vst x11, [p1], #64; nopx ; vshuffle x1, x9, x3, r1; nopv
61+
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; nopb ; vconv.bf16.fp32 x7, cml2; nopxm ; vadd.f dm2, dm2, dm0, r0
62+
; CHECK-NEXT: nopa ; nopb ; vst x11, [p1], #64; nopx ; vmov cml2, cml1; nopv
63+
; CHECK-NEXT: nopa ; nopb ; vconv.bf16.fp32 x10, cml3; nopx ; vshuffle x1, x9, x3, r1; vmul.f dm3, x7, x2, r2
6664
; CHECK-NEXT: vfloor.s32.bf16 x3, wh8, s0; vmin_ge.16 x5, r16, x1, x0, vaddsign1
6765
; CHECK-NEXT: vfloor.s32.bf16 x9, wl8, s0; vmax_lt.16 x11, r16, x5, x6, vaddsign1
6866
; CHECK-NEXT: .L_LEnd0:
69-
; CHECK-NEXT: nopa ; nopb ; vconv.bf16.fp32 x8, cml3; nopxm ; vmul.f dm3, x7, x4, r2
67+
; CHECK-NEXT: nopa ; nopb ; vconv.bf16.fp32 x8, cml4; nopxm ; vmul.f dm4, x10, x4, r2
7068
; CHECK-NEXT: // %bb.2:
7169
; CHECK-NEXT: nopa ; nopb ; nops ; nopx ; vshuffle x10, x9, x3, r1; nopv
7270
; CHECK-NEXT: vmin_ge.16 x10, r16, x10, x0, vaddsign1
@@ -79,15 +77,15 @@ define void @gelu_fn(ptr noalias %ifm, ptr noalias %ofm, ptr nonnull align 64 de
7977
; CHECK-NEXT: vshuffle x8, x10, x8, r1
8078
; CHECK-NEXT: vmin_ge.16 x8, r16, x8, x0, vaddsign1
8179
; CHECK-NEXT: vmax_lt.16 x8, r16, x8, x6, vaddsign1
82-
; CHECK-NEXT: vconv.bf16.fp32 x8, cml3
80+
; CHECK-NEXT: vconv.bf16.fp32 x8, cml4
8381
; CHECK-NEXT: vst x8, [p1], #64
8482
; CHECK-NEXT: vfloor.s32.bf16 x10, wl8, s0
8583
; CHECK-NEXT: vfloor.s32.bf16 x8, wh8, s0
8684
; CHECK-NEXT: nop
8785
; CHECK-NEXT: vshuffle x8, x10, x8, r1
8886
; CHECK-NEXT: vmin_ge.16 x8, r16, x8, x0, vaddsign1
8987
; CHECK-NEXT: vmax_lt.16 x8, r16, x8, x6, vaddsign1
90-
; CHECK-NEXT: vconv.bf16.fp32 x8, cml4
88+
; CHECK-NEXT: vconv.bf16.fp32 x8, cml3
9189
; CHECK-NEXT: vst x8, [p1], #64
9290
; CHECK-NEXT: vmul.f dm3, x8, x4, r2
9391
; CHECK-NEXT: nop

llvm/test/CodeGen/AIE/aie2p/end-to-end/sigmoid_int8_1.ll

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,47 +12,46 @@ define void @sigmoid_int8_1() {
1212
; CHECK-LABEL: sigmoid_int8_1:
1313
; CHECK: // %bb.0: // %entry
1414
; CHECK-NEXT: nopa ; nopb ; nops ; movxm ls, #.LBB0_1; nopv
15-
; CHECK-NEXT: movxm le, #.L_LEnd0
1615
; CHECK-NEXT: mova p0, #0; mov crunpacksize, #1
17-
; CHECK-NEXT: mova r0, #0; vldb.unpack x3, unpacksign0, [p0, #0]; mov crsrsmode, #0
16+
; CHECK-NEXT: vldb.unpack x1, unpacksign0, [p0, #0]; movxm le, #.L_LEnd0
17+
; CHECK-NEXT: mova r0, #0; mov crsrsmode, #0
1818
; CHECK-NEXT: vbcst.32 x4, r0
1919
; CHECK-NEXT: mova r1, #1; add.nc lc, r0, #-3
2020
; CHECK-NEXT: nopa ; nopb ; nops ; nopx ; vbcst.16 x0, r1; nopv
21-
; CHECK-NEXT: nopa ; nopb ; nops ; nopx ; vbcst.16 x2, r0; nopv
22-
; CHECK-NEXT: nopa ; vldb.unpack x3, unpacksign0, [p0, #0]; nops ; nopx ; vmov x7, x2; vclr dm0
23-
; CHECK-NEXT: nopx ; vmov x6, x2
24-
; CHECK-NEXT: vmin_ge.16 x9, r16, x3, x0, vaddsign0
25-
; CHECK-NEXT: vmax_lt.16 x8, r16, x9, x2, vaddsign0
26-
; CHECK-NEXT: vmov x9, x8
27-
; CHECK-NEXT: vldb.unpack x3, unpacksign0, [p0, #0]; mov s0, r0
28-
; CHECK-NEXT: vmov x5, x4; vmac dm3, dm0, y4, y3,r0
29-
; CHECK-NEXT: vmin_ge.16 x9, r16, x3, x0, vaddsign0
30-
; CHECK-NEXT: vmax_lt.16 x8, r16, x9, x2, vaddsign0; vmsc dm4, dm3, y2, y4,r0
31-
; CHECK-NEXT: vmov x9, x8
21+
; CHECK-NEXT: nopa ; vldb.unpack x1, unpacksign0, [p0, #0]; nops ; nopx ; vbcst.16 x2, r0; vclr dm0
22+
; CHECK-NEXT: nopa ; nopb ; nops ; nopx ; vmov x3, x2; nopv
23+
; CHECK-NEXT: nopa ; nopx ; vmin_ge.16 x7, r16, x1, x0, vaddsign0
24+
; CHECK-NEXT: vmax_lt.16 x6, r16, x7, x2, vaddsign0
25+
; CHECK-NEXT: vmov x7, x6
26+
; CHECK-NEXT: vldb.unpack x1, unpacksign0, [p0, #0]; mov s0, r0
27+
; CHECK-NEXT: vmov x5, x4; vmac dm3, dm0, y3, y1,r0
28+
; CHECK-NEXT: vmin_ge.16 x7, r16, x1, x0, vaddsign0
29+
; CHECK-NEXT: vmax_lt.16 x6, r16, x7, x2, vaddsign0; vmsc dm4, dm3, y2, y3,r0
30+
; CHECK-NEXT: vmov x7, x6
3231
; CHECK-NEXT: .LBB0_1: // %for.body
3332
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
34-
; CHECK-NEXT: nopa ; vldb.unpack x3, unpacksign0, [p0, #0]; nops ; nopxm ; nopv
35-
; CHECK-NEXT: vmac dm3, dm0, y4, y3,r0
36-
; CHECK-NEXT: vmin_ge.16 x9, r16, x3, x0, vaddsign0
37-
; CHECK-NEXT: vmax_lt.16 x8, r16, x9, x2, vaddsign0; vmsc dm4, dm3, y2, y4,r0
33+
; CHECK-NEXT: nopa ; vldb.unpack x1, unpacksign0, [p0, #0]; nops ; nopxm ; nopv
34+
; CHECK-NEXT: vmac dm3, dm0, y3, y1,r0
35+
; CHECK-NEXT: vmin_ge.16 x7, r16, x1, x0, vaddsign0
36+
; CHECK-NEXT: vmax_lt.16 x6, r16, x7, x2, vaddsign0; vmsc dm4, dm3, y2, y3,r0
3837
; CHECK-NEXT: .L_LEnd0:
39-
; CHECK-NEXT: nopa ; nopb ; vsrs.4x wh11, cml4, s0, srssign0; nopx ; vmov x9, x8; nopv
38+
; CHECK-NEXT: nopa ; nopb ; vsrs.4x wh9, cml4, s0, srssign0; nopx ; vmov x7, x6; nopv
4039
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
4140
; CHECK-NEXT: nopa ; nopb ; nopxm ; nops
42-
; CHECK-NEXT: vmac dm3, dm0, y4, y3,r0
43-
; CHECK-NEXT: vmin_ge.16 x9, r16, x3, x0, vaddsign0
44-
; CHECK-NEXT: vmax_lt.16 x8, r16, x9, x2, vaddsign0; vmsc dm4, dm3, y2, y4,r0
45-
; CHECK-NEXT: vsrs.4x wh11, cml4, s0, srssign0; vmov x9, x8
41+
; CHECK-NEXT: vmac dm3, dm0, y3, y1,r0
42+
; CHECK-NEXT: vmin_ge.16 x7, r16, x1, x0, vaddsign0
43+
; CHECK-NEXT: vmax_lt.16 x6, r16, x7, x2, vaddsign0; vmsc dm4, dm3, y2, y3,r0
44+
; CHECK-NEXT: vsrs.4x wh9, cml4, s0, srssign0; vmov x7, x6
4645
; CHECK-NEXT: nop
47-
; CHECK-NEXT: vmac dm3, dm0, y4, y3,r0
46+
; CHECK-NEXT: vmac dm3, dm0, y3, y1,r0
4847
; CHECK-NEXT: nop
49-
; CHECK-NEXT: vmsc dm4, dm3, y2, y4,r0
50-
; CHECK-NEXT: vsrs.4x wh11, cml4, s0, srssign0
48+
; CHECK-NEXT: vmsc dm4, dm3, y2, y3,r0
49+
; CHECK-NEXT: vsrs.4x wh9, cml4, s0, srssign0
5150
; CHECK-NEXT: nop
5251
; CHECK-NEXT: nop
5352
; CHECK-NEXT: nop
5453
; CHECK-NEXT: nop
55-
; CHECK-NEXT: vsrs.4x wh11, cml4, s0, srssign0
54+
; CHECK-NEXT: vsrs.4x wh9, cml4, s0, srssign0
5655
; CHECK-NEXT: ret lr
5756
; CHECK-NEXT: nop // Delay Slot 5
5857
; CHECK-NEXT: nop // Delay Slot 4

llvm/test/CodeGen/AIE/aie2p/ra/coalesce-widen-copy.mir

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,13 @@ body: |
1919
; CHECK-NEXT: {{ $}}
2020
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 7
2121
; CHECK-NEXT: [[VBCST_16_:%[0-9]+]]:vec512 = VBCST_16 [[MOV_RLC_imm11_pseudo]]
22-
; CHECK-NEXT: [[VCONV_fp32_bf16_mv_ups_xbf:%[0-9]+]]:ecml = VCONV_fp32_bf16_mv_ups_xbf [[VBCST_16_]]
22+
; CHECK-NEXT: undef [[VCONV_fp32_bf16_mv_ups_xbf:%[0-9]+]].sub_1024_acc_lo:acc2048 = VCONV_fp32_bf16_mv_ups_xbf [[VBCST_16_]]
2323
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0
2424
; CHECK-NEXT: {{ $}}
2525
; CHECK-NEXT: bb.1:
2626
; CHECK-NEXT: successors: %bb.1(0x80000000)
2727
; CHECK-NEXT: {{ $}}
28-
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_1024_acc_lo:acc2048 = COPY [[VCONV_fp32_bf16_mv_ups_xbf]]
29-
; CHECK-NEXT: dead [[VADD_vmac_cm2_add_reg:%[0-9]+]]:acc2048 = VADD_vmac_cm2_add_reg [[COPY]], [[COPY]], [[MOV_RLC_imm11_pseudo1]]
28+
; CHECK-NEXT: dead [[VADD_vmac_cm2_add_reg:%[0-9]+]]:acc2048 = VADD_vmac_cm2_add_reg [[VCONV_fp32_bf16_mv_ups_xbf]], [[VCONV_fp32_bf16_mv_ups_xbf]], [[MOV_RLC_imm11_pseudo1]]
3029
; CHECK-NEXT: PseudoRET implicit $lr
3130
bb.0.entry:
3231
successors: %bb.1
@@ -81,14 +80,13 @@ body: |
8180
; CHECK-NEXT: {{ $}}
8281
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 7
8382
; CHECK-NEXT: [[VBCST_16_:%[0-9]+]]:vec512 = VBCST_16 [[MOV_RLC_imm11_pseudo]]
84-
; CHECK-NEXT: [[VCONV_fp32_bf16_mv_ups_xbf:%[0-9]+]]:ecml = VCONV_fp32_bf16_mv_ups_xbf [[VBCST_16_]]
83+
; CHECK-NEXT: undef [[VCONV_fp32_bf16_mv_ups_xbf:%[0-9]+]].sub_1024_acc_lo:acc2048 = VCONV_fp32_bf16_mv_ups_xbf [[VBCST_16_]]
8584
; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0
8685
; CHECK-NEXT: {{ $}}
8786
; CHECK-NEXT: bb.1:
8887
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
8988
; CHECK-NEXT: {{ $}}
90-
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_1024_acc_lo:acc2048 = COPY [[VCONV_fp32_bf16_mv_ups_xbf]]
91-
; CHECK-NEXT: dead [[VADD_vmac_cm2_add_reg:%[0-9]+]]:acc2048 = VADD_vmac_cm2_add_reg [[COPY]], [[COPY]], [[MOV_RLC_imm11_pseudo1]]
89+
; CHECK-NEXT: dead [[VADD_vmac_cm2_add_reg:%[0-9]+]]:acc2048 = VADD_vmac_cm2_add_reg [[VCONV_fp32_bf16_mv_ups_xbf]], [[VCONV_fp32_bf16_mv_ups_xbf]], [[MOV_RLC_imm11_pseudo1]]
9290
; CHECK-NEXT: PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
9391
; CHECK-NEXT: PseudoJ_jump_imm %bb.2
9492
; CHECK-NEXT: {{ $}}

llvm/test/CodeGen/AIE/aie2p/ra/reg-coalescing-regression.mir

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ body: |
2222
; CHECK-NEXT: successors: %bb.2(0x80000000)
2323
; CHECK-NEXT: {{ $}}
2424
; CHECK-NEXT: dead [[DEF:%[0-9]+]]:er = IMPLICIT_DEF
25-
; CHECK-NEXT: [[DEF1:%[0-9]+]]:exe = IMPLICIT_DEF
2625
; CHECK-NEXT: PseudoJ_jump_imm %bb.2
2726
; CHECK-NEXT: {{ $}}
2827
; CHECK-NEXT: bb.1:
@@ -37,8 +36,6 @@ body: |
3736
; CHECK-NEXT: bb.3:
3837
; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.3(0x7c000000)
3938
; CHECK-NEXT: {{ $}}
40-
; CHECK-NEXT: dead undef [[COPY:%[0-9]+]].sub_512_lo:vec1024 = COPY [[DEF1]]
41-
; CHECK-NEXT: [[DEF1:%[0-9]+]]:exe = IMPLICIT_DEF
4239
; CHECK-NEXT: PseudoJZ undef [[DEF]], %bb.4
4340
; CHECK-NEXT: PseudoJ_jump_imm %bb.3
4441
; CHECK-NEXT: {{ $}}

llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-cycle-in-bundle.ll

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ define void @heavy_3d_user(i32 %dimsAI.sroa.5.0.copyload.i, i32 %dimsAI.sroa.7.0
4545
; FINE-GRAINED-NEXT: .LBB0_1: // %for.body.i
4646
; FINE-GRAINED-NEXT: // =>This Loop Header: Depth=1
4747
; FINE-GRAINED-NEXT: // Child Loop BB0_2 Depth 2
48-
; FINE-GRAINED-NEXT: nopa ; nopb ; nops ; nopx ; mov dn2, r3; nopv
49-
; FINE-GRAINED-NEXT: movs dj2, p6; nopx ; mov dn6, r3
48+
; FINE-GRAINED-NEXT: mov dn2, r3
49+
; FINE-GRAINED-NEXT: movs dj2, p6; mov dn6, r3
5050
; FINE-GRAINED-NEXT: movs dj6, p6; mov m2, m4
5151
; FINE-GRAINED-NEXT: mova p1, #0; movs dc6, r4; mov r25, r18
5252
; FINE-GRAINED-NEXT: vldb.pop.576.3d ex0, [p1, lf1, r25, d2]
@@ -56,18 +56,17 @@ define void @heavy_3d_user(i32 %dimsAI.sroa.5.0.copyload.i, i32 %dimsAI.sroa.7.0
5656
; FINE-GRAINED-NEXT: movs dn5, r3; vmov lfh1, lfh0
5757
; FINE-GRAINED-NEXT: mova p0, #0; movs dj5, m5; mov dc5, r19
5858
; FINE-GRAINED-NEXT: paddb.3d [p0], d1
59-
; FINE-GRAINED-NEXT: mova p0, #0; mov r19, dc5
59+
; FINE-GRAINED-NEXT: mov r19, dc5
6060
; FINE-GRAINED-NEXT: .LBB0_2: // %for.body125.i
6161
; FINE-GRAINED-NEXT: // Parent Loop BB0_1 Depth=1
6262
; FINE-GRAINED-NEXT: // => This Inner Loop Header: Depth=2
63-
; FINE-GRAINED-NEXT: nopa ; nopb ; nopx ; mov dc6, dc0
64-
; FINE-GRAINED-NEXT: mov dn2, r3
65-
; FINE-GRAINED-NEXT: movs dc2, dc0; mov dj2, r0
66-
; FINE-GRAINED-NEXT: movs m2, r8; mov dj6, r13
67-
; FINE-GRAINED-NEXT: movs dn6, r1; mov r25, r18
68-
; FINE-GRAINED-NEXT: movs p1, p0; vmov lfl1, x2
63+
; FINE-GRAINED-NEXT: nopa ; nopb ; movs dc6, dc0; nopx ; mov dc2, dc0; nopv
64+
; FINE-GRAINED-NEXT: movs dj2, r0; mov dn2, r3
65+
; FINE-GRAINED-NEXT: movs m2, r8; mov dn6, r1
66+
; FINE-GRAINED-NEXT: mova p1, #0; movs dj6, r13; mov r25, r18
67+
; FINE-GRAINED-NEXT: vldb.pop.576.3d ex4, [p1, lf1, r25, d2]; vmov lfl1, x2
6968
; FINE-GRAINED-NEXT: .L_LEnd0:
70-
; FINE-GRAINED-NEXT: nopa ; vldb.pop.576.3d ex4, [p1, lf1, r25, d2]; nops ; nopx ; vmov lfh1, x3; nopv
69+
; FINE-GRAINED-NEXT: nopa ; nopb ; nops ; nopx ; vmov lfh1, x3; nopv
7170
; FINE-GRAINED-NEXT: // %bb.3: // %for.cond.cleanup124.i
7271
; FINE-GRAINED-NEXT: // in Loop: Header=BB0_1 Depth=1
7372
; FINE-GRAINED-NEXT: nopa ; nopb ; nops ; nopx ; mov m0, m5; nopv
@@ -167,7 +166,7 @@ define void @heavy_3d_user(i32 %dimsAI.sroa.5.0.copyload.i, i32 %dimsAI.sroa.7.0
167166
; COARSE-GRAINED-NEXT: .LBB0_1: // %for.body.i
168167
; COARSE-GRAINED-NEXT: // =>This Loop Header: Depth=1
169168
; COARSE-GRAINED-NEXT: // Child Loop BB0_2 Depth 2
170-
; COARSE-GRAINED-NEXT: lda m0, [sp, #-344]; nopb ; nopx // 4-byte Folded Reload
169+
; COARSE-GRAINED-NEXT: lda m0, [sp, #-344]; nopxm // 4-byte Folded Reload
171170
; COARSE-GRAINED-NEXT: lda dc0, [sp, #-332] // 4-byte Folded Reload
172171
; COARSE-GRAINED-NEXT: lda dj4, [sp, #-320] // 4-byte Folded Reload
173172
; COARSE-GRAINED-NEXT: nop
@@ -195,16 +194,16 @@ define void @heavy_3d_user(i32 %dimsAI.sroa.5.0.copyload.i, i32 %dimsAI.sroa.7.0
195194
; COARSE-GRAINED-NEXT: mova p0, #0; st dn5, [sp, #-228] // 4-byte Folded Spill
196195
; COARSE-GRAINED-NEXT: paddb.3d [p0], d1; st dj5, [sp, #-224] // 4-byte Folded Spill
197196
; COARSE-GRAINED-NEXT: st dc1, [sp, #-236] // 4-byte Folded Spill
198-
; COARSE-GRAINED-NEXT: mova p0, #0; st dc5, [sp, #-220] // 4-byte Folded Spill
197+
; COARSE-GRAINED-NEXT: st dc5, [sp, #-220] // 4-byte Folded Spill
199198
; COARSE-GRAINED-NEXT: .LBB0_2: // %for.body125.i
200199
; COARSE-GRAINED-NEXT: // Parent Loop BB0_1 Depth=1
201200
; COARSE-GRAINED-NEXT: // => This Inner Loop Header: Depth=2
202-
; COARSE-GRAINED-NEXT: nops ; mov dn1, dn3
203-
; COARSE-GRAINED-NEXT: movs m1, m3; mov dj1, dj3
204-
; COARSE-GRAINED-NEXT: movs dc1, dc3; mov dn5, dn7
205-
; COARSE-GRAINED-NEXT: movs m5, m7; mov dc5, dc7
206-
; COARSE-GRAINED-NEXT: movs dj5, dj7; mov r25, r3
207-
; COARSE-GRAINED-NEXT: movs p1, p0; vmov lfl1, x2
201+
; COARSE-GRAINED-NEXT: nopa ; nopx ; mov m1, m3
202+
; COARSE-GRAINED-NEXT: mov dj1, dj3
203+
; COARSE-GRAINED-NEXT: movs dn1, dn3; mov dc1, dc3
204+
; COARSE-GRAINED-NEXT: movs m5, m7; mov dj5, dj7
205+
; COARSE-GRAINED-NEXT: movs dn5, dn7; mov r25, r3
206+
; COARSE-GRAINED-NEXT: mova p1, #0; movs dc5, dc7; vmov lfl1, x2
208207
; COARSE-GRAINED-NEXT: .L_LEnd0:
209208
; COARSE-GRAINED-NEXT: nopa ; vldb.pop.576.3d ex4, [p1, lf1, r25, d1]; nops ; nopx ; vmov lfh1, x3; nopv
210209
; COARSE-GRAINED-NEXT: // %bb.3: // %for.cond.cleanup124.i

0 commit comments

Comments
 (0)