From: Balaram Makam Date: Wed, 30 Aug 2017 14:57:12 +0000 (+0000) Subject: Re-land MachineInstr: Reason locally about some memory objects before going to AA. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b3e25cea9cef3be346cd5db31e8e76ba734188aa;p=llvm Re-land MachineInstr: Reason locally about some memory objects before going to AA. Summary: Reverts r311008 to reinstate r310825 with a fix. Refine alias checking for pseudo vs value to be conservative. This fixes the original failure in buildbot unittest SingleSource/UnitTests/2003-07-09-SignedArgs. Reviewers: hfinkel, nemanjai, efriedma Reviewed By: efriedma Subscribers: bjope, mcrosier, nhaehnle, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D36900 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312126 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 8bf1eb9e33d..47926d6cf0e 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1663,6 +1663,7 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, bool UseTBAA) { const MachineFunction *MF = getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); // If neither instruction stores to memory, they can't alias in any // meaningful way, even if they read from the same address. @@ -1673,9 +1674,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA)) return false; - if (!AA) - return true; - // FIXME: Need to handle multiple memory operands to support all targets. 
if (!hasOneMemOperand() || !Other.hasOneMemOperand()) return true; @@ -1683,9 +1681,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, MachineMemOperand *MMOa = *memoperands_begin(); MachineMemOperand *MMOb = *Other.memoperands_begin(); - if (!MMOa->getValue() || !MMOb->getValue()) - return true; - // The following interface to AA is fashioned after DAGCombiner::isAlias // and operates with MachineMemOperand offset with some important // assumptions: @@ -1698,22 +1693,53 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, // - There should never be any negative offsets here. // // FIXME: Modify API to hide this math from "user" - // FIXME: Even before we go to AA we can reason locally about some + // Even before we go to AA we can reason locally about some // memory objects. It can save compile time, and possibly catch some // corner cases not currently covered. - assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); - assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + int64_t OffsetA = MMOa->getOffset(); + int64_t OffsetB = MMOb->getOffset(); + + int64_t MinOffset = std::min(OffsetA, OffsetB); + int64_t WidthA = MMOa->getSize(); + int64_t WidthB = MMOb->getSize(); + const Value *ValA = MMOa->getValue(); + const Value *ValB = MMOb->getValue(); + bool SameVal = (ValA && ValB && (ValA == ValB)); + if (!SameVal) { + const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); + const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); + if (PSVa && ValB && !PSVa->mayAlias(&MFI)) + return false; + if (PSVb && ValA && !PSVb->mayAlias(&MFI)) + return false; + if (PSVa && PSVb && (PSVa == PSVb)) + SameVal = true; + } + + if (SameVal) { + int64_t MaxOffset = std::max(OffsetA, OffsetB); + int64_t LowWidth = (MinOffset == OffsetA) ? 
WidthA : WidthB; + return (MinOffset + LowWidth > MaxOffset); + } + + if (!AA) + return true; + + if (!ValA || !ValB) + return true; + + assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); + assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); - int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); - int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; - int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; + int64_t Overlapa = WidthA + OffsetA - MinOffset; + int64_t Overlapb = WidthB + OffsetB - MinOffset; - AliasResult AAResult = - AA->alias(MemoryLocation(MMOa->getValue(), Overlapa, - UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), - MemoryLocation(MMOb->getValue(), Overlapb, - UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); + AliasResult AAResult = AA->alias( + MemoryLocation(ValA, Overlapa, + UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(ValB, Overlapb, + UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); return (AAResult != NoAlias); } diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index 40ed607b06c..54d38a91c38 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -130,11 +130,11 @@ define void @check_i128_align() { i32 42, i128 %val) ; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 ; CHECK: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]] -; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16] +; CHECK: stp [[I128HI]], {{x[0-9]+}}, [sp, #24] ; CHECK-NONEON: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 ; CHECK-NONEON: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]] -; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16] +; CHECK-NONEON: stp [[I128HI]], {{x[0-9]+}}, [sp, #24] ; CHECK: bl check_i128_stackalign call void @check_i128_regalign(i32 0, i128 42) diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 975e5ae8b95..9307b6a3e47 100644 --- 
a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -1531,7 +1531,7 @@ define void @merge_zr64_unalign(<2 x i64>* %p) { ; CHECK-LABEL: merge_zr64_unalign: ; CHECK: // %entry ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] -; STRICTALIGN: strb wzr, +; STRICTALIGN: strb ; STRICTALIGN: strb ; STRICTALIGN: strb ; STRICTALIGN: strb diff --git a/test/CodeGen/AMDGPU/call-argument-types.ll b/test/CodeGen/AMDGPU/call-argument-types.ll index 589b333e608..740a74a9d40 100644 --- a/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/test/CodeGen/AMDGPU/call-argument-types.ll @@ -452,15 +452,15 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8 ; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12 -; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8 ; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4 +; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8 ; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], s33 offset:8 ; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], s33 offset:12 -; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8 ; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4 +; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8 ; GCN-NEXT: s_swappc_b64 ; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200 @@ -487,8 +487,8 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12 ; GCN-DAG: s_add_u32 [[SP]], [[SP]], 0x200 -; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8 ; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4 +; GCN: buffer_store_dword [[RELOAD_VAL1]], off, 
s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8 ; GCN-NEXT: s_swappc_b64 ; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16 ; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20 diff --git a/test/CodeGen/AMDGPU/load-global-i16.ll b/test/CodeGen/AMDGPU/load-global-i16.ll index cb2495d5fdc..6d243340395 100644 --- a/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/test/CodeGen/AMDGPU/load-global-i16.ll @@ -179,8 +179,8 @@ define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1) ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1 @@ -188,8 +188,6 @@ define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1) ; TODO: This should use DST, but for some there are redundant MOVs ; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal ; EGCM: 16 -; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal -; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in @@ -202,8 +200,8 @@ entry: ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS 
STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 0, #1 diff --git a/test/CodeGen/AMDGPU/load-global-i8.ll b/test/CodeGen/AMDGPU/load-global-i8.ll index 3fe6bd26be1..d7ebd46bc3e 100644 --- a/test/CodeGen/AMDGPU/load-global-i8.ll +++ b/test/CodeGen/AMDGPU/load-global-i8.ll @@ -352,22 +352,22 @@ define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace( ; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1 ; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, 
{{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal ; EG-DAG: 8 ; EG-DAG: 8 ; EG-DAG: 8 diff --git a/test/CodeGen/AMDGPU/load-local-i16.ll b/test/CodeGen/AMDGPU/load-local-i16.ll index 7de3f3b28c6..875af807ad4 100644 --- a/test/CodeGen/AMDGPU/load-local-i16.ll +++ b/test/CodeGen/AMDGPU/load-local-i16.ll @@ -530,7 +530,6 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace( ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y ; EG-DAG: LDS_WRITE -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { %a = load i16, i16 addrspace(3)* %in %ext = zext i16 %a to i64 @@ -572,7 +571,6 @@ define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y ; EG-DAG: LDS_WRITE -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { %load = load <1 x i16>, <1 x i16> addrspace(3)* %in %ext = 
zext <1 x i16> %load to <1 x i64> diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll index 39f3292e260..98a89a07af6 100644 --- a/test/CodeGen/ARM/2009-10-27-double-align.ll +++ b/test/CodeGen/ARM/2009-10-27-double-align.ll @@ -1,13 +1,15 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s --check-prefix=NOREGALLOC +; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s --check-prefix=REGALLOC @.str = private constant [1 x i8] zeroinitializer, align 1 define void @g() { entry: ;CHECK: [sp, #8] -;CHECK: [sp, #12] -;CHECK: [sp] +;NOREGALLOC: [sp, #12] +;NOREGALLOC: [sp] +;REGALLOC: [sp] +;REGALLOC: [sp, #12] tail call void (i8*, ...) @f(i8* getelementptr ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00) ret void } diff --git a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll index a633c0291c6..6d62fd31f97 100644 --- a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -124,10 +124,10 @@ define void @i56_and_or(i56* %a) { ; BE-LABEL: i56_and_or: ; BE: @ BB#0: ; BE-NEXT: mov r1, r0 -; BE-NEXT: mov r3, #128 +; BE-NEXT: ldr r12, [r0] ; BE-NEXT: ldrh r2, [r1, #4]! 
+; BE-NEXT: mov r3, #128 ; BE-NEXT: strb r3, [r1, #2] -; BE-NEXT: ldr r12, [r0] ; BE-NEXT: lsl r2, r2, #8 ; BE-NEXT: orr r2, r2, r12, lsl #24 ; BE-NEXT: orr r2, r2, #384 diff --git a/test/CodeGen/X86/illegal-bitfield-loadstore.ll b/test/CodeGen/X86/illegal-bitfield-loadstore.ll index 5425670fbb1..8059e4acbb1 100644 --- a/test/CodeGen/X86/illegal-bitfield-loadstore.ll +++ b/test/CodeGen/X86/illegal-bitfield-loadstore.ll @@ -118,17 +118,17 @@ define void @i56_or(i56* %a) { ; X64: # BB#0: ; X64-NEXT: movzwl 4(%rdi), %eax ; X64-NEXT: movzbl 6(%rdi), %ecx -; X64-NEXT: movl (%rdi), %edx ; X64-NEXT: movb %cl, 6(%rdi) ; X64-NEXT: # kill: %ECX %ECX %RCX %RCX ; X64-NEXT: shll $16, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: shlq $32, %rcx -; X64-NEXT: orq %rcx, %rdx -; X64-NEXT: orq $384, %rdx # imm = 0x180 -; X64-NEXT: movl %edx, (%rdi) -; X64-NEXT: shrq $32, %rdx -; X64-NEXT: movw %dx, 4(%rdi) +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: orq %rcx, %rax +; X64-NEXT: orq $384, %rax # imm = 0x180 +; X64-NEXT: movl %eax, (%rdi) +; X64-NEXT: shrq $32, %rax +; X64-NEXT: movw %ax, 4(%rdi) ; X64-NEXT: retq %aa = load i56, i56* %a, align 1 %b = or i56 %aa, 384 @@ -150,19 +150,19 @@ define void @i56_and_or(i56* %a) { ; X64: # BB#0: ; X64-NEXT: movzwl 4(%rdi), %eax ; X64-NEXT: movzbl 6(%rdi), %ecx -; X64-NEXT: movl (%rdi), %edx ; X64-NEXT: movb %cl, 6(%rdi) ; X64-NEXT: # kill: %ECX %ECX %RCX %RCX ; X64-NEXT: shll $16, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: shlq $32, %rcx -; X64-NEXT: orq %rcx, %rdx -; X64-NEXT: orq $384, %rdx # imm = 0x180 -; X64-NEXT: movabsq $72057594037927808, %rax # imm = 0xFFFFFFFFFFFF80 -; X64-NEXT: andq %rdx, %rax -; X64-NEXT: movl %eax, (%rdi) -; X64-NEXT: shrq $32, %rax -; X64-NEXT: movw %ax, 4(%rdi) +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: orq %rcx, %rax +; X64-NEXT: orq $384, %rax # imm = 0x180 +; X64-NEXT: movabsq $72057594037927808, %rcx # imm = 0xFFFFFFFFFFFF80 +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: movl %ecx, (%rdi) +; X64-NEXT: shrq $32, 
%rcx +; X64-NEXT: movw %cx, 4(%rdi) ; X64-NEXT: retq %b = load i56, i56* %a, align 1 %c = and i56 %b, -128 @@ -188,20 +188,20 @@ define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { ; X64-NEXT: movzbl %sil, %eax ; X64-NEXT: movzwl 4(%rdi), %ecx ; X64-NEXT: movzbl 6(%rdi), %edx -; X64-NEXT: movl (%rdi), %esi ; X64-NEXT: movb %dl, 6(%rdi) ; X64-NEXT: # kill: %EDX %EDX %RDX %RDX ; X64-NEXT: shll $16, %edx ; X64-NEXT: orl %ecx, %edx ; X64-NEXT: shlq $32, %rdx -; X64-NEXT: orq %rdx, %rsi +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: orq %rdx, %rcx ; X64-NEXT: shlq $13, %rax -; X64-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF -; X64-NEXT: andq %rsi, %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: movl %ecx, (%rdi) -; X64-NEXT: shrq $32, %rcx -; X64-NEXT: movw %cx, 4(%rdi) +; X64-NEXT: movabsq $72057594037919743, %rdx # imm = 0xFFFFFFFFFFDFFF +; X64-NEXT: andq %rcx, %rdx +; X64-NEXT: orq %rax, %rdx +; X64-NEXT: movl %edx, (%rdi) +; X64-NEXT: shrq $32, %rdx +; X64-NEXT: movw %dx, 4(%rdi) ; X64-NEXT: retq %extbit = zext i1 %bit to i56 %b = load i56, i56* %a, align 1 diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll index 7ef61c9a677..bd8f6e91fa3 100644 --- a/test/CodeGen/X86/memcpy-2.ll +++ b/test/CodeGen/X86/memcpy-2.ll @@ -12,23 +12,23 @@ define void @t1(i32 %argc, i8** %argv) nounwind { entry: ; SSE2-Darwin-LABEL: t1: -; SSE2-Darwin: movsd _.str+16, %xmm0 -; SSE2-Darwin: movsd %xmm0, 16(%esp) ; SSE2-Darwin: movaps _.str, %xmm0 ; SSE2-Darwin: movaps %xmm0 +; SSE2-Darwin: movsd _.str+16, %xmm0 +; SSE2-Darwin: movsd %xmm0, 16(%esp) ; SSE2-Darwin: movb $0, 24(%esp) ; SSE2-Mingw32-LABEL: t1: -; SSE2-Mingw32: movsd _.str+16, %xmm0 -; SSE2-Mingw32: movsd %xmm0, 16(%esp) ; SSE2-Mingw32: movaps _.str, %xmm0 ; SSE2-Mingw32: movups %xmm0 +; SSE2-Mingw32: movsd _.str+16, %xmm0 +; SSE2-Mingw32: movsd %xmm0, 16(%esp) ; SSE2-Mingw32: movb $0, 24(%esp) ; SSE1-LABEL: t1: ; SSE1: movaps _.str, %xmm0 -; SSE1: movaps %xmm0 ; SSE1: movb $0, 24(%esp) 
+; SSE1: movaps %xmm0 ; SSE1: movl $0, 20(%esp) ; SSE1: movl $0, 16(%esp) diff --git a/test/CodeGen/X86/pr34088.ll b/test/CodeGen/X86/pr34088.ll index d3667e3884d..259c7355339 100644 --- a/test/CodeGen/X86/pr34088.ll +++ b/test/CodeGen/X86/pr34088.ll @@ -25,8 +25,8 @@ define i32 @pr34088() local_unnamed_addr { ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movaps %xmm0, (%esp) ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movaps %xmm1, (%esp) ; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD +; CHECK-NEXT: movaps %xmm1, (%esp) ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index 34a2d2203f4..8c5c0edd111 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -349,8 +349,8 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) ; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] ; ATOM-NEXT: paddd %xmm2, %xmm0 ; ATOM-NEXT: paddd %xmm2, %xmm1 -; ATOM-NEXT: movq %xmm1, 16(%rsi) ; ATOM-NEXT: movdqa %xmm0, (%rsi) +; ATOM-NEXT: movq %xmm1, 16(%rsi) ; ATOM-NEXT: retq ; ATOM-NEXT: ## -- End function ; diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll index e363a82a2b9..d53e8285922 100644 --- a/test/CodeGen/X86/widen_arith-3.ll +++ b/test/CodeGen/X86/widen_arith-3.ll @@ -16,9 +16,9 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind { ; CHECK-NEXT: movl {{\.LCPI.*}}, %eax ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) -; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_2: # %forbody