From e7cfaf6ba1612cb27dd73aaee434d04317eeedf5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 3 Jul 2019 00:30:47 +0000 Subject: [PATCH] CodeGen: Set hasSideEffects = 0 on BUNDLE The BUNDLE itself should not have side effects, and this is a property of instructions inside the bundle. The hasProperty check already searches for any member instructions, which was pointless since it was overridden by this bit. Allows me to distinguish bundles that have side effects vs. do not in a future patch. Also fixes an unnecessary scheduling barrier in the bundle AMDGPU uses to get PC relative addresses. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364984 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/Target.td | 2 +- test/CodeGen/AMDGPU/byval-frame-setup.ll | 39 +++++++++++-------- test/CodeGen/AMDGPU/call-argument-types.ll | 17 ++++---- .../AMDGPU/call-preserved-registers.ll | 23 +++++------ test/CodeGen/AMDGPU/call-waitcnt.ll | 8 ++-- test/CodeGen/AMDGPU/sibling-call.ll | 9 ++--- test/CodeGen/ARM/Windows/tls.ll | 21 ++++------ 7 files changed, 60 insertions(+), 59 deletions(-) diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index f090d798639..43b2a2a2858 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -1070,7 +1070,7 @@ def BUNDLE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "BUNDLE"; - let hasSideEffects = 1; + let hasSideEffects = 0; } def LIFETIME_START : StandardPseudoInstruction { let OutOperandList = (outs); diff --git a/test/CodeGen/AMDGPU/byval-frame-setup.ll b/test/CodeGen/AMDGPU/byval-frame-setup.ll index 60694ba1f2f..c272cba6e6f 100644 --- a/test/CodeGen/AMDGPU/byval-frame-setup.ll +++ b/test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -171,20 +171,22 @@ entry: ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel: ; GCN: s_mov_b32 s33, s7 -; GCN: s_add_u32 s32, s33, 0xc00{{$}} +; GCN-NOT: s_add_u32 s32, s32, 0x800 -; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 -; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 -; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 +; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 +; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 +; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24 ; GCN-NOT: s_add_u32 s32, s32, 0x800 - ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 +; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}} ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 +; GCN: s_getpc_b64 + ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 @@ -249,24 +251,27 @@ entry: ; Make sure the byval alignment is respected in the call frame setup ; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_kernel: ; GCN: s_mov_b32 s33, s7 -; GCN: s_add_u32 s32, s33, 0xc00{{$}} +; GCN-NOT: s_add_u32 s32, s32, 0x800 -; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 -; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 -; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 +; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 +; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 +; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24 + ; GCN-NOT: s_add_u32 s32, s32, 0x800 -; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 -; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 -; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 -; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 +; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 +; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 +; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}} +; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 +; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 + +; GCN: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 +; GCN: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 +; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 +; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} -; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} -; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 -; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 -; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24 ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28 diff --git a/test/CodeGen/AMDGPU/call-argument-types.ll b/test/CodeGen/AMDGPU/call-argument-types.ll index 4c8791303b7..0e07a3419fa 100644 --- a/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/test/CodeGen/AMDGPU/call-argument-types.ll @@ -83,14 +83,14 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; HSA: buffer_load_ubyte [[VAR:v[0-9]+]] ; HSA: s_mov_b32 s32, s33 +; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]] +; MESA-DAG: s_mov_b32 s32, s33{{$}} + ; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+4 -; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]] -; MESA-DAG: s_mov_b32 s32, s33{{$}} - ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} @@ -108,12 +108,13 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; HSA: buffer_load_ubyte v0 ; HSA-DAG: s_mov_b32 s32, s33{{$}} +; MESA: buffer_load_ubyte v0 +; MESA-DAG: s_mov_b32 s32, s33{{$}} + ; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+4 -; MESA: buffer_load_ubyte v0 -; MESA-DAG: s_mov_b32 s32, s33{{$}} ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: v_and_b32_e32 v0, 1, v0 @@ -770,9 +771,11 @@ entry: ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:20 + +; GCN: s_getpc_b64 + ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}} -; GCN: s_getpc_b64 ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GCN-NOT: s32 @@ -790,9 +793,9 @@ entry: ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}} ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:4 +; GCN: s_getpc_b64 ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}} ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GCN: s_getpc_b64 ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GCN-NOT: s32 diff --git a/test/CodeGen/AMDGPU/call-preserved-registers.ll b/test/CodeGen/AMDGPU/call-preserved-registers.ll index 05af715eb78..da3accea969 100644 --- a/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -130,12 +130,12 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(i32 addrspace ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33: ; GCN: s_mov_b32 s33, s9 ; GCN: s_mov_b32 s32, s33 -; GCN: #ASMSTART -; GCN-NEXT: ; def s33 -; GCN-NEXT: #ASMEND ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4 +; GCN: #ASMSTART +; GCN-NEXT: ; def s33 +; GCN-NEXT: #ASMEND ; GCN: s_swappc_b64 s[30:31], s[4:5] ; GCN: ;;#ASMSTART ; GCN-NEXT: ; use s33 @@ -152,16 +152,17 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s33(i32 addrspace( ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: ; GCN: s_mov_b32 s33, s9 ; GCN-NOT: s34 -; GCN: ;;#ASMSTART -; GCN-NEXT: ; def s34 -; GCN-NEXT: ;;#ASMEND - ; GCN-NOT: s34 ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4 +; GCN-NOT: s34 +; GCN: ;;#ASMSTART +; GCN-NEXT: ; def s34 +; GCN-NEXT: ;;#ASMEND + ; GCN-NOT: s34 ; GCN: s_swappc_b64 s[30:31], s[4:5] @@ -181,10 +182,6 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s34(i32 addrspace( ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v32: ; GCN: s_mov_b32 s33, s9 -; GCN: ;;#ASMSTART -; GCN-NEXT: ; def v32 -; GCN-NEXT: ;;#ASMEND - ; GCN-NOT: v32 ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 @@ -192,6 +189,10 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s34(i32 addrspace( ; GCN-NOT: v32 ; GCN-DAG: s_mov_b32 s32, s33 +; GCN: ;;#ASMSTART +; GCN-NEXT: ; def v32 +; GCN-NEXT: ;;#ASMEND + ; GCN: s_swappc_b64 s[30:31], s[4:5] ; GCN-NOT: v32 diff --git a/test/CodeGen/AMDGPU/call-waitcnt.ll b/test/CodeGen/AMDGPU/call-waitcnt.ll index 58fb6ead925..37c656d4b68 100644 --- a/test/CodeGen/AMDGPU/call-waitcnt.ll +++ b/test/CodeGen/AMDGPU/call-waitcnt.ll @@ -30,16 +30,16 @@ define amdgpu_kernel void @call_memory_no_dep(i32 addrspace(1)* %ptr, i32) #0 { ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GCN-NEXT: s_mov_b32 s33, s9 ; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s33 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 ; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: global_store_dword v[0:1], v2, off +; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_getpc_b64 s[6:7] ; GCN-NEXT: s_add_u32 s6, s6, func@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s7, s7, func@rel32@hi+4 -; GCN-NEXT: global_store_dword v[0:1], v2, off -; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GCN-NEXT: s_endpgm @@ -135,10 +135,10 @@ define void @tail_call_memory_arg_load(i32 addrspace(3)* %ptr, i32) #0 { ; GCN-LABEL: tail_call_memory_arg_load: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: ds_read_b32 v0, v0 ; GCN-NEXT: s_getpc_b64 s[6:7] ; GCN-NEXT: s_add_u32 s6, s6, func@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s7, s7, func@rel32@hi+4 -; GCN-NEXT: ds_read_b32 v0, v0 ; GCN-NEXT: s_setpc_b64 s[6:7] %vgpr = load volatile i32, i32 addrspace(3)* %ptr tail call void @func(i32 %vgpr) diff --git a/test/CodeGen/AMDGPU/sibling-call.ll b/test/CodeGen/AMDGPU/sibling-call.ll index e156a952a30..220a8e9e0e5 100644 --- a/test/CodeGen/AMDGPU/sibling-call.ll +++ b/test/CodeGen/AMDGPU/sibling-call.ll @@ -208,24 +208,23 @@ entry: ; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 ; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s5 offset:8 ; GCN-NEXT: s_mov_b64 exec +; GCN-DAG: s_getpc_b64 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill ; GCN: buffer_store_dword v33, off, s[0:3], s5 ; 4-byte Folded Spill ; GCN-DAG: v_writelane_b32 v34, s34, 0 ; GCN-DAG: v_writelane_b32 v34, s35, 1 -; GCN-DAG: s_getpc_b64 ; GCN: s_swappc_b64 -; GCN: s_getpc_b64 s[6:7] -; GCN: s_add_u32 s6, s6, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 -; GCN: s_addc_u32 s7, s7, sibling_call_i32_fastcc_i32_i32@rel32@hi+4 - ; GCN-DAG: v_readlane_b32 s34, v34, 0 ; GCN-DAG: v_readlane_b32 s35, v34, 1 ; GCN: buffer_load_dword v33, off, s[0:3], s5 ; 4-byte Folded Reload ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload +; GCN: s_getpc_b64 s[6:7] +; GCN: s_add_u32 s6, s6, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 +; GCN: s_addc_u32 s7, s7, sibling_call_i32_fastcc_i32_i32@rel32@hi+4 ; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 ; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s5 offset:8 ; GCN-NEXT: s_mov_b64 exec diff --git a/test/CodeGen/ARM/Windows/tls.ll b/test/CodeGen/ARM/Windows/tls.ll index 2c38ad3e58f..931621a5d39 100644 --- a/test/CodeGen/ARM/Windows/tls.ll +++ b/test/CodeGen/ARM/Windows/tls.ll @@ -15,11 +15,10 @@ define i32 @f() { ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -36,11 +35,10 @@ define i32 @e() { ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -57,11 +55,10 @@ define i32 @d() { ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -78,11 +75,10 @@ define i32 @c() { ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -99,11 +95,10 @@ define i32 @b() { ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -120,11 +115,10 @@ define i16 @a() { ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -141,11 +135,10 @@ define i8 @Z() { ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] -- 2.50.1