ret void
}
+; Unordered atomic load of an i128 through %ptr (align 16). Both the O0 and
+; the O3 check blocks expect a single `lock cmpxchg16b (%rdi)` that is reached
+; with rax, rdx, rcx and rbx all holding zero; %rbx is pushed on entry and
+; popped before the return.
+define i128 @load_i128(i128* %ptr) {
+; CHECK-O0-LABEL: load_i128:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rbx
+; CHECK-O0-NEXT: .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT: .cfi_offset %rbx, -16
+; CHECK-O0-NEXT: xorl %eax, %eax
+; CHECK-O0-NEXT: movl %eax, %ecx
+; CHECK-O0-NEXT: movq %rcx, %rax
+; CHECK-O0-NEXT: movq %rcx, %rdx
+; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; CHECK-O0-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-O0-NEXT: popq %rbx
+; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_i128:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rbx
+; CHECK-O3-NEXT: .cfi_def_cfa_offset 16
+; CHECK-O3-NEXT: .cfi_offset %rbx, -16
+; CHECK-O3-NEXT: xorl %eax, %eax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: xorl %ecx, %ecx
+; CHECK-O3-NEXT: xorl %ebx, %ebx
+; CHECK-O3-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-O3-NEXT: popq %rbx
+; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
+; CHECK-O3-NEXT: retq
+ %v = load atomic i128, i128* %ptr unordered, align 16
+ ret i128 %v
+}
+
+; Unordered atomic store of the i128 %v through %ptr (align 16). The checks
+; expect a loop headed by %atomicrmw.start: the current contents are first read
+; with two plain 8-byte loads from (%rdi) and 8(%rdi), then `lock cmpxchg16b`
+; is issued and `jne` branches back to the loop header. %rbx is pushed on entry
+; and popped before the return.
+define void @store_i128(i128* %ptr, i128 %v) {
+; CHECK-O0-LABEL: store_i128:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rbx
+; CHECK-O0-NEXT: .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT: .cfi_offset %rbx, -16
+; CHECK-O0-NEXT: movq (%rdi), %rax
+; CHECK-O0-NEXT: movq 8(%rdi), %rcx
+; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: jmp .LBB16_1
+; CHECK-O0-NEXT: .LBB16_1: # %atomicrmw.start
+; CHECK-O0-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rcx, %rax
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; CHECK-O0-NEXT: lock cmpxchg16b (%rsi)
+; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: jne .LBB16_1
+; CHECK-O0-NEXT: jmp .LBB16_2
+; CHECK-O0-NEXT: .LBB16_2: # %atomicrmw.end
+; CHECK-O0-NEXT: popq %rbx
+; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: store_i128:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rbx
+; CHECK-O3-NEXT: .cfi_def_cfa_offset 16
+; CHECK-O3-NEXT: .cfi_offset %rbx, -16
+; CHECK-O3-NEXT: movq %rdx, %rcx
+; CHECK-O3-NEXT: movq %rsi, %rbx
+; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq 8(%rdi), %rdx
+; CHECK-O3-NEXT: .p2align 4, 0x90
+; CHECK-O3-NEXT: .LBB16_1: # %atomicrmw.start
+; CHECK-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-O3-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-O3-NEXT: jne .LBB16_1
+; CHECK-O3-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-O3-NEXT: popq %rbx
+; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
+; CHECK-O3-NEXT: retq
+ store atomic i128 %v, i128* %ptr unordered, align 16
+ ret void
+}
+
+; Unordered atomic load of an i256 through %ptr (align 16). The checks expect a
+; call to __atomic_load with 32 in %rdi/%edi, a stack address in %rdx and zero
+; in %ecx. Afterwards the 32 bytes are copied from the stack into the buffer
+; whose address arrived in %rdi, and that same address is returned in %rax.
+define i256 @load_i256(i256* %ptr) {
+; CHECK-O0-LABEL: load_i256:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: subq $56, %rsp
+; CHECK-O0-NEXT: .cfi_def_cfa_offset 64
+; CHECK-O0-NEXT: movq %rdi, %rax
+; CHECK-O0-NEXT: movl $32, %ecx
+; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; CHECK-O0-NEXT: xorl %r8d, %r8d
+; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rcx, %rdi
+; CHECK-O0-NEXT: movl %r8d, %ecx
+; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: callq __atomic_load
+; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; CHECK-O0-NEXT: movq %rdi, 24(%r9)
+; CHECK-O0-NEXT: movq %rsi, 16(%r9)
+; CHECK-O0-NEXT: movq %rdx, 8(%r9)
+; CHECK-O0-NEXT: movq %rax, (%r9)
+; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-O0-NEXT: addq $56, %rsp
+; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_i256:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rbx
+; CHECK-O3-NEXT: .cfi_def_cfa_offset 16
+; CHECK-O3-NEXT: subq $32, %rsp
+; CHECK-O3-NEXT: .cfi_def_cfa_offset 48
+; CHECK-O3-NEXT: .cfi_offset %rbx, -16
+; CHECK-O3-NEXT: movq %rdi, %rbx
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $32, %edi
+; CHECK-O3-NEXT: xorl %ecx, %ecx
+; CHECK-O3-NEXT: callq __atomic_load
+; CHECK-O3-NEXT: vmovups (%rsp), %ymm0
+; CHECK-O3-NEXT: vmovups %ymm0, (%rbx)
+; CHECK-O3-NEXT: movq %rbx, %rax
+; CHECK-O3-NEXT: addq $32, %rsp
+; CHECK-O3-NEXT: .cfi_def_cfa_offset 16
+; CHECK-O3-NEXT: popq %rbx
+; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
+; CHECK-O3-NEXT: vzeroupper
+; CHECK-O3-NEXT: retq
+ %v = load atomic i256, i256* %ptr unordered, align 16
+ ret i256 %v
+}
+
+; Unordered atomic store of the i256 %v through %ptr (align 16). The checks
+; expect the four 8-byte pieces arriving in %rsi, %rdx, %rcx and %r8 to be
+; written to the stack, followed by a call to __atomic_store with 32 in
+; %rdi/%edi, the incoming %rdi value in %rsi, the stack address in %rdx and
+; zero in %ecx.
+define void @store_i256(i256* %ptr, i256 %v) {
+; CHECK-O0-LABEL: store_i256:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: subq $40, %rsp
+; CHECK-O0-NEXT: .cfi_def_cfa_offset 48
+; CHECK-O0-NEXT: xorl %eax, %eax
+; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %r9
+; CHECK-O0-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp)
+; CHECK-O0-NEXT: movl $32, %ecx
+; CHECK-O0-NEXT: movq %rdi, (%rsp) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rcx, %rdi
+; CHECK-O0-NEXT: movq (%rsp), %rsi # 8-byte Reload
+; CHECK-O0-NEXT: movq %r9, %rdx
+; CHECK-O0-NEXT: movl %eax, %ecx
+; CHECK-O0-NEXT: callq __atomic_store
+; CHECK-O0-NEXT: addq $40, %rsp
+; CHECK-O0-NEXT: .cfi_def_cfa_offset 8
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: store_i256:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: subq $40, %rsp
+; CHECK-O3-NEXT: .cfi_def_cfa_offset 48
+; CHECK-O3-NEXT: movq %rdi, %rax
+; CHECK-O3-NEXT: movq %r8, {{[0-9]+}}(%rsp)
+; CHECK-O3-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-O3-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-O3-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-O3-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; CHECK-O3-NEXT: movl $32, %edi
+; CHECK-O3-NEXT: movq %rax, %rsi
+; CHECK-O3-NEXT: xorl %ecx, %ecx
+; CHECK-O3-NEXT: callq __atomic_store
+; CHECK-O3-NEXT: addq $40, %rsp
+; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
+; CHECK-O3-NEXT: retq
+ store atomic i256 %v, i256* %ptr unordered, align 16
+ ret void
+}
+
; Legal if wider type is also atomic (TODO)
define void @vec_store(i32* %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: vec_store:
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
-; CHECK-O3-NEXT: je .LBB31_1
+; CHECK-O3-NEXT: je .LBB35_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB31_1:
+; CHECK-O3-NEXT: .LBB35_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
-; CHECK-O3-NEXT: je .LBB32_1
+; CHECK-O3-NEXT: je .LBB36_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rcx
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB32_1:
+; CHECK-O3-NEXT: .LBB36_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
-; CHECK-O3-NEXT: je .LBB34_1
+; CHECK-O3-NEXT: je .LBB38_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB34_1:
+; CHECK-O3-NEXT: .LBB38_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
-; CHECK-O3-NEXT: je .LBB35_1
+; CHECK-O3-NEXT: je .LBB39_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rcx
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB35_1:
+; CHECK-O3-NEXT: .LBB39_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
-; CHECK-O3-NEXT: je .LBB37_1
+; CHECK-O3-NEXT: je .LBB41_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB37_1:
+; CHECK-O3-NEXT: .LBB41_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
-; CHECK-O3-NEXT: je .LBB38_1
+; CHECK-O3-NEXT: je .LBB42_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rcx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB38_1:
+; CHECK-O3-NEXT: .LBB42_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
-; CHECK-O3-NEXT: je .LBB40_1
+; CHECK-O3-NEXT: je .LBB44_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB40_1:
+; CHECK-O3-NEXT: .LBB44_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movq %rax, %rdx
; CHECK-O3-NEXT: orq %rcx, %rdx
; CHECK-O3-NEXT: shrq $32, %rdx
-; CHECK-O3-NEXT: je .LBB41_1
+; CHECK-O3-NEXT: je .LBB45_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rcx
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB41_1:
+; CHECK-O3-NEXT: .LBB45_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
-; CHECK-O3-NEXT: je .LBB70_1
+; CHECK-O3-NEXT: je .LBB74_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB70_1:
+; CHECK-O3-NEXT: .LBB74_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
-; CHECK-O3-NEXT: je .LBB72_1
+; CHECK-O3-NEXT: je .LBB76_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB72_1:
+; CHECK-O3-NEXT: .LBB76_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
-; CHECK-O3-NEXT: je .LBB74_1
+; CHECK-O3-NEXT: je .LBB78_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rsi
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB74_1:
+; CHECK-O3-NEXT: .LBB78_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: orq %rsi, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
-; CHECK-O3-NEXT: je .LBB76_1
+; CHECK-O3-NEXT: je .LBB80_1
; CHECK-O3-NEXT: # %bb.2:
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rsi
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
-; CHECK-O3-NEXT: .LBB76_1:
+; CHECK-O3-NEXT: .LBB80_1:
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divl %esi