;
; MIPS64R5EB-LABEL: i8_2:
; MIPS64R5EB: # %bb.0:
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -64
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 64
-; MIPS64R5EB-NEXT: sd $4, 56($sp)
-; MIPS64R5EB-NEXT: ldi.b $w0, 0
-; MIPS64R5EB-NEXT: lbu $1, 57($sp)
-; MIPS64R5EB-NEXT: lbu $2, 56($sp)
-; MIPS64R5EB-NEXT: move.v $w1, $w0
-; MIPS64R5EB-NEXT: insert.h $w1[0], $2
-; MIPS64R5EB-NEXT: insert.h $w1[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 58($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 59($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 60($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 61($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 63($sp)
-; MIPS64R5EB-NEXT: lbu $2, 62($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[6], $2
-; MIPS64R5EB-NEXT: insert.h $w1[7], $1
-; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0]
-; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1]
-; MIPS64R5EB-NEXT: sd $5, 48($sp)
-; MIPS64R5EB-NEXT: lbu $3, 48($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[0], $3
-; MIPS64R5EB-NEXT: lbu $3, 49($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[1], $3
-; MIPS64R5EB-NEXT: lbu $3, 50($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[2], $3
-; MIPS64R5EB-NEXT: lbu $3, 51($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[3], $3
-; MIPS64R5EB-NEXT: lbu $3, 52($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[4], $3
-; MIPS64R5EB-NEXT: lbu $3, 53($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[5], $3
-; MIPS64R5EB-NEXT: lbu $3, 55($sp)
-; MIPS64R5EB-NEXT: lbu $4, 54($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[6], $4
-; MIPS64R5EB-NEXT: insert.h $w0[7], $3
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -96
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 96
+; MIPS64R5EB-NEXT: sd $4, 88($sp)
+; MIPS64R5EB-NEXT: lbu $1, 89($sp)
+; MIPS64R5EB-NEXT: sh $1, 2($sp)
+; MIPS64R5EB-NEXT: lbu $1, 88($sp)
+; MIPS64R5EB-NEXT: sh $1, 0($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 0($sp)
+; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1]
+; MIPS64R5EB-NEXT: sd $5, 80($sp)
+; MIPS64R5EB-NEXT: lbu $3, 81($sp)
+; MIPS64R5EB-NEXT: sh $3, 18($sp)
+; MIPS64R5EB-NEXT: lbu $3, 80($sp)
+; MIPS64R5EB-NEXT: sh $3, 16($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 16($sp)
; MIPS64R5EB-NEXT: copy_s.h $3, $w0[0]
; MIPS64R5EB-NEXT: copy_s.h $4, $w0[1]
-; MIPS64R5EB-NEXT: sw $4, 28($sp)
-; MIPS64R5EB-NEXT: sw $3, 20($sp)
-; MIPS64R5EB-NEXT: sw $2, 12($sp)
-; MIPS64R5EB-NEXT: sw $1, 4($sp)
-; MIPS64R5EB-NEXT: ld.d $w0, 16($sp)
-; MIPS64R5EB-NEXT: ld.d $w1, 0($sp)
+; MIPS64R5EB-NEXT: sw $4, 60($sp)
+; MIPS64R5EB-NEXT: sw $3, 52($sp)
+; MIPS64R5EB-NEXT: sw $2, 44($sp)
+; MIPS64R5EB-NEXT: sw $1, 36($sp)
+; MIPS64R5EB-NEXT: ld.d $w0, 48($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 32($sp)
; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0
; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT: sb $2, 45($sp)
-; MIPS64R5EB-NEXT: sb $1, 44($sp)
-; MIPS64R5EB-NEXT: lh $2, 44($sp)
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 64
+; MIPS64R5EB-NEXT: sb $2, 77($sp)
+; MIPS64R5EB-NEXT: sb $1, 76($sp)
+; MIPS64R5EB-NEXT: lh $2, 76($sp)
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 96
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
;
; MIPS64R5EL-LABEL: i8_2:
; MIPS64R5EL: # %bb.0:
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -64
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 64
-; MIPS64R5EL-NEXT: sd $4, 56($sp)
-; MIPS64R5EL-NEXT: ldi.b $w0, 0
-; MIPS64R5EL-NEXT: lbu $1, 57($sp)
-; MIPS64R5EL-NEXT: lbu $2, 56($sp)
-; MIPS64R5EL-NEXT: move.v $w1, $w0
-; MIPS64R5EL-NEXT: insert.h $w1[0], $2
-; MIPS64R5EL-NEXT: insert.h $w1[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 58($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 59($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 60($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 61($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 63($sp)
-; MIPS64R5EL-NEXT: lbu $2, 62($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[6], $2
-; MIPS64R5EL-NEXT: insert.h $w1[7], $1
-; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0]
-; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1]
-; MIPS64R5EL-NEXT: sd $5, 48($sp)
-; MIPS64R5EL-NEXT: lbu $3, 48($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[0], $3
-; MIPS64R5EL-NEXT: lbu $3, 49($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[1], $3
-; MIPS64R5EL-NEXT: lbu $3, 50($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[2], $3
-; MIPS64R5EL-NEXT: lbu $3, 51($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[3], $3
-; MIPS64R5EL-NEXT: lbu $3, 52($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[4], $3
-; MIPS64R5EL-NEXT: lbu $3, 53($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[5], $3
-; MIPS64R5EL-NEXT: lbu $3, 55($sp)
-; MIPS64R5EL-NEXT: lbu $4, 54($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[6], $4
-; MIPS64R5EL-NEXT: insert.h $w0[7], $3
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -96
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 96
+; MIPS64R5EL-NEXT: sd $4, 88($sp)
+; MIPS64R5EL-NEXT: lbu $1, 89($sp)
+; MIPS64R5EL-NEXT: sh $1, 2($sp)
+; MIPS64R5EL-NEXT: lbu $1, 88($sp)
+; MIPS64R5EL-NEXT: sh $1, 0($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 0($sp)
+; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1]
+; MIPS64R5EL-NEXT: sd $5, 80($sp)
+; MIPS64R5EL-NEXT: lbu $3, 81($sp)
+; MIPS64R5EL-NEXT: sh $3, 18($sp)
+; MIPS64R5EL-NEXT: lbu $3, 80($sp)
+; MIPS64R5EL-NEXT: sh $3, 16($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 16($sp)
; MIPS64R5EL-NEXT: copy_s.h $3, $w0[0]
; MIPS64R5EL-NEXT: copy_s.h $4, $w0[1]
-; MIPS64R5EL-NEXT: sw $4, 24($sp)
-; MIPS64R5EL-NEXT: sw $3, 16($sp)
-; MIPS64R5EL-NEXT: sw $2, 8($sp)
-; MIPS64R5EL-NEXT: sw $1, 0($sp)
-; MIPS64R5EL-NEXT: ld.d $w0, 16($sp)
-; MIPS64R5EL-NEXT: ld.d $w1, 0($sp)
+; MIPS64R5EL-NEXT: sw $4, 56($sp)
+; MIPS64R5EL-NEXT: sw $3, 48($sp)
+; MIPS64R5EL-NEXT: sw $2, 40($sp)
+; MIPS64R5EL-NEXT: sw $1, 32($sp)
+; MIPS64R5EL-NEXT: ld.d $w0, 48($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 32($sp)
; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0
; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT: sb $2, 45($sp)
-; MIPS64R5EL-NEXT: sb $1, 44($sp)
-; MIPS64R5EL-NEXT: lh $2, 44($sp)
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 64
+; MIPS64R5EL-NEXT: sb $2, 77($sp)
+; MIPS64R5EL-NEXT: sb $1, 76($sp)
+; MIPS64R5EL-NEXT: lh $2, 76($sp)
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 96
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
%1 = add <2 x i8> %a, %b
;
; MIPS64R5EB-LABEL: i8x2_7:
; MIPS64R5EB: # %bb.0: # %entry
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -176
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 176
-; MIPS64R5EB-NEXT: sd $4, 168($sp)
-; MIPS64R5EB-NEXT: ldi.b $w0, 0
-; MIPS64R5EB-NEXT: lbu $1, 169($sp)
-; MIPS64R5EB-NEXT: lbu $2, 168($sp)
-; MIPS64R5EB-NEXT: move.v $w1, $w0
-; MIPS64R5EB-NEXT: insert.h $w1[0], $2
-; MIPS64R5EB-NEXT: insert.h $w1[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 170($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 171($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 172($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 173($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 175($sp)
-; MIPS64R5EB-NEXT: lbu $2, 174($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[6], $2
-; MIPS64R5EB-NEXT: insert.h $w1[7], $1
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -288
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 288
+; MIPS64R5EB-NEXT: sd $4, 280($sp)
+; MIPS64R5EB-NEXT: lbu $1, 281($sp)
+; MIPS64R5EB-NEXT: sh $1, 2($sp)
+; MIPS64R5EB-NEXT: lbu $1, 280($sp)
+; MIPS64R5EB-NEXT: sh $1, 0($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 0($sp)
+; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1]
+; MIPS64R5EB-NEXT: sd $5, 272($sp)
+; MIPS64R5EB-NEXT: lbu $3, 273($sp)
+; MIPS64R5EB-NEXT: sh $3, 18($sp)
+; MIPS64R5EB-NEXT: lbu $3, 272($sp)
+; MIPS64R5EB-NEXT: sh $3, 16($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 16($sp)
+; MIPS64R5EB-NEXT: copy_s.h $3, $w0[0]
+; MIPS64R5EB-NEXT: copy_s.h $4, $w0[1]
+; MIPS64R5EB-NEXT: sw $4, 140($sp)
+; MIPS64R5EB-NEXT: sw $3, 132($sp)
+; MIPS64R5EB-NEXT: sw $2, 124($sp)
+; MIPS64R5EB-NEXT: sw $1, 116($sp)
+; MIPS64R5EB-NEXT: ld.d $w0, 128($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 112($sp)
+; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0
+; MIPS64R5EB-NEXT: sd $6, 264($sp)
+; MIPS64R5EB-NEXT: lbu $1, 265($sp)
+; MIPS64R5EB-NEXT: sh $1, 34($sp)
+; MIPS64R5EB-NEXT: lbu $1, 264($sp)
+; MIPS64R5EB-NEXT: sh $1, 32($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 32($sp)
; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0]
; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1]
-; MIPS64R5EB-NEXT: sd $5, 160($sp)
-; MIPS64R5EB-NEXT: lbu $3, 161($sp)
-; MIPS64R5EB-NEXT: lbu $4, 160($sp)
-; MIPS64R5EB-NEXT: move.v $w1, $w0
-; MIPS64R5EB-NEXT: insert.h $w1[0], $4
-; MIPS64R5EB-NEXT: insert.h $w1[1], $3
-; MIPS64R5EB-NEXT: lbu $3, 162($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[2], $3
-; MIPS64R5EB-NEXT: lbu $3, 163($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[3], $3
-; MIPS64R5EB-NEXT: lbu $3, 164($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[4], $3
-; MIPS64R5EB-NEXT: lbu $3, 165($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[5], $3
-; MIPS64R5EB-NEXT: lbu $3, 167($sp)
-; MIPS64R5EB-NEXT: lbu $4, 166($sp)
-; MIPS64R5EB-NEXT: insert.h $w1[6], $4
-; MIPS64R5EB-NEXT: insert.h $w1[7], $3
+; MIPS64R5EB-NEXT: sw $2, 156($sp)
+; MIPS64R5EB-NEXT: sw $1, 148($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 144($sp)
+; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sd $7, 256($sp)
+; MIPS64R5EB-NEXT: lbu $1, 257($sp)
+; MIPS64R5EB-NEXT: sh $1, 50($sp)
+; MIPS64R5EB-NEXT: lbu $1, 256($sp)
+; MIPS64R5EB-NEXT: sh $1, 48($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 48($sp)
+; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT: sw $2, 172($sp)
+; MIPS64R5EB-NEXT: sw $1, 164($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 160($sp)
+; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sd $8, 248($sp)
+; MIPS64R5EB-NEXT: lbu $1, 249($sp)
+; MIPS64R5EB-NEXT: sh $1, 66($sp)
+; MIPS64R5EB-NEXT: lbu $1, 248($sp)
+; MIPS64R5EB-NEXT: sh $1, 64($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 64($sp)
+; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT: sw $2, 188($sp)
+; MIPS64R5EB-NEXT: sw $1, 180($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 176($sp)
+; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sd $10, 232($sp)
+; MIPS64R5EB-NEXT: lbu $1, 233($sp)
+; MIPS64R5EB-NEXT: sh $1, 98($sp)
+; MIPS64R5EB-NEXT: lbu $1, 232($sp)
+; MIPS64R5EB-NEXT: sh $1, 96($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 96($sp)
+; MIPS64R5EB-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EB-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EB-NEXT: sd $9, 240($sp)
+; MIPS64R5EB-NEXT: lbu $3, 241($sp)
+; MIPS64R5EB-NEXT: sh $3, 82($sp)
+; MIPS64R5EB-NEXT: lbu $3, 240($sp)
+; MIPS64R5EB-NEXT: sh $3, 80($sp)
+; MIPS64R5EB-NEXT: ld.h $w1, 80($sp)
; MIPS64R5EB-NEXT: copy_s.h $3, $w1[0]
; MIPS64R5EB-NEXT: copy_s.h $4, $w1[1]
-; MIPS64R5EB-NEXT: sw $4, 28($sp)
-; MIPS64R5EB-NEXT: sw $3, 20($sp)
-; MIPS64R5EB-NEXT: sw $2, 12($sp)
-; MIPS64R5EB-NEXT: sw $1, 4($sp)
-; MIPS64R5EB-NEXT: ld.d $w1, 16($sp)
-; MIPS64R5EB-NEXT: ld.d $w2, 0($sp)
-; MIPS64R5EB-NEXT: addv.d $w1, $w2, $w1
-; MIPS64R5EB-NEXT: sd $6, 152($sp)
-; MIPS64R5EB-NEXT: lbu $1, 153($sp)
-; MIPS64R5EB-NEXT: lbu $2, 152($sp)
-; MIPS64R5EB-NEXT: move.v $w2, $w0
-; MIPS64R5EB-NEXT: insert.h $w2[0], $2
-; MIPS64R5EB-NEXT: insert.h $w2[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 154($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 155($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 156($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 157($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 159($sp)
-; MIPS64R5EB-NEXT: lbu $2, 158($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[6], $2
-; MIPS64R5EB-NEXT: insert.h $w2[7], $1
-; MIPS64R5EB-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EB-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EB-NEXT: sw $2, 44($sp)
-; MIPS64R5EB-NEXT: sw $1, 36($sp)
-; MIPS64R5EB-NEXT: ld.d $w2, 32($sp)
-; MIPS64R5EB-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EB-NEXT: sd $7, 144($sp)
-; MIPS64R5EB-NEXT: lbu $1, 145($sp)
-; MIPS64R5EB-NEXT: lbu $2, 144($sp)
-; MIPS64R5EB-NEXT: move.v $w2, $w0
-; MIPS64R5EB-NEXT: insert.h $w2[0], $2
-; MIPS64R5EB-NEXT: insert.h $w2[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 146($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 147($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 148($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 149($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 151($sp)
-; MIPS64R5EB-NEXT: lbu $2, 150($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[6], $2
-; MIPS64R5EB-NEXT: insert.h $w2[7], $1
-; MIPS64R5EB-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EB-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EB-NEXT: sw $2, 60($sp)
-; MIPS64R5EB-NEXT: sw $1, 52($sp)
-; MIPS64R5EB-NEXT: ld.d $w2, 48($sp)
-; MIPS64R5EB-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EB-NEXT: sd $8, 136($sp)
-; MIPS64R5EB-NEXT: lbu $1, 137($sp)
-; MIPS64R5EB-NEXT: lbu $2, 136($sp)
-; MIPS64R5EB-NEXT: move.v $w2, $w0
-; MIPS64R5EB-NEXT: insert.h $w2[0], $2
-; MIPS64R5EB-NEXT: insert.h $w2[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 138($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 139($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 140($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 141($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 143($sp)
-; MIPS64R5EB-NEXT: lbu $2, 142($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[6], $2
-; MIPS64R5EB-NEXT: insert.h $w2[7], $1
-; MIPS64R5EB-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EB-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EB-NEXT: sd $10, 120($sp)
-; MIPS64R5EB-NEXT: lbu $3, 121($sp)
-; MIPS64R5EB-NEXT: lbu $4, 120($sp)
-; MIPS64R5EB-NEXT: move.v $w2, $w0
-; MIPS64R5EB-NEXT: insert.h $w2[0], $4
-; MIPS64R5EB-NEXT: insert.h $w2[1], $3
-; MIPS64R5EB-NEXT: lbu $3, 122($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[2], $3
-; MIPS64R5EB-NEXT: lbu $3, 123($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[3], $3
-; MIPS64R5EB-NEXT: lbu $3, 124($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[4], $3
-; MIPS64R5EB-NEXT: lbu $3, 125($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[5], $3
-; MIPS64R5EB-NEXT: lbu $3, 127($sp)
-; MIPS64R5EB-NEXT: lbu $4, 126($sp)
-; MIPS64R5EB-NEXT: insert.h $w2[6], $4
-; MIPS64R5EB-NEXT: insert.h $w2[7], $3
-; MIPS64R5EB-NEXT: copy_s.h $3, $w2[0]
-; MIPS64R5EB-NEXT: copy_s.h $4, $w2[1]
-; MIPS64R5EB-NEXT: sw $2, 76($sp)
-; MIPS64R5EB-NEXT: sw $1, 68($sp)
-; MIPS64R5EB-NEXT: ld.d $w2, 64($sp)
-; MIPS64R5EB-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EB-NEXT: sd $9, 128($sp)
-; MIPS64R5EB-NEXT: lbu $1, 128($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[0], $1
-; MIPS64R5EB-NEXT: lbu $1, 129($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 130($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 131($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 132($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 133($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 135($sp)
-; MIPS64R5EB-NEXT: lbu $2, 134($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[6], $2
-; MIPS64R5EB-NEXT: insert.h $w0[7], $1
-; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0]
-; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1]
-; MIPS64R5EB-NEXT: sw $2, 92($sp)
-; MIPS64R5EB-NEXT: sw $1, 84($sp)
-; MIPS64R5EB-NEXT: ld.d $w0, 80($sp)
-; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5EB-NEXT: sw $4, 108($sp)
-; MIPS64R5EB-NEXT: sw $3, 100($sp)
-; MIPS64R5EB-NEXT: ld.d $w1, 96($sp)
+; MIPS64R5EB-NEXT: sw $4, 204($sp)
+; MIPS64R5EB-NEXT: sw $3, 196($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 192($sp)
+; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EB-NEXT: sw $2, 220($sp)
+; MIPS64R5EB-NEXT: sw $1, 212($sp)
+; MIPS64R5EB-NEXT: ld.d $w1, 208($sp)
; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EB-NEXT: sb $2, 117($sp)
-; MIPS64R5EB-NEXT: sb $1, 116($sp)
-; MIPS64R5EB-NEXT: lh $2, 116($sp)
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 176
+; MIPS64R5EB-NEXT: sb $2, 229($sp)
+; MIPS64R5EB-NEXT: sb $1, 228($sp)
+; MIPS64R5EB-NEXT: lh $2, 228($sp)
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 288
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
;
; MIPS64R5EL-LABEL: i8x2_7:
; MIPS64R5EL: # %bb.0: # %entry
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -176
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 176
-; MIPS64R5EL-NEXT: sd $4, 168($sp)
-; MIPS64R5EL-NEXT: ldi.b $w0, 0
-; MIPS64R5EL-NEXT: lbu $1, 169($sp)
-; MIPS64R5EL-NEXT: lbu $2, 168($sp)
-; MIPS64R5EL-NEXT: move.v $w1, $w0
-; MIPS64R5EL-NEXT: insert.h $w1[0], $2
-; MIPS64R5EL-NEXT: insert.h $w1[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 170($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 171($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 172($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 173($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 175($sp)
-; MIPS64R5EL-NEXT: lbu $2, 174($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[6], $2
-; MIPS64R5EL-NEXT: insert.h $w1[7], $1
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -288
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 288
+; MIPS64R5EL-NEXT: sd $4, 280($sp)
+; MIPS64R5EL-NEXT: lbu $1, 281($sp)
+; MIPS64R5EL-NEXT: sh $1, 2($sp)
+; MIPS64R5EL-NEXT: lbu $1, 280($sp)
+; MIPS64R5EL-NEXT: sh $1, 0($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 0($sp)
+; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0]
+; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1]
+; MIPS64R5EL-NEXT: sd $5, 272($sp)
+; MIPS64R5EL-NEXT: lbu $3, 273($sp)
+; MIPS64R5EL-NEXT: sh $3, 18($sp)
+; MIPS64R5EL-NEXT: lbu $3, 272($sp)
+; MIPS64R5EL-NEXT: sh $3, 16($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 16($sp)
+; MIPS64R5EL-NEXT: copy_s.h $3, $w0[0]
+; MIPS64R5EL-NEXT: copy_s.h $4, $w0[1]
+; MIPS64R5EL-NEXT: sw $4, 136($sp)
+; MIPS64R5EL-NEXT: sw $3, 128($sp)
+; MIPS64R5EL-NEXT: sw $2, 120($sp)
+; MIPS64R5EL-NEXT: sw $1, 112($sp)
+; MIPS64R5EL-NEXT: ld.d $w0, 128($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 112($sp)
+; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0
+; MIPS64R5EL-NEXT: sd $6, 264($sp)
+; MIPS64R5EL-NEXT: lbu $1, 265($sp)
+; MIPS64R5EL-NEXT: sh $1, 34($sp)
+; MIPS64R5EL-NEXT: lbu $1, 264($sp)
+; MIPS64R5EL-NEXT: sh $1, 32($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 32($sp)
; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0]
; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1]
-; MIPS64R5EL-NEXT: sd $5, 160($sp)
-; MIPS64R5EL-NEXT: lbu $3, 161($sp)
-; MIPS64R5EL-NEXT: lbu $4, 160($sp)
-; MIPS64R5EL-NEXT: move.v $w1, $w0
-; MIPS64R5EL-NEXT: insert.h $w1[0], $4
-; MIPS64R5EL-NEXT: insert.h $w1[1], $3
-; MIPS64R5EL-NEXT: lbu $3, 162($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[2], $3
-; MIPS64R5EL-NEXT: lbu $3, 163($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[3], $3
-; MIPS64R5EL-NEXT: lbu $3, 164($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[4], $3
-; MIPS64R5EL-NEXT: lbu $3, 165($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[5], $3
-; MIPS64R5EL-NEXT: lbu $3, 167($sp)
-; MIPS64R5EL-NEXT: lbu $4, 166($sp)
-; MIPS64R5EL-NEXT: insert.h $w1[6], $4
-; MIPS64R5EL-NEXT: insert.h $w1[7], $3
+; MIPS64R5EL-NEXT: sw $2, 152($sp)
+; MIPS64R5EL-NEXT: sw $1, 144($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 144($sp)
+; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sd $7, 256($sp)
+; MIPS64R5EL-NEXT: lbu $1, 257($sp)
+; MIPS64R5EL-NEXT: sh $1, 50($sp)
+; MIPS64R5EL-NEXT: lbu $1, 256($sp)
+; MIPS64R5EL-NEXT: sh $1, 48($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 48($sp)
+; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT: sw $2, 168($sp)
+; MIPS64R5EL-NEXT: sw $1, 160($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 160($sp)
+; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sd $8, 248($sp)
+; MIPS64R5EL-NEXT: lbu $1, 249($sp)
+; MIPS64R5EL-NEXT: sh $1, 66($sp)
+; MIPS64R5EL-NEXT: lbu $1, 248($sp)
+; MIPS64R5EL-NEXT: sh $1, 64($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 64($sp)
+; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT: sw $2, 184($sp)
+; MIPS64R5EL-NEXT: sw $1, 176($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 176($sp)
+; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sd $10, 232($sp)
+; MIPS64R5EL-NEXT: lbu $1, 233($sp)
+; MIPS64R5EL-NEXT: sh $1, 98($sp)
+; MIPS64R5EL-NEXT: lbu $1, 232($sp)
+; MIPS64R5EL-NEXT: sh $1, 96($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 96($sp)
+; MIPS64R5EL-NEXT: copy_s.h $1, $w1[0]
+; MIPS64R5EL-NEXT: copy_s.h $2, $w1[1]
+; MIPS64R5EL-NEXT: sd $9, 240($sp)
+; MIPS64R5EL-NEXT: lbu $3, 241($sp)
+; MIPS64R5EL-NEXT: sh $3, 82($sp)
+; MIPS64R5EL-NEXT: lbu $3, 240($sp)
+; MIPS64R5EL-NEXT: sh $3, 80($sp)
+; MIPS64R5EL-NEXT: ld.h $w1, 80($sp)
; MIPS64R5EL-NEXT: copy_s.h $3, $w1[0]
; MIPS64R5EL-NEXT: copy_s.h $4, $w1[1]
-; MIPS64R5EL-NEXT: sw $4, 24($sp)
-; MIPS64R5EL-NEXT: sw $3, 16($sp)
-; MIPS64R5EL-NEXT: sw $2, 8($sp)
-; MIPS64R5EL-NEXT: sw $1, 0($sp)
-; MIPS64R5EL-NEXT: ld.d $w1, 16($sp)
-; MIPS64R5EL-NEXT: ld.d $w2, 0($sp)
-; MIPS64R5EL-NEXT: addv.d $w1, $w2, $w1
-; MIPS64R5EL-NEXT: sd $6, 152($sp)
-; MIPS64R5EL-NEXT: lbu $1, 153($sp)
-; MIPS64R5EL-NEXT: lbu $2, 152($sp)
-; MIPS64R5EL-NEXT: move.v $w2, $w0
-; MIPS64R5EL-NEXT: insert.h $w2[0], $2
-; MIPS64R5EL-NEXT: insert.h $w2[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 154($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 155($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 156($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 157($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 159($sp)
-; MIPS64R5EL-NEXT: lbu $2, 158($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[6], $2
-; MIPS64R5EL-NEXT: insert.h $w2[7], $1
-; MIPS64R5EL-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EL-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EL-NEXT: sw $2, 40($sp)
-; MIPS64R5EL-NEXT: sw $1, 32($sp)
-; MIPS64R5EL-NEXT: ld.d $w2, 32($sp)
-; MIPS64R5EL-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EL-NEXT: sd $7, 144($sp)
-; MIPS64R5EL-NEXT: lbu $1, 145($sp)
-; MIPS64R5EL-NEXT: lbu $2, 144($sp)
-; MIPS64R5EL-NEXT: move.v $w2, $w0
-; MIPS64R5EL-NEXT: insert.h $w2[0], $2
-; MIPS64R5EL-NEXT: insert.h $w2[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 146($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 147($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 148($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 149($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 151($sp)
-; MIPS64R5EL-NEXT: lbu $2, 150($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[6], $2
-; MIPS64R5EL-NEXT: insert.h $w2[7], $1
-; MIPS64R5EL-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EL-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EL-NEXT: sw $2, 56($sp)
-; MIPS64R5EL-NEXT: sw $1, 48($sp)
-; MIPS64R5EL-NEXT: ld.d $w2, 48($sp)
-; MIPS64R5EL-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EL-NEXT: sd $8, 136($sp)
-; MIPS64R5EL-NEXT: lbu $1, 137($sp)
-; MIPS64R5EL-NEXT: lbu $2, 136($sp)
-; MIPS64R5EL-NEXT: move.v $w2, $w0
-; MIPS64R5EL-NEXT: insert.h $w2[0], $2
-; MIPS64R5EL-NEXT: insert.h $w2[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 138($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 139($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 140($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 141($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 143($sp)
-; MIPS64R5EL-NEXT: lbu $2, 142($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[6], $2
-; MIPS64R5EL-NEXT: insert.h $w2[7], $1
-; MIPS64R5EL-NEXT: copy_s.h $1, $w2[0]
-; MIPS64R5EL-NEXT: copy_s.h $2, $w2[1]
-; MIPS64R5EL-NEXT: sd $10, 120($sp)
-; MIPS64R5EL-NEXT: lbu $3, 121($sp)
-; MIPS64R5EL-NEXT: lbu $4, 120($sp)
-; MIPS64R5EL-NEXT: move.v $w2, $w0
-; MIPS64R5EL-NEXT: insert.h $w2[0], $4
-; MIPS64R5EL-NEXT: insert.h $w2[1], $3
-; MIPS64R5EL-NEXT: lbu $3, 122($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[2], $3
-; MIPS64R5EL-NEXT: lbu $3, 123($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[3], $3
-; MIPS64R5EL-NEXT: lbu $3, 124($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[4], $3
-; MIPS64R5EL-NEXT: lbu $3, 125($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[5], $3
-; MIPS64R5EL-NEXT: lbu $3, 127($sp)
-; MIPS64R5EL-NEXT: lbu $4, 126($sp)
-; MIPS64R5EL-NEXT: insert.h $w2[6], $4
-; MIPS64R5EL-NEXT: insert.h $w2[7], $3
-; MIPS64R5EL-NEXT: copy_s.h $3, $w2[0]
-; MIPS64R5EL-NEXT: copy_s.h $4, $w2[1]
-; MIPS64R5EL-NEXT: sw $2, 72($sp)
-; MIPS64R5EL-NEXT: sw $1, 64($sp)
-; MIPS64R5EL-NEXT: ld.d $w2, 64($sp)
-; MIPS64R5EL-NEXT: addv.d $w1, $w1, $w2
-; MIPS64R5EL-NEXT: sd $9, 128($sp)
-; MIPS64R5EL-NEXT: lbu $1, 128($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[0], $1
-; MIPS64R5EL-NEXT: lbu $1, 129($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 130($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 131($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 132($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 133($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 135($sp)
-; MIPS64R5EL-NEXT: lbu $2, 134($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[6], $2
-; MIPS64R5EL-NEXT: insert.h $w0[7], $1
-; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0]
-; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1]
-; MIPS64R5EL-NEXT: sw $2, 88($sp)
-; MIPS64R5EL-NEXT: sw $1, 80($sp)
-; MIPS64R5EL-NEXT: ld.d $w0, 80($sp)
-; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0
-; MIPS64R5EL-NEXT: sw $4, 104($sp)
-; MIPS64R5EL-NEXT: sw $3, 96($sp)
-; MIPS64R5EL-NEXT: ld.d $w1, 96($sp)
+; MIPS64R5EL-NEXT: sw $4, 200($sp)
+; MIPS64R5EL-NEXT: sw $3, 192($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 192($sp)
+; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
+; MIPS64R5EL-NEXT: sw $2, 216($sp)
+; MIPS64R5EL-NEXT: sw $1, 208($sp)
+; MIPS64R5EL-NEXT: ld.d $w1, 208($sp)
; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
-; MIPS64R5EL-NEXT: sb $2, 117($sp)
-; MIPS64R5EL-NEXT: sb $1, 116($sp)
-; MIPS64R5EL-NEXT: lh $2, 116($sp)
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 176
+; MIPS64R5EL-NEXT: sb $2, 229($sp)
+; MIPS64R5EL-NEXT: sb $1, 228($sp)
+; MIPS64R5EL-NEXT: lh $2, 228($sp)
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 288
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
entry:
;
; MIPS64R5EB-LABEL: call_i8_2:
; MIPS64R5EB: # %bb.0: # %entry
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, -48
-; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 48
-; MIPS64R5EB-NEXT: sd $ra, 40($sp) # 8-byte Folded Spill
-; MIPS64R5EB-NEXT: sd $gp, 32($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -64
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 64
+; MIPS64R5EB-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: sd $gp, 48($sp) # 8-byte Folded Spill
; MIPS64R5EB-NEXT: .cfi_offset 31, -8
; MIPS64R5EB-NEXT: .cfi_offset 28, -16
; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_2)))
; MIPS64R5EB-NEXT: daddu $1, $1, $25
; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
; MIPS64R5EB-NEXT: addiu $1, $zero, 1543
-; MIPS64R5EB-NEXT: sh $1, 24($sp)
+; MIPS64R5EB-NEXT: sh $1, 40($sp)
; MIPS64R5EB-NEXT: addiu $1, $zero, 3080
-; MIPS64R5EB-NEXT: sh $1, 28($sp)
+; MIPS64R5EB-NEXT: sh $1, 44($sp)
; MIPS64R5EB-NEXT: ld $25, %call16(i8_2)($gp)
-; MIPS64R5EB-NEXT: lh $4, 24($sp)
-; MIPS64R5EB-NEXT: lh $5, 28($sp)
+; MIPS64R5EB-NEXT: lh $4, 40($sp)
+; MIPS64R5EB-NEXT: lh $5, 44($sp)
; MIPS64R5EB-NEXT: jalr $25
; MIPS64R5EB-NEXT: nop
-; MIPS64R5EB-NEXT: sd $2, 16($sp)
-; MIPS64R5EB-NEXT: ldi.b $w0, 0
-; MIPS64R5EB-NEXT: lbu $1, 16($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[0], $1
-; MIPS64R5EB-NEXT: lbu $1, 17($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[1], $1
-; MIPS64R5EB-NEXT: lbu $1, 18($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[2], $1
-; MIPS64R5EB-NEXT: lbu $1, 19($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[3], $1
-; MIPS64R5EB-NEXT: lbu $1, 20($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[4], $1
-; MIPS64R5EB-NEXT: lbu $1, 21($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[5], $1
-; MIPS64R5EB-NEXT: lbu $1, 23($sp)
-; MIPS64R5EB-NEXT: lbu $2, 22($sp)
-; MIPS64R5EB-NEXT: insert.h $w0[6], $2
-; MIPS64R5EB-NEXT: insert.h $w0[7], $1
+; MIPS64R5EB-NEXT: sd $2, 32($sp)
+; MIPS64R5EB-NEXT: lbu $1, 33($sp)
+; MIPS64R5EB-NEXT: sh $1, 2($sp)
+; MIPS64R5EB-NEXT: lbu $1, 32($sp)
+; MIPS64R5EB-NEXT: sh $1, 0($sp)
+; MIPS64R5EB-NEXT: ld.h $w0, 0($sp)
; MIPS64R5EB-NEXT: copy_s.h $1, $w0[0]
; MIPS64R5EB-NEXT: copy_s.h $2, $w0[1]
-; MIPS64R5EB-NEXT: sw $2, 12($sp)
-; MIPS64R5EB-NEXT: sw $1, 4($sp)
-; MIPS64R5EB-NEXT: ld.d $w0, 0($sp)
+; MIPS64R5EB-NEXT: sw $2, 28($sp)
+; MIPS64R5EB-NEXT: sw $1, 20($sp)
+; MIPS64R5EB-NEXT: ld.d $w0, 16($sp)
; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1]
; MIPS64R5EB-NEXT: ld $3, %got_disp(gv2i8)($gp)
; MIPS64R5EB-NEXT: sb $2, 1($3)
; MIPS64R5EB-NEXT: sb $1, 0($3)
-; MIPS64R5EB-NEXT: ld $gp, 32($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload
-; MIPS64R5EB-NEXT: daddiu $sp, $sp, 48
+; MIPS64R5EB-NEXT: ld $gp, 48($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 64
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
;
; MIPS64R5EL-LABEL: call_i8_2:
; MIPS64R5EL: # %bb.0: # %entry
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, -48
-; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 48
-; MIPS64R5EL-NEXT: sd $ra, 40($sp) # 8-byte Folded Spill
-; MIPS64R5EL-NEXT: sd $gp, 32($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -64
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 64
+; MIPS64R5EL-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: sd $gp, 48($sp) # 8-byte Folded Spill
; MIPS64R5EL-NEXT: .cfi_offset 31, -8
; MIPS64R5EL-NEXT: .cfi_offset 28, -16
; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(call_i8_2)))
; MIPS64R5EL-NEXT: daddu $1, $1, $25
; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(call_i8_2)))
; MIPS64R5EL-NEXT: addiu $1, $zero, 1798
-; MIPS64R5EL-NEXT: sh $1, 24($sp)
+; MIPS64R5EL-NEXT: sh $1, 40($sp)
; MIPS64R5EL-NEXT: addiu $1, $zero, 2060
-; MIPS64R5EL-NEXT: sh $1, 28($sp)
+; MIPS64R5EL-NEXT: sh $1, 44($sp)
; MIPS64R5EL-NEXT: ld $25, %call16(i8_2)($gp)
-; MIPS64R5EL-NEXT: lh $4, 24($sp)
-; MIPS64R5EL-NEXT: lh $5, 28($sp)
+; MIPS64R5EL-NEXT: lh $4, 40($sp)
+; MIPS64R5EL-NEXT: lh $5, 44($sp)
; MIPS64R5EL-NEXT: jalr $25
; MIPS64R5EL-NEXT: nop
-; MIPS64R5EL-NEXT: sd $2, 16($sp)
-; MIPS64R5EL-NEXT: ldi.b $w0, 0
-; MIPS64R5EL-NEXT: lbu $1, 16($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[0], $1
-; MIPS64R5EL-NEXT: lbu $1, 17($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[1], $1
-; MIPS64R5EL-NEXT: lbu $1, 18($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[2], $1
-; MIPS64R5EL-NEXT: lbu $1, 19($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[3], $1
-; MIPS64R5EL-NEXT: lbu $1, 20($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[4], $1
-; MIPS64R5EL-NEXT: lbu $1, 21($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[5], $1
-; MIPS64R5EL-NEXT: lbu $1, 23($sp)
-; MIPS64R5EL-NEXT: lbu $2, 22($sp)
-; MIPS64R5EL-NEXT: insert.h $w0[6], $2
-; MIPS64R5EL-NEXT: insert.h $w0[7], $1
+; MIPS64R5EL-NEXT: sd $2, 32($sp)
+; MIPS64R5EL-NEXT: lbu $1, 33($sp)
+; MIPS64R5EL-NEXT: sh $1, 2($sp)
+; MIPS64R5EL-NEXT: lbu $1, 32($sp)
+; MIPS64R5EL-NEXT: sh $1, 0($sp)
+; MIPS64R5EL-NEXT: ld.h $w0, 0($sp)
; MIPS64R5EL-NEXT: copy_s.h $1, $w0[0]
; MIPS64R5EL-NEXT: copy_s.h $2, $w0[1]
-; MIPS64R5EL-NEXT: sw $2, 8($sp)
-; MIPS64R5EL-NEXT: sw $1, 0($sp)
-; MIPS64R5EL-NEXT: ld.d $w0, 0($sp)
+; MIPS64R5EL-NEXT: sw $2, 24($sp)
+; MIPS64R5EL-NEXT: sw $1, 16($sp)
+; MIPS64R5EL-NEXT: ld.d $w0, 16($sp)
; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
; MIPS64R5EL-NEXT: ld $3, %got_disp(gv2i8)($gp)
; MIPS64R5EL-NEXT: sb $2, 1($3)
; MIPS64R5EL-NEXT: sb $1, 0($3)
-; MIPS64R5EL-NEXT: ld $gp, 32($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload
-; MIPS64R5EL-NEXT: daddiu $sp, $sp, 48
+; MIPS64R5EL-NEXT: ld $gp, 48($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 64
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
entry:
define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) nounwind {
; SSE2-LABEL: v3i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, 8(%rdi)
-; SSE2-NEXT: movq %xmm2, (%rdi)
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movd %xmm2, 8(%rdi)
+; SSE2-NEXT: movq %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: v3i32:
; SSE42: # %bb.0:
-; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; SSE42-NEXT: pextrd $2, %xmm0, 8(%rdi)
-; SSE42-NEXT: movq %xmm1, (%rdi)
+; SSE42-NEXT: extractps $2, %xmm0, 8(%rdi)
+; SSE42-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE42-NEXT: movlps %xmm0, (%rdi)
; SSE42-NEXT: retq
;
-; AVX1-LABEL: v3i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX1-NEXT: vextractps $2, %xmm0, 8(%rdi)
-; AVX1-NEXT: vmovlps %xmm1, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: v3i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastss %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT: vextractps $2, %xmm0, 8(%rdi)
-; AVX2-NEXT: vmovlps %xmm1, (%rdi)
-; AVX2-NEXT: retq
+; AVX-LABEL: v3i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: vextractps $2, %xmm0, 8(%rdi)
+; AVX-NEXT: vmovlps %xmm1, (%rdi)
+; AVX-NEXT: retq
;
; XOP-LABEL: v3i32:
; XOP: # %bb.0:
-; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; XOP-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
+; XOP-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; XOP-NEXT: vextractps $2, %xmm0, 8(%rdi)
; XOP-NEXT: vmovlps %xmm1, (%rdi)
; XOP-NEXT: retq
define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) nounwind {
; SSE2-LABEL: v5i16:
; SSE2: # %bb.0:
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: pextrw $6, %xmm0, %eax
; SSE2-NEXT: movw %ax, 8(%rdi)
;
; SSE42-LABEL: v5i16:
; SSE42: # %bb.0:
+; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
; SSE42-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE42-NEXT: pextrw $6, %xmm0, 8(%rdi)
; SSE42-NEXT: movq %xmm2, (%rdi)
;
; AVX1-LABEL: v5i16:
; AVX1: # %bb.0:
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX1-NEXT: vpextrw $6, %xmm0, 8(%rdi)
; AVX1-NEXT: vmovq %xmm1, (%rdi)
;
; AVX2-SLOW-LABEL: v5i16:
; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX2-SLOW-NEXT: vpextrw $6, %xmm0, 8(%rdi)
; AVX2-SLOW-NEXT: vmovq %xmm1, (%rdi)
; AVX2-FAST-LABEL: v5i16:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,8,9,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,4,5,12,13,14,15,8,9,10,11,12,13,14,15]
+; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX2-FAST-NEXT: vpextrw $6, %xmm0, 8(%rdi)
; AVX2-FAST-NEXT: vmovq %xmm1, (%rdi)
;
; XOP-LABEL: v5i16:
; XOP: # %bb.0:
-; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[4,5],xmm0[4,5],xmm1[8,9],xmm0[12,13],xmm1[4,5],xmm0[14,15],xmm1[6,7]
+; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[4,5],xmm0[4,5],xmm1[8,9],xmm0[4,5],xmm1[4,5],xmm0[6,7],xmm1[6,7]
; XOP-NEXT: vpextrw $6, %xmm0, 8(%rdi)
; XOP-NEXT: vmovq %xmm1, (%rdi)
; XOP-NEXT: retq
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,2]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,1,0,3]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: movd %xmm1, 24(%rdi)
-; SSE2-NEXT: movlps %xmm0, 16(%rdi)
+; SSE2-NEXT: movq %xmm0, 16(%rdi)
; SSE2-NEXT: movdqa %xmm3, (%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: v7i32:
; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm1, %xmm2
-; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,2]
-; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,0,3]
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
+; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,3,2]
+; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE42-NEXT: movd %xmm1, 24(%rdi)
-; SSE42-NEXT: movq %xmm2, 16(%rdi)
-; SSE42-NEXT: movdqa %xmm0, (%rdi)
+; SSE42-NEXT: movq %xmm0, 16(%rdi)
+; SSE42-NEXT: movdqa %xmm2, (%rdi)
; SSE42-NEXT: retq
;
; AVX1-LABEL: v7i32: