From 61c6f9e58169587fd5a1dea8ef3eae66d4516df4 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 29 Jul 2015 15:58:34 +0000 Subject: [PATCH] Merging r243519: ------------------------------------------------------------------------ r243519 | wschmidt | 2015-07-29 07:31:57 -0700 (Wed, 29 Jul 2015) | 14 lines [PPC] Fix PR24216: Don't generate splat for misaligned shuffle mask Given certain shuffle-vector masks, LLVM emits splat instructions which splat the wrong bytes from the source register. The issue is that the function PPC::isSplatShuffleMask() in PPCISelLowering.cpp does not ensure that the splat pattern found is requesting bytes that are aligned on an EltSize boundary. This patch detects this situation as not a valid splat mask, resulting in a permute being generated instead of a splat. Patch and test case by Tyler Kenney, cleaned up a bit by me. This is a simple bug fix that would be good to incorporate into 3.7. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_37@243528 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 5 +++++ test/CodeGen/PowerPC/pr24216.ll | 14 ++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 test/CodeGen/PowerPC/pr24216.ll diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index b9868053def..c1473b2d5e4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1430,6 +1430,11 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { assert(N->getValueType(0) == MVT::v16i8 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); + // The consecutive indices need to specify an element, not part of two + // different elements. So abandon ship early if this isn't the case. 
+ if (N->getMaskElt(0) % EltSize != 0) + return false; + // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. unsigned ElementBase = N->getMaskElt(0); diff --git a/test/CodeGen/PowerPC/pr24216.ll b/test/CodeGen/PowerPC/pr24216.ll new file mode 100644 index 00000000000..4ab41985f5b --- /dev/null +++ b/test/CodeGen/PowerPC/pr24216.ll @@ -0,0 +1,14 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; Test case adapted from PR24216. + +define void @foo(<16 x i8>* nocapture readonly %in, <16 x i8>* nocapture %out) { +entry: + %0 = load <16 x i8>, <16 x i8>* %in, align 16 + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5> + store <16 x i8> %1, <16 x i8>* %out, align 16 + ret void +} + +; CHECK: vperm +; CHECK-NOT: vspltw -- 2.40.0