granicus.if.org Git - handbrake/commitdiff
contrib: Add patch to fix AQ-related slowdown in x265 3.2.
author Bradley Sepos <bradley@bradleysepos.com>
Mon, 14 Oct 2019 17:42:12 +0000 (13:42 -0400)
committer Bradley Sepos <bradley@bradleysepos.com>
Mon, 14 Oct 2019 17:57:23 +0000 (13:57 -0400)
contrib/x265/A00-fix-aq-slowdown.patch [new file with mode: 0644]
contrib/x265_10bit/A00-fix-aq-slowdown.patch [new file with mode: 0644]
contrib/x265_12bit/A00-fix-aq-slowdown.patch [new file with mode: 0644]
contrib/x265_8bit/A00-fix-aq-slowdown.patch [new file with mode: 0644]

diff --git a/contrib/x265/A00-fix-aq-slowdown.patch b/contrib/x265/A00-fix-aq-slowdown.patch
new file mode 100644 (file)
index 0000000..b0321f3
--- /dev/null
@@ -0,0 +1,73 @@
+# HG changeset patch
+# User joelgalid@gmail.com
+# Date 1568357212 -19800
+# Branch Release_3.2
+# Node ID cdd80b53c90d224fd9281ad13de3ca9a1b6e1d39
+# Parent  329345aea40f95dc94aedf3c7cdb098bbb6d34f1
+Fix: AQ mode 4 commit (21db162) introduces a slowdown even when AQ mode 4 is not used.
+
+diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
+--- a/source/encoder/slicetype.cpp
++++ b/source/encoder/slicetype.cpp
+@@ -480,17 +480,24 @@
+             {
+ #define AQ_EDGE_BIAS 0.5
+ #define EDGE_INCLINATION 45
+-                uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
+-                int maxHeight = numCuInHeight * param->maxCUSize;
+-                intptr_t stride = curFrame->m_fencPic->m_stride;
+-                pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+-                memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+-                memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++
++                pixel *edgePic = NULL;
++                pixel *gaussianPic = NULL;
++                pixel *thetaPic = NULL;
++
+                 if (param->rc.aqMode == X265_AQ_EDGE)
++                {
++                    uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
++                    int maxHeight = numCuInHeight * param->maxCUSize;
++                    intptr_t stride = curFrame->m_fencPic->m_stride;
++                    edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++                    memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++                    memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+                     edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);
++                }                  
+                 int blockXY = 0, inclinedEdge = 0;
+                 double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
+@@ -507,8 +514,8 @@
+                             energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
+                             if (param->rc.aqMode == X265_AQ_EDGE)
+                             {
+-                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+-                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
++                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
++                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+                                 edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+                                 if (edgeDensity)
+                                 {
+@@ -542,9 +549,13 @@
+                 else
+                     strength = param->rc.aqStrength * 1.0397f;
+-                X265_FREE(edgePic);
+-                X265_FREE(gaussianPic);
+-                X265_FREE(thetaPic);
++                if (param->rc.aqMode == X265_AQ_EDGE)
++                {
++                    X265_FREE(edgePic);
++                    X265_FREE(gaussianPic);
++                    X265_FREE(thetaPic);
++                }
++
+                 blockXY = 0;
+                 for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
+                 {
diff --git a/contrib/x265_10bit/A00-fix-aq-slowdown.patch b/contrib/x265_10bit/A00-fix-aq-slowdown.patch
new file mode 100644 (file)
index 0000000..b0321f3
--- /dev/null
@@ -0,0 +1,73 @@
+# HG changeset patch
+# User joelgalid@gmail.com
+# Date 1568357212 -19800
+# Branch Release_3.2
+# Node ID cdd80b53c90d224fd9281ad13de3ca9a1b6e1d39
+# Parent  329345aea40f95dc94aedf3c7cdb098bbb6d34f1
+Fix: AQ mode 4 commit (21db162) introduces a slowdown even when AQ mode 4 is not used.
+
+diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
+--- a/source/encoder/slicetype.cpp
++++ b/source/encoder/slicetype.cpp
+@@ -480,17 +480,24 @@
+             {
+ #define AQ_EDGE_BIAS 0.5
+ #define EDGE_INCLINATION 45
+-                uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
+-                int maxHeight = numCuInHeight * param->maxCUSize;
+-                intptr_t stride = curFrame->m_fencPic->m_stride;
+-                pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+-                memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+-                memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++
++                pixel *edgePic = NULL;
++                pixel *gaussianPic = NULL;
++                pixel *thetaPic = NULL;
++
+                 if (param->rc.aqMode == X265_AQ_EDGE)
++                {
++                    uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
++                    int maxHeight = numCuInHeight * param->maxCUSize;
++                    intptr_t stride = curFrame->m_fencPic->m_stride;
++                    edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++                    memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++                    memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+                     edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);
++                }                  
+                 int blockXY = 0, inclinedEdge = 0;
+                 double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
+@@ -507,8 +514,8 @@
+                             energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
+                             if (param->rc.aqMode == X265_AQ_EDGE)
+                             {
+-                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+-                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
++                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
++                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+                                 edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+                                 if (edgeDensity)
+                                 {
+@@ -542,9 +549,13 @@
+                 else
+                     strength = param->rc.aqStrength * 1.0397f;
+-                X265_FREE(edgePic);
+-                X265_FREE(gaussianPic);
+-                X265_FREE(thetaPic);
++                if (param->rc.aqMode == X265_AQ_EDGE)
++                {
++                    X265_FREE(edgePic);
++                    X265_FREE(gaussianPic);
++                    X265_FREE(thetaPic);
++                }
++
+                 blockXY = 0;
+                 for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
+                 {
diff --git a/contrib/x265_12bit/A00-fix-aq-slowdown.patch b/contrib/x265_12bit/A00-fix-aq-slowdown.patch
new file mode 100644 (file)
index 0000000..b0321f3
--- /dev/null
@@ -0,0 +1,73 @@
+# HG changeset patch
+# User joelgalid@gmail.com
+# Date 1568357212 -19800
+# Branch Release_3.2
+# Node ID cdd80b53c90d224fd9281ad13de3ca9a1b6e1d39
+# Parent  329345aea40f95dc94aedf3c7cdb098bbb6d34f1
+Fix: AQ mode 4 commit (21db162) introduces a slowdown even when AQ mode 4 is not used.
+
+diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
+--- a/source/encoder/slicetype.cpp
++++ b/source/encoder/slicetype.cpp
+@@ -480,17 +480,24 @@
+             {
+ #define AQ_EDGE_BIAS 0.5
+ #define EDGE_INCLINATION 45
+-                uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
+-                int maxHeight = numCuInHeight * param->maxCUSize;
+-                intptr_t stride = curFrame->m_fencPic->m_stride;
+-                pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+-                memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+-                memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++
++                pixel *edgePic = NULL;
++                pixel *gaussianPic = NULL;
++                pixel *thetaPic = NULL;
++
+                 if (param->rc.aqMode == X265_AQ_EDGE)
++                {
++                    uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
++                    int maxHeight = numCuInHeight * param->maxCUSize;
++                    intptr_t stride = curFrame->m_fencPic->m_stride;
++                    edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++                    memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++                    memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+                     edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);
++                }                  
+                 int blockXY = 0, inclinedEdge = 0;
+                 double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
+@@ -507,8 +514,8 @@
+                             energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
+                             if (param->rc.aqMode == X265_AQ_EDGE)
+                             {
+-                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+-                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
++                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
++                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+                                 edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+                                 if (edgeDensity)
+                                 {
+@@ -542,9 +549,13 @@
+                 else
+                     strength = param->rc.aqStrength * 1.0397f;
+-                X265_FREE(edgePic);
+-                X265_FREE(gaussianPic);
+-                X265_FREE(thetaPic);
++                if (param->rc.aqMode == X265_AQ_EDGE)
++                {
++                    X265_FREE(edgePic);
++                    X265_FREE(gaussianPic);
++                    X265_FREE(thetaPic);
++                }
++
+                 blockXY = 0;
+                 for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
+                 {
diff --git a/contrib/x265_8bit/A00-fix-aq-slowdown.patch b/contrib/x265_8bit/A00-fix-aq-slowdown.patch
new file mode 100644 (file)
index 0000000..b0321f3
--- /dev/null
@@ -0,0 +1,73 @@
+# HG changeset patch
+# User joelgalid@gmail.com
+# Date 1568357212 -19800
+# Branch Release_3.2
+# Node ID cdd80b53c90d224fd9281ad13de3ca9a1b6e1d39
+# Parent  329345aea40f95dc94aedf3c7cdb098bbb6d34f1
+Fix: AQ mode 4 commit (21db162) introduces a slowdown even when AQ mode 4 is not used.
+
+diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
+--- a/source/encoder/slicetype.cpp
++++ b/source/encoder/slicetype.cpp
+@@ -480,17 +480,24 @@
+             {
+ #define AQ_EDGE_BIAS 0.5
+ #define EDGE_INCLINATION 45
+-                uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
+-                int maxHeight = numCuInHeight * param->maxCUSize;
+-                intptr_t stride = curFrame->m_fencPic->m_stride;
+-                pixel *edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                pixel *gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                pixel *thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
+-                memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+-                memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+-                memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++
++                pixel *edgePic = NULL;
++                pixel *gaussianPic = NULL;
++                pixel *thetaPic = NULL;
++
+                 if (param->rc.aqMode == X265_AQ_EDGE)
++                {
++                    uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
++                    int maxHeight = numCuInHeight * param->maxCUSize;
++                    intptr_t stride = curFrame->m_fencPic->m_stride;
++                    edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
++                    memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++                    memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
++                    memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+                     edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);
++                }                  
+                 int blockXY = 0, inclinedEdge = 0;
+                 double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
+@@ -507,8 +514,8 @@
+                             energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
+                             if (param->rc.aqMode == X265_AQ_EDGE)
+                             {
+-                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
+-                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
++                                pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
++                                pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
+                                 edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+                                 if (edgeDensity)
+                                 {
+@@ -542,9 +549,13 @@
+                 else
+                     strength = param->rc.aqStrength * 1.0397f;
+-                X265_FREE(edgePic);
+-                X265_FREE(gaussianPic);
+-                X265_FREE(thetaPic);
++                if (param->rc.aqMode == X265_AQ_EDGE)
++                {
++                    X265_FREE(edgePic);
++                    X265_FREE(gaussianPic);
++                    X265_FREE(thetaPic);
++                }
++
+                 blockXY = 0;
+                 for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
+                 {