val1 = (src[cfilterPos[h] * dstChrStride + w] + local_up_dither[ (w + 3) & 7] ) >> 7;\r
dst[h * dstChrStride + w] = ((val1&(~0xFF)) ? ((-val1) >> 31) : (val1));\r
\r
- }
- );
-
-char *kernel_src_scale = KERNEL (
-
-__kernel __attribute__((reqd_work_group_size(64, 1, 1))) void frame_scale(__global uchar *dst,
- __global const uchar *src,
- const float xscale,
- const float yscale,
- const int srcPlaneOffset0,
- const int srcPlaneOffset1,
- const int srcPlaneOffset2,
- const int dstPlaneOffset0,
- const int dstPlaneOffset1,
- const int dstPlaneOffset2,
- const int srcRowWords0,
- const int srcRowWords1,
- const int srcRowWords2,
- const int dstRowWords0,
- const int dstRowWords1,
- const int dstRowWords2,
- const int srcWidth,
- const int srcHeight,
- const int dstWidth,
- const int dstHeight,
- __global const float4* restrict xweights,
- __global const float4* restrict yweights
- )
-{
- const int x = get_global_id(0);
- const int y = get_global_id(1);
- const int z = get_global_id(2);
-
- // Abort work items outside the dst image bounds.
-
- if ((get_group_id(0) * 64 >= (dstWidth >> ((z == 0) ? 0 : 1))) || (get_group_id(1) * 16 >= (dstHeight >> ((z == 0) ? 0 : 1))))
- return;
-
- const int srcPlaneOffset = (z == 0) ? srcPlaneOffset0 : ((z == 1) ? srcPlaneOffset1 : srcPlaneOffset2);
- const int dstPlaneOffset = (z == 0) ? dstPlaneOffset0 : ((z == 1) ? dstPlaneOffset1 : dstPlaneOffset2);
- const int srcRowWords = (z == 0) ? srcRowWords0: ((z == 1) ? srcRowWords1 : srcRowWords2);
- const int dstRowWords = (z == 0) ? dstRowWords0: ((z == 1) ? dstRowWords1 : dstRowWords2);
-
- __local uchar pixels[64 * 36];
- const int localRowPixels = 64;
- const int groupHeight = 16; // src pixel height output by the workgroup
- const int ypad = 2;
- const int localx = get_local_id(0);
-
- const int globalStartRow = floor((get_group_id(1) * groupHeight) / yscale);
- const int globalRowCount = ceil(groupHeight / yscale) + 2 * ypad;
-
- float4 weights = xweights[x];
- int4 woffs = floor(x / xscale);
- woffs += (int4)(-1, 0, 1, 2);
- woffs = clamp(woffs, 0, (srcWidth >> ((z == 0) ? 0 : 1)) - 1);
- const int maxy = (srcHeight >> ((z == 0) ? 0 : 1)) - 1;
-
- // Scale x from global into LDS
-
- for (int i = 0; i <= globalRowCount; ++i) {
- int4 offs = srcPlaneOffset + clamp(globalStartRow - ypad + i, 0, maxy) * srcRowWords;
- offs += woffs;
- pixels[localx + i * localRowPixels] = convert_uchar(clamp(round(dot(weights,
- (float4)(src[offs.x], src[offs.y], src[offs.z], src[offs.w]))), 0.0f, 255.0f));
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // Scale y from LDS into global
-
- if (x >= dstWidth >> ((z == 0) ? 0 : 1))
- return;
-
- int off = dstPlaneOffset + x + (get_group_id(1) * groupHeight) * dstRowWords;
-
- for (int i = 0; i < groupHeight; ++i) {
- if (y >= dstHeight >> ((z == 0) ? 0 : 1))
- break;
- int localy = floor((get_group_id(1) * groupHeight + i) / yscale);
- localy = localy - globalStartRow + ypad;
- int loff = localx + localy * localRowPixels;
- dst[off] = convert_uchar(clamp(round(dot(yweights[get_group_id(1) * groupHeight + i],
- (float4)(pixels[loff - localRowPixels], pixels[loff], pixels[loff + localRowPixels]
- , pixels[loff + localRowPixels * 2]))), 0.0f, 255.0f));
- off += dstRowWords;
- }
-}
-);
-
-
-char *kernel_src_yadif_filter = KERNEL(
- void filter_v6(
- global unsigned char *dst,
+ }\r
+ );\r
+\r
+char *kernel_src_scale = KERNEL (\r
+// frame_scale: 4-tap resize of plane z. Pass 1 filters in x into local memory, pass 2 filters in y into dst.\r
+__kernel __attribute__((reqd_work_group_size(64, 1, 1))) void frame_scale(__global uchar *dst, // output planes\r
+    __global const uchar *src,              // input planes\r
+    const float xscale,                     // source column = floor(x / xscale)\r
+    const float yscale,                     // source row = floor(row / yscale)\r
+    const int srcPlaneOffset0,              // index of the first element of plane 0, 1, 2 in src\r
+    const int srcPlaneOffset1,\r
+    const int srcPlaneOffset2,\r
+    const int dstPlaneOffset0,              // index of the first element of plane 0, 1, 2 in dst\r
+    const int dstPlaneOffset1,\r
+    const int dstPlaneOffset2,\r
+    const int srcRowWords0,                 // src row stride of plane 0, 1, 2, applied as a uchar index step\r
+    const int srcRowWords1,\r
+    const int srcRowWords2,\r
+    const int dstRowWords0,                 // dst row stride of plane 0, 1, 2, applied as a uchar index step\r
+    const int dstRowWords1,\r
+    const int dstRowWords2,\r
+    const int srcWidth,                     // src size of plane 0; planes 1 and 2 use it shifted right by one\r
+    const int srcHeight,\r
+    const int dstWidth,                     // dst size of plane 0; planes 1 and 2 use it shifted right by one\r
+    const int dstHeight,\r
+    __global const float4* restrict xweights, // four tap weights per output column, indexed by x\r
+    __global const float4* restrict yweights  // four tap weights per output row\r
+    )\r
+{\r
+    const int x = get_global_id(0);\r
+    const int z = get_global_id(2);\r
+    const int dstPlaneHeight = dstHeight >> ((z == 0) ? 0 : 1);\r
+    // Abort work groups that start outside the dst plane. The test reads only group ids,\r
+    // so every item of a group takes the same branch and none is left at the barrier below.\r
+\r
+    if ((get_group_id(0) * 64 >= (dstWidth >> ((z == 0) ? 0 : 1))) || (get_group_id(1) * 16 >= dstPlaneHeight))\r
+        return;\r
+    // Pick the offsets and strides that belong to plane z.\r
+    const int srcPlaneOffset = (z == 0) ? srcPlaneOffset0 : ((z == 1) ? srcPlaneOffset1 : srcPlaneOffset2);\r
+    const int dstPlaneOffset = (z == 0) ? dstPlaneOffset0 : ((z == 1) ? dstPlaneOffset1 : dstPlaneOffset2);\r
+    const int srcRowWords = (z == 0) ? srcRowWords0: ((z == 1) ? srcRowWords1 : srcRowWords2);\r
+    const int dstRowWords = (z == 0) ? dstRowWords0: ((z == 1) ? dstRowWords1 : dstRowWords2);\r
+    // Local staging buffer: 36 rows of 64 x-filtered pixels, one column per work item.\r
+    __local uchar pixels[64 * 36];\r
+    const int localRowPixels = 64;\r
+    const int groupHeight = 16; // dst rows produced by one work group\r
+    const int ypad = 2;         // extra rows staged above and below the nominal source span\r
+    const int localx = get_local_id(0);\r
+    // Source rows this group needs: first row, and span plus padding on both sides.\r
+    const int globalStartRow = floor((get_group_id(1) * groupHeight) / yscale);\r
+    const int globalRowCount = ceil(groupHeight / yscale) + 2 * ypad;\r
+    // Tap weights for column x and the four source columns they apply to, clamped to the plane.\r
+    float4 weights = xweights[x];\r
+    int4 woffs = floor(x / xscale);\r
+    woffs += (int4)(-1, 0, 1, 2);\r
+    woffs = clamp(woffs, 0, (srcWidth >> ((z == 0) ? 0 : 1)) - 1);\r
+    const int maxy = (srcHeight >> ((z == 0) ? 0 : 1)) - 1;\r
+    // Scale x from global into LDS: each work item filters its own column for every staged row.\r
+    // NOTE(review): rows 0..globalRowCount are written but pixels holds 36 rows, so yscale\r
+    // below roughly 0.52 would overrun it - confirm the host restricts the scale factor.\r
+    for (int i = 0; i <= globalRowCount; ++i) {\r
+        int4 offs = srcPlaneOffset + clamp(globalStartRow - ypad + i, 0, maxy) * srcRowWords;\r
+        offs += woffs;\r
+        pixels[localx + i * localRowPixels] = convert_uchar(clamp(round(dot(weights,\r
+            (float4)(src[offs.x], src[offs.y], src[offs.z], src[offs.w]))), 0.0f, 255.0f));\r
+    }\r
+    // Local writes of pass 1 are fenced before pass 2 reads the staging buffer.\r
+    barrier(CLK_LOCAL_MEM_FENCE);\r
+    // Scale y from LDS into global.\r
+    // Columns past the right edge of the plane are not written to dst.\r
+\r
+    if (x >= dstWidth >> ((z == 0) ? 0 : 1))\r
+        return;\r
+    // dst index of this column in the first output row of the group.\r
+    int off = dstPlaneOffset + x + (get_group_id(1) * groupHeight) * dstRowWords;\r
+    // Bound by the actual output row: the last group may cover fewer than groupHeight rows.\r
+    for (int i = 0; i < groupHeight; ++i) {\r
+        const int dstRow = get_group_id(1) * groupHeight + i;\r
+        if (dstRow >= dstPlaneHeight)\r
+            break;\r
+        int localy = (int)floor(dstRow / yscale) - globalStartRow + ypad; // staged row of the second tap\r
+        int loff = localx + localy * localRowPixels;\r
+        dst[off] = convert_uchar(clamp(round(dot(yweights[dstRow],\r
+            (float4)(pixels[loff - localRowPixels], pixels[loff], pixels[loff + localRowPixels]\r
+            , pixels[loff + localRowPixels * 2]))), 0.0f, 255.0f));\r
+        off += dstRowWords;\r
+    }\r
+}\r
+);\r
+\r
+\r
+char *kernel_src_yadif_filter = KERNEL(\r
+ void filter_v6(\r
+ global unsigned char *dst,\r
global unsigned char *prev, \r
global unsigned char *cur, \r
global unsigned char *next,\r