#include "common.h"
-static const int8_t x264_cabac_context_init_I[460][2] =
+static const int8_t x264_cabac_context_init_I[1024][2] =
{
/* 0 - 10 */
{ 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 },
{ -10, 73 }, { -10, 70 }, { -10, 69 }, { -5, 66 },
{ -9, 64 }, { -5, 58 }, { 2, 59 }, { 21, -10 },
{ 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 },
- { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 }
+ { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 },
+
+ /* 460 - 1024 */
+ { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
+ { -12, 63 }, { -2, 68 }, { -15, 84 }, { -13, 104 },
+ { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 },
+ { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
+ { -12, 63 }, { -2, 68 }, { -15, 84 }, { -13, 104 },
+ { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 },
+ { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 },
+ { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 },
+ { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 },
+ { 1, 64 }, { 0, 61 }, { -2, 78 }, { 1, 50 },
+ { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 },
+ { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 },
+ { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 },
+ { 14, 62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
+ { -13, 91 }, { -12, 94 }, { -10, 88 }, { -16, 84 },
+ { -10, 86 }, { -7, 83 }, { -13, 87 }, { -19, 94 },
+ { 1, 70 }, { 0, 72 }, { -5, 74 }, { 18, 59 },
+ { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 },
+ { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 },
+ { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 },
+ { 1, 64 }, { 0, 61 }, { -2, 78 }, { 1, 50 },
+ { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 },
+ { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 },
+ { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 },
+ { 14, 62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
+ { -13, 91 }, { -12, 94 }, { -10, 88 }, { -16, 84 },
+ { -10, 86 }, { -7, 83 }, { -13, 87 }, { -19, 94 },
+ { 1, 70 }, { 0, 72 }, { -5, 74 }, { 18, 59 },
+ { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 },
+ { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 },
+ { 6, 29 }, { 20, 33 }, { 15, 30 }, { 4, 45 },
+ { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 },
+ { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 },
+ { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 },
+ { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 },
+ { 0, 89 }, { 26, -19 }, { 22, -17 }, { 26, -17 },
+ { 30, -25 }, { 28, -20 }, { 33, -23 }, { 37, -27 },
+ { 33, -23 }, { 40, -28 }, { 38, -17 }, { 33, -11 },
+ { 40, -15 }, { 41, -6 }, { 38, 1 }, { 41, 17 },
+ { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 },
+ { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 },
+ { 6, 29 }, { 20, 33 }, { 15, 30 }, { 4, 45 },
+ { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 },
+ { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 },
+ { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 },
+ { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 },
+ { 0, 89 }, { 26, -19 }, { 22, -17 }, { 26, -17 },
+ { 30, -25 }, { 28, -20 }, { 33, -23 }, { 37, -27 },
+ { 33, -23 }, { 40, -28 }, { 38, -17 }, { 33, -11 },
+ { 40, -15 }, { 41, -6 }, { 38, 1 }, { 41, 17 },
+ { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 },
+ { -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 },
+ { -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 },
+ { -23, 68 }, { -24, 50 }, { -11, 74 }, { -14, 106 },
+ { -13, 97 }, { -15, 90 }, { -12, 90 }, { -18, 88 },
+ { -10, 73 }, { -9, 79 }, { -14, 86 }, { -10, 73 },
+ { -10, 70 }, { -10, 69 }, { -5, 66 }, { -9, 64 },
+ { -5, 58 }, { 2, 59 }, { 23, -13 }, { 26, -13 },
+ { 40, -15 }, { 49, -14 }, { 44, 3 }, { 45, 6 },
+ { 44, 34 }, { 33, 54 }, { 19, 82 }, { 21, -10 },
+ { 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 },
+ { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 },
+ { -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 },
+ { 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 },
+ { 0, 68 }, { -9, 92 }, { -17, 120 }, { -20, 112 },
+ { -18, 114 }, { -11, 85 }, { -15, 92 }, { -14, 89 },
+ { -26, 71 }, { -15, 81 }, { -14, 80 }, { 0, 68 },
+ { -14, 70 }, { -24, 56 }, { -23, 68 }, { -24, 50 },
+ { -11, 74 }, { -14, 106 }, { -13, 97 }, { -15, 90 },
+ { -12, 90 }, { -18, 88 }, { -10, 73 }, { -9, 79 },
+ { -14, 86 }, { -10, 73 }, { -10, 70 }, { -10, 69 },
+ { -5, 66 }, { -9, 64 }, { -5, 58 }, { 2, 59 },
+ { 23, -13 }, { 26, -13 }, { 40, -15 }, { 49, -14 },
+ { 44, 3 }, { 45, 6 }, { 44, 34 }, { 33, 54 },
+ { 19, 82 }, { 21, -10 }, { 24, -11 }, { 28, -8 },
+ { 28, -1 }, { 29, 3 }, { 29, 9 }, { 35, 20 },
+ { 29, 36 }, { 14, 67 }, { -3, 75 }, { -1, 23 },
+ { 1, 34 }, { 1, 43 }, { 0, 54 }, { -2, 55 },
+ { 0, 61 }, { 1, 64 }, { 0, 68 }, { -9, 92 },
+ { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 },
+ { -1, 71 }, { 0, 62 }, { -2, 60 }, { -2, 59 },
+ { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 },
+ { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 },
+ { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 },
+ { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 },
+ { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 },
+ { 9, 64 }, { -12, 104 }, { -11, 97 }, { -16, 96 },
+ { -7, 88 }, { -8, 85 }, { -7, 85 }, { -9, 85 },
+ { -13, 88 }, { 4, 66 }, { -3, 77 }, { -3, 76 },
+ { -6, 76 }, { 10, 58 }, { -1, 76 }, { -1, 83 },
+ { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 },
+ { -1, 71 }, { 0, 62 }, { -2, 60 }, { -2, 59 },
+ { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 },
+ { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 },
+ { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 },
+ { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 },
+ { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 },
+ { 9, 64 }, { -12, 104 }, { -11, 97 }, { -16, 96 },
+ { -7, 88 }, { -8, 85 }, { -7, 85 }, { -9, 85 },
+ { -13, 88 }, { 4, 66 }, { -3, 77 }, { -3, 76 },
+ { -6, 76 }, { 10, 58 }, { -1, 76 }, { -1, 83 },
+ { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 },
+ { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 },
+ { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 },
+ { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 },
+ { 21, 21 }, { 25, 22 }, { 31, 27 }, { 22, 29 },
+ { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 },
+ { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 },
+ { -12, 109 }, { 36, -35 }, { 36, -34 }, { 32, -26 },
+ { 37, -30 }, { 44, -32 }, { 34, -18 }, { 34, -15 },
+ { 40, -15 }, { 33, -7 }, { 35, -5 }, { 33, 0 },
+ { 38, 2 }, { 33, 13 }, { 23, 35 }, { 13, 58 },
+ { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 },
+ { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 },
+ { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 },
+ { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 },
+ { 21, 21 }, { 25, 22 }, { 31, 27 }, { 22, 29 },
+ { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 },
+ { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 },
+ { -12, 109 }, { 36, -35 }, { 36, -34 }, { 32, -26 },
+ { 37, -30 }, { 44, -32 }, { 34, -18 }, { 34, -15 },
+ { 40, -15 }, { 33, -7 }, { 35, -5 }, { 33, 0 },
+ { 38, 2 }, { 33, 13 }, { 23, 35 }, { 13, 58 },
+ { -3, 71 }, { -6, 42 }, { -5, 50 }, { -3, 54 },
+ { -2, 62 }, { 0, 58 }, { 1, 63 }, { -2, 72 },
+ { -1, 74 }, { -9, 91 }, { -5, 67 }, { -5, 27 },
+ { -3, 39 }, { -2, 44 }, { 0, 46 }, { -16, 64 },
+ { -8, 68 }, { -10, 78 }, { -6, 77 }, { -10, 86 },
+ { -12, 92 }, { -15, 55 }, { -10, 60 }, { -6, 62 },
+ { -4, 65 }, { -12, 73 }, { -8, 76 }, { -7, 80 },
+ { -9, 88 }, { -17, 110 }, { -3, 71 }, { -6, 42 },
+ { -5, 50 }, { -3, 54 }, { -2, 62 }, { 0, 58 },
+ { 1, 63 }, { -2, 72 }, { -1, 74 }, { -9, 91 },
+ { -5, 67 }, { -5, 27 }, { -3, 39 }, { -2, 44 },
+ { 0, 46 }, { -16, 64 }, { -8, 68 }, { -10, 78 },
+ { -6, 77 }, { -10, 86 }, { -12, 92 }, { -15, 55 },
+ { -10, 60 }, { -6, 62 }, { -4, 65 }, { -12, 73 },
+ { -8, 76 }, { -7, 80 }, { -9, 88 }, { -17, 110 },
+ { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 },
+ { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 },
+ { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }
};
-static const int8_t x264_cabac_context_init_PB[3][460][2] =
+static const int8_t x264_cabac_context_init_PB[3][1024][2] =
{
/* i_cabac_init_idc == 0 */
{
{ -14, 66 }, { 0, 59 }, { 2, 59 }, { 21, -13 },
{ 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 },
{ 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 },
+
+ /* 460 - 1024 */
+ { -7, 92 }, { -5, 89 }, { -7, 96 }, { -13, 108 },
+ { -3, 46 }, { -1, 65 }, { -1, 57 }, { -9, 93 },
+ { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 },
+ { -7, 92 }, { -5, 89 }, { -7, 96 }, { -13, 108 },
+ { -3, 46 }, { -1, 65 }, { -1, 57 }, { -9, 93 },
+ { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 },
+ { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 },
+ { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 },
+ { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 },
+ { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 },
+ { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 },
+ { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 },
+ { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 },
+ { -7, 100 }, { 9, 53 }, { 2, 53 }, { 5, 53 },
+ { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 },
+ { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 },
+ { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 },
+ { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 },
+ { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 },
+ { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 },
+ { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 },
+ { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 },
+ { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 },
+ { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 },
+ { -7, 100 }, { 9, 53 }, { 2, 53 }, { 5, 53 },
+ { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 },
+ { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 },
+ { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 },
+ { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 },
+ { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 },
+ { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 },
+ { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 },
+ { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 },
+ { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 },
+ { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 },
+ { -1, 84 }, { 25, 7 }, { 30, -7 }, { 28, 3 },
+ { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 },
+ { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 },
+ { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 },
+ { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 },
+ { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 },
+ { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 },
+ { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 },
+ { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 },
+ { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 },
+ { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 },
+ { -1, 84 }, { 25, 7 }, { 30, -7 }, { 28, 3 },
+ { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 },
+ { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 },
+ { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 },
+ { -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 },
+ { -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 },
+ { -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 },
+ { -16, 66 }, { -22, 65 }, { -20, 63 }, { -5, 85 },
+ { -6, 81 }, { -10, 77 }, { -7, 81 }, { -17, 80 },
+ { -18, 73 }, { -4, 74 }, { -10, 83 }, { -9, 71 },
+ { -9, 67 }, { -1, 61 }, { -8, 66 }, { -14, 66 },
+ { 0, 59 }, { 2, 59 }, { 9, -2 }, { 26, -9 },
+ { 33, -9 }, { 39, -7 }, { 41, -2 }, { 45, 3 },
+ { 49, 9 }, { 45, 27 }, { 36, 59 }, { 21, -13 },
+ { 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 },
+ { 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 },
+ { -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 },
+ { -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 },
+ { -8, 66 }, { -8, 76 }, { -4, 79 }, { -7, 71 },
+ { -5, 69 }, { -9, 70 }, { -8, 66 }, { -10, 68 },
+ { -19, 73 }, { -12, 69 }, { -16, 70 }, { -15, 67 },
+ { -20, 62 }, { -19, 70 }, { -16, 66 }, { -22, 65 },
+ { -20, 63 }, { -5, 85 }, { -6, 81 }, { -10, 77 },
+ { -7, 81 }, { -17, 80 }, { -18, 73 }, { -4, 74 },
+ { -10, 83 }, { -9, 71 }, { -9, 67 }, { -1, 61 },
+ { -8, 66 }, { -14, 66 }, { 0, 59 }, { 2, 59 },
+ { 9, -2 }, { 26, -9 }, { 33, -9 }, { 39, -7 },
+ { 41, -2 }, { 45, 3 }, { 49, 9 }, { 45, 27 },
+ { 36, 59 }, { 21, -13 }, { 33, -14 }, { 39, -7 },
+ { 46, -2 }, { 51, 2 }, { 60, 6 }, { 61, 17 },
+ { 55, 34 }, { 42, 62 }, { -6, 66 }, { -7, 35 },
+ { -7, 42 }, { -8, 45 }, { -5, 48 }, { -12, 56 },
+ { -6, 60 }, { -5, 62 }, { -8, 66 }, { -8, 76 },
+ { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 },
+ { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 },
+ { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+ { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 },
+ { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 },
+ { -10, 85 }, { -1, 70 }, { 7, 60 }, { 9, 58 },
+ { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 },
+ { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 },
+ { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 },
+ { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 },
+ { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 },
+ { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 },
+ { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 },
+ { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+ { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 },
+ { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 },
+ { -10, 85 }, { -1, 70 }, { 7, 60 }, { 9, 58 },
+ { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 },
+ { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 },
+ { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 },
+ { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 },
+ { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 },
+ { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 },
+ { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 },
+ { 34, -21 }, { 39, -23 }, { 42, -33 }, { 41, -31 },
+ { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 },
+ { 53, -45 }, { 48, -26 }, { 65, -43 }, { 43, -19 },
+ { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 },
+ { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 97 },
+ { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 },
+ { 27, 6 }, { 34, 8 }, { 30, 10 }, { 24, 22 },
+ { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 },
+ { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 },
+ { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 },
+ { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 },
+ { 34, -21 }, { 39, -23 }, { 42, -33 }, { 41, -31 },
+ { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 },
+ { 53, -45 }, { 48, -26 }, { 65, -43 }, { 43, -19 },
+ { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 },
+ { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 97 },
+ { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 },
+ { 27, 6 }, { 34, 8 }, { 30, 10 }, { 24, 22 },
+ { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 },
+ { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 },
+ { -6, 76 }, { -2, 44 }, { 0, 45 }, { 0, 52 },
+ { -3, 64 }, { -2, 59 }, { -4, 70 }, { -4, 75 },
+ { -8, 82 }, { -17, 102 }, { -9, 77 }, { 3, 24 },
+ { 0, 42 }, { 0, 48 }, { 0, 55 }, { -6, 59 },
+ { -7, 71 }, { -12, 83 }, { -11, 87 }, { -30, 119 },
+ { 1, 58 }, { -3, 29 }, { -1, 36 }, { 1, 38 },
+ { 2, 43 }, { -6, 55 }, { 0, 58 }, { 0, 64 },
+ { -3, 74 }, { -10, 90 }, { -6, 76 }, { -2, 44 },
+ { 0, 45 }, { 0, 52 }, { -3, 64 }, { -2, 59 },
+ { -4, 70 }, { -4, 75 }, { -8, 82 }, { -17, 102 },
+ { -9, 77 }, { 3, 24 }, { 0, 42 }, { 0, 48 },
+ { 0, 55 }, { -6, 59 }, { -7, 71 }, { -12, 83 },
+ { -11, 87 }, { -30, 119 }, { 1, 58 }, { -3, 29 },
+ { -1, 36 }, { 1, 38 }, { 2, 43 }, { -6, 55 },
+ { 0, 58 }, { 0, 64 }, { -3, 74 }, { -10, 90 },
+ { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 },
+ { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 },
+ { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }
},
/* i_cabac_init_idc == 1 */
{ -9, 60 }, { 1, 54 }, { 2, 58 }, { 17, -10 },
{ 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 },
{ 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 },
+
+ /* 460 - 1024 */
+ { 0, 80 }, { -5, 89 }, { -7, 94 }, { -4, 92 },
+ { 0, 39 }, { 0, 65 }, { -15, 84 }, { -35, 127 },
+ { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 },
+ { 0, 80 }, { -5, 89 }, { -7, 94 }, { -4, 92 },
+ { 0, 39 }, { 0, 65 }, { -15, 84 }, { -35, 127 },
+ { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 },
+ { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 },
+ { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 },
+ { -10, 78 }, { -20, 112 }, { -17, 99 }, { -78, 127 },
+ { -70, 127 }, { -50, 127 }, { -46, 127 }, { -4, 66 },
+ { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 },
+ { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 },
+ { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 },
+ { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 },
+ { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 },
+ { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 },
+ { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 },
+ { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 },
+ { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 },
+ { -10, 78 }, { -20, 112 }, { -17, 99 }, { -78, 127 },
+ { -70, 127 }, { -50, 127 }, { -46, 127 }, { -4, 66 },
+ { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 },
+ { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 },
+ { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 },
+ { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 },
+ { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 },
+ { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 },
+ { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 },
+ { 4, 45 }, { 10, 28 }, { 10, 31 }, { 33, -11 },
+ { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 },
+ { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 },
+ { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 },
+ { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 },
+ { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 },
+ { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 },
+ { 27, 36 }, { 33, -25 }, { 34, -30 }, { 36, -28 },
+ { 38, -28 }, { 38, -27 }, { 34, -18 }, { 35, -16 },
+ { 34, -14 }, { 32, -8 }, { 37, -6 }, { 35, 0 },
+ { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 },
+ { 4, 45 }, { 10, 28 }, { 10, 31 }, { 33, -11 },
+ { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 },
+ { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 },
+ { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 },
+ { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 },
+ { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 },
+ { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 },
+ { 27, 36 }, { 33, -25 }, { 34, -30 }, { 36, -28 },
+ { 38, -28 }, { 38, -27 }, { 34, -18 }, { 35, -16 },
+ { 34, -14 }, { 32, -8 }, { 37, -6 }, { 35, 0 },
+ { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 },
+ { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 },
+ { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 },
+ { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 },
+ { -14, 66 }, { 0, 59 }, { 2, 59 }, { -3, 81 },
+ { -3, 76 }, { -7, 72 }, { -6, 78 }, { -12, 72 },
+ { -14, 68 }, { -3, 70 }, { -6, 76 }, { -5, 66 },
+ { -5, 62 }, { 0, 57 }, { -4, 61 }, { -9, 60 },
+ { 1, 54 }, { 2, 58 }, { 17, -10 }, { 32, -13 },
+ { 42, -9 }, { 49, -5 }, { 53, 0 }, { 64, 3 },
+ { 68, 10 }, { 66, 27 }, { 47, 57 }, { 17, -10 },
+ { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 },
+ { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 },
+ { -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 },
+ { -2, 52 }, { -9, 57 }, { -6, 63 }, { -4, 65 },
+ { -4, 67 }, { -7, 82 }, { -5, 85 }, { -6, 81 },
+ { -10, 77 }, { -7, 81 }, { -17, 80 }, { -18, 73 },
+ { -4, 74 }, { -10, 83 }, { -9, 71 }, { -9, 67 },
+ { -1, 61 }, { -8, 66 }, { -14, 66 }, { 0, 59 },
+ { 2, 59 }, { -3, 81 }, { -3, 76 }, { -7, 72 },
+ { -6, 78 }, { -12, 72 }, { -14, 68 }, { -3, 70 },
+ { -6, 76 }, { -5, 66 }, { -5, 62 }, { 0, 57 },
+ { -4, 61 }, { -9, 60 }, { 1, 54 }, { 2, 58 },
+ { 17, -10 }, { 32, -13 }, { 42, -9 }, { 49, -5 },
+ { 53, 0 }, { 64, 3 }, { 68, 10 }, { 66, 27 },
+ { 47, 57 }, { 17, -10 }, { 32, -13 }, { 42, -9 },
+ { 49, -5 }, { 53, 0 }, { 64, 3 }, { 68, 10 },
+ { 66, 27 }, { 47, 57 }, { -5, 71 }, { 0, 24 },
+ { -1, 36 }, { -2, 42 }, { -2, 52 }, { -9, 57 },
+ { -6, 63 }, { -4, 65 }, { -4, 67 }, { -7, 82 },
+ { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+ { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+ { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+ { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 },
+ { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 },
+ { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 },
+ { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 },
+ { 7, 69 }, { -7, 69 }, { -6, 67 }, { -16, 77 },
+ { -2, 64 }, { 2, 61 }, { -6, 67 }, { -3, 64 },
+ { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 },
+ { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 },
+ { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+ { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+ { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+ { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 },
+ { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 },
+ { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 },
+ { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 },
+ { 7, 69 }, { -7, 69 }, { -6, 67 }, { -16, 77 },
+ { -2, 64 }, { 2, 61 }, { -6, 67 }, { -3, 64 },
+ { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 },
+ { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 },
+ { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 },
+ { 31, -16 }, { 33, -25 }, { 33, -22 }, { 37, -28 },
+ { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 },
+ { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 },
+ { 63, -63 }, { 66, -64 }, { 77, -74 }, { 54, -39 },
+ { 52, -35 }, { 41, -10 }, { 36, 0 }, { 40, -1 },
+ { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 },
+ { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 },
+ { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 },
+ { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 },
+ { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 },
+ { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 },
+ { 31, -16 }, { 33, -25 }, { 33, -22 }, { 37, -28 },
+ { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 },
+ { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 },
+ { 63, -63 }, { 66, -64 }, { 77, -74 }, { 54, -39 },
+ { 52, -35 }, { 41, -10 }, { 36, 0 }, { 40, -1 },
+ { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 },
+ { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 },
+ { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 },
+ { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 },
+ { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 },
+ { -23, 112 }, { -15, 71 }, { -7, 61 }, { 0, 53 },
+ { -5, 66 }, { -11, 77 }, { -9, 80 }, { -9, 84 },
+ { -10, 87 }, { -34, 127 }, { -21, 101 }, { -3, 39 },
+ { -5, 53 }, { -7, 61 }, { -11, 75 }, { -15, 77 },
+ { -17, 91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
+ { -11, 76 }, { -10, 44 }, { -10, 52 }, { -10, 57 },
+ { -9, 58 }, { -16, 72 }, { -7, 69 }, { -4, 69 },
+ { -5, 74 }, { -9, 86 }, { -23, 112 }, { -15, 71 },
+ { -7, 61 }, { 0, 53 }, { -5, 66 }, { -11, 77 },
+ { -9, 80 }, { -9, 84 }, { -10, 87 }, { -34, 127 },
+ { -21, 101 }, { -3, 39 }, { -5, 53 }, { -7, 61 },
+ { -11, 75 }, { -15, 77 }, { -17, 91 }, { -25, 107 },
+ { -25, 111 }, { -28, 122 }, { -11, 76 }, { -10, 44 },
+ { -10, 52 }, { -10, 57 }, { -9, 58 }, { -16, 72 },
+ { -7, 69 }, { -4, 69 }, { -5, 74 }, { -9, 86 },
+ { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 },
+ { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 },
+ { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }
},
/* i_cabac_init_idc == 2 */
{ -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 },
{ 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 },
{ 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 },
+
+ /* 460 - 1024 */
+ { 11, 80 }, { 5, 76 }, { 2, 84 }, { 5, 78 },
+ { -6, 55 }, { 4, 61 }, { -14, 83 }, { -37, 127 },
+ { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 },
+ { 11, 80 }, { 5, 76 }, { 2, 84 }, { 5, 78 },
+ { -6, 55 }, { 4, 61 }, { -14, 83 }, { -37, 127 },
+ { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 },
+ { -4, 86 }, { -12, 88 }, { -5, 82 }, { -3, 72 },
+ { -4, 67 }, { -8, 72 }, { -16, 89 }, { -9, 69 },
+ { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 },
+ { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 },
+ { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 },
+ { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 },
+ { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 },
+ { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 },
+ { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 },
+ { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 },
+ { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 },
+ { -4, 86 }, { -12, 88 }, { -5, 82 }, { -3, 72 },
+ { -4, 67 }, { -8, 72 }, { -16, 89 }, { -9, 69 },
+ { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 },
+ { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 },
+ { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 },
+ { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 },
+ { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 },
+ { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 },
+ { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 },
+ { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 },
+ { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 },
+ { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 },
+ { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 },
+ { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 },
+ { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 },
+ { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 },
+ { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 },
+ { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 },
+ { 8, 63 }, { 35, -18 }, { 33, -25 }, { 28, -3 },
+ { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 },
+ { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 },
+ { 24, 33 }, { 34, 15 }, { 30, 20 }, { 22, 73 },
+ { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 },
+ { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 },
+ { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 },
+ { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 },
+ { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 },
+ { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 },
+ { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 },
+ { 8, 63 }, { 35, -18 }, { 33, -25 }, { 28, -3 },
+ { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 },
+ { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 },
+ { 24, 33 }, { 34, 15 }, { 30, 20 }, { 22, 73 },
+ { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 },
+ { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 },
+ { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 },
+ { -14, 59 }, { -9, 52 }, { -11, 68 }, { -3, 78 },
+ { -8, 74 }, { -9, 72 }, { -10, 72 }, { -18, 75 },
+ { -12, 71 }, { -11, 63 }, { -5, 70 }, { -17, 75 },
+ { -14, 72 }, { -16, 67 }, { -8, 53 }, { -14, 59 },
+ { -9, 52 }, { -11, 68 }, { 9, -2 }, { 30, -10 },
+ { 31, -4 }, { 33, -1 }, { 33, 7 }, { 31, 12 },
+ { 37, 23 }, { 31, 38 }, { 20, 64 }, { 9, -2 },
+ { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 },
+ { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 },
+ { -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 },
+ { -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 },
+ { -6, 68 }, { -10, 79 }, { -3, 78 }, { -8, 74 },
+ { -9, 72 }, { -10, 72 }, { -18, 75 }, { -12, 71 },
+ { -11, 63 }, { -5, 70 }, { -17, 75 }, { -14, 72 },
+ { -16, 67 }, { -8, 53 }, { -14, 59 }, { -9, 52 },
+ { -11, 68 }, { -3, 78 }, { -8, 74 }, { -9, 72 },
+ { -10, 72 }, { -18, 75 }, { -12, 71 }, { -11, 63 },
+ { -5, 70 }, { -17, 75 }, { -14, 72 }, { -16, 67 },
+ { -8, 53 }, { -14, 59 }, { -9, 52 }, { -11, 68 },
+ { 9, -2 }, { 30, -10 }, { 31, -4 }, { 33, -1 },
+ { 33, 7 }, { 31, 12 }, { 37, 23 }, { 31, 38 },
+ { 20, 64 }, { 9, -2 }, { 30, -10 }, { 31, -4 },
+ { 33, -1 }, { 33, 7 }, { 31, 12 }, { 37, 23 },
+ { 31, 38 }, { 20, 64 }, { -9, 71 }, { -7, 37 },
+ { -8, 44 }, { -11, 49 }, { -10, 56 }, { -12, 59 },
+ { -8, 63 }, { -9, 67 }, { -6, 68 }, { -10, 79 },
+ { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+ { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+ { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+ { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 },
+ { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 },
+ { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 },
+ { -1, 73 }, { 4, 64 }, { -7, 81 }, { 5, 64 },
+ { 15, 57 }, { 1, 67 }, { 0, 68 }, { -10, 67 },
+ { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 },
+ { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 },
+ { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 },
+ { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+ { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+ { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+ { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 },
+ { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 },
+ { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 },
+ { -1, 73 }, { 4, 64 }, { -7, 81 }, { 5, 64 },
+ { 15, 57 }, { 1, 67 }, { 0, 68 }, { -10, 67 },
+ { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 },
+ { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 },
+ { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 },
+ { 17, -13 }, { 16, -9 }, { 17, -12 }, { 27, -21 },
+ { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 },
+ { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 },
+ { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 },
+ { 56, -46 }, { 62, -50 }, { 81, -67 }, { 45, -20 },
+ { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 },
+ { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 },
+ { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 },
+ { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 },
+ { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 },
+ { 33, 41 }, { 36, 28 }, { 27, 47 }, { 21, 62 },
+ { 17, -13 }, { 16, -9 }, { 17, -12 }, { 27, -21 },
+ { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 },
+ { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 },
+ { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 },
+ { 56, -46 }, { 62, -50 }, { 81, -67 }, { 45, -20 },
+ { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 },
+ { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 },
+ { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 },
+ { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 },
+ { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 },
+ { 33, 41 }, { 36, 28 }, { 27, 47 }, { 21, 62 },
+ { -24, 115 }, { -22, 82 }, { -9, 62 }, { 0, 53 },
+ { 0, 59 }, { -14, 85 }, { -13, 89 }, { -13, 94 },
+ { -11, 92 }, { -29, 127 }, { -21, 100 }, { -14, 57 },
+ { -12, 67 }, { -11, 71 }, { -10, 77 }, { -21, 85 },
+ { -16, 88 }, { -23, 104 }, { -15, 98 }, { -37, 127 },
+ { -10, 82 }, { -8, 48 }, { -8, 61 }, { -8, 66 },
+ { -7, 70 }, { -14, 75 }, { -10, 79 }, { -9, 83 },
+ { -12, 92 }, { -18, 108 }, { -24, 115 }, { -22, 82 },
+ { -9, 62 }, { 0, 53 }, { 0, 59 }, { -14, 85 },
+ { -13, 89 }, { -13, 94 }, { -11, 92 }, { -29, 127 },
+ { -21, 100 }, { -14, 57 }, { -12, 67 }, { -11, 71 },
+ { -10, 77 }, { -21, 85 }, { -16, 88 }, { -23, 104 },
+ { -15, 98 }, { -37, 127 }, { -10, 82 }, { -8, 48 },
+ { -8, 61 }, { -8, 66 }, { -7, 70 }, { -14, 75 },
+ { -10, 79 }, { -9, 83 }, { -12, 92 }, { -18, 108 },
+ { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 },
+ { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 },
+ { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }
}
};
FIX8(0.9285), FIX8(1.0752), FIX8(1.0000), FIX8(1.0000)
};
-uint8_t x264_cabac_contexts[4][QP_MAX_SPEC+1][460];
+uint8_t x264_cabac_contexts[4][QP_MAX_SPEC+1][1024];
-void x264_cabac_init( void )
+void x264_cabac_init( x264_t *h )
{
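+ /* Only the first 460 contexts are ever referenced for 4:2:0 streams; the
+ * entries past 459 are needed solely when coding the two full-resolution
+ * chroma planes of 4:4:4 streams, so their init is skipped otherwise. */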
+ int ctx_count = CHROMA444 ? 1024 : 460;
for( int i = 0; i < 4; i++ )
{
- const int8_t (*cabac_context_init)[460][2] = i == 0 ? &x264_cabac_context_init_I
- : &x264_cabac_context_init_PB[i-1];
+ const int8_t (*cabac_context_init)[1024][2] = i == 0 ? &x264_cabac_context_init_I
+ : &x264_cabac_context_init_PB[i-1];
for( int qp = 0; qp <= QP_MAX_SPEC; qp++ )
- for( int j = 0; j < 460; j++ )
+ for( int j = 0; j < ctx_count; j++ )
{
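+ /* Spec-style linear init: state = clip3( (m*qp >> 4) + n, 1, 126 ), then
+ * packed as (6-bit probability state)<<1 | MPS bit so it can index the
+ * 128-entry x264_cabac_transition/x264_cabac_entropy tables.
+ * Worked example: qp=26, {m,n}={20,-15}: clip3(32-15,1,126) = 17, packed
+ * as min(17,110)<<1 | 17>>6 = 34. */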
int state = x264_clip3( (((*cabac_context_init)[j][0] * qp) >> 4) + (*cabac_context_init)[j][1], 1, 126 );
x264_cabac_contexts[i][qp][j] = (X264_MIN( state, 127-state ) << 1) | (state >> 6);
/*****************************************************************************
*
*****************************************************************************/
-void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model )
+void x264_cabac_context_init( x264_t *h, x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model )
{
- memcpy( cb->state, x264_cabac_contexts[i_slice_type == SLICE_TYPE_I ? 0 : i_model + 1][i_qp], 460 );
+ memcpy( cb->state, x264_cabac_contexts[i_slice_type == SLICE_TYPE_I ? 0 : i_model + 1][i_qp], CHROMA444 ? 1024 : 460 );
}
void x264_cabac_encode_init_core( x264_cabac_t *cb )
ALIGNED_16( int f8_bits_encoded ); // only if using x264_cabac_size_decision()
/* context */
- uint8_t state[460];
+ uint8_t state[1024];
+
+ /* for 16-byte alignment */
+ uint8_t padding[12];
} x264_cabac_t;
extern const uint8_t x264_cabac_transition[128][2];
extern const uint16_t x264_cabac_entropy[128];
/* init the contexts given i_slice_type, the quantizer and the model */
-void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model );
+void x264_cabac_context_init( x264_t *h, x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model );
void x264_cabac_encode_init_core( x264_cabac_t *cb );
void x264_cabac_encode_init ( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end );
param->i_cqm_preset = X264_CQM_FLAT;
memset( param->cqm_4iy, 16, sizeof( param->cqm_4iy ) );
- memset( param->cqm_4ic, 16, sizeof( param->cqm_4ic ) );
memset( param->cqm_4py, 16, sizeof( param->cqm_4py ) );
+ memset( param->cqm_4ic, 16, sizeof( param->cqm_4ic ) );
memset( param->cqm_4pc, 16, sizeof( param->cqm_4pc ) );
memset( param->cqm_8iy, 16, sizeof( param->cqm_8iy ) );
memset( param->cqm_8py, 16, sizeof( param->cqm_8py ) );
+ memset( param->cqm_8ic, 16, sizeof( param->cqm_8ic ) );
+ memset( param->cqm_8pc, 16, sizeof( param->cqm_8pc ) );
param->b_repeat_headers = 1;
param->b_annexb = 1;
{
p->i_cqm_preset = X264_CQM_CUSTOM;
b_error |= parse_cqm( value, p->cqm_4iy, 16 );
- b_error |= parse_cqm( value, p->cqm_4ic, 16 );
b_error |= parse_cqm( value, p->cqm_4py, 16 );
+ b_error |= parse_cqm( value, p->cqm_4ic, 16 );
b_error |= parse_cqm( value, p->cqm_4pc, 16 );
}
OPT("cqm8")
p->i_cqm_preset = X264_CQM_CUSTOM;
b_error |= parse_cqm( value, p->cqm_8iy, 64 );
b_error |= parse_cqm( value, p->cqm_8py, 64 );
+ b_error |= parse_cqm( value, p->cqm_8ic, 64 );
+ b_error |= parse_cqm( value, p->cqm_8pc, 64 );
}
OPT("cqm4i")
{
{
p->i_cqm_preset = X264_CQM_CUSTOM;
b_error |= parse_cqm( value, p->cqm_8iy, 64 );
+ b_error |= parse_cqm( value, p->cqm_8ic, 64 );
}
OPT("cqm8p")
{
p->i_cqm_preset = X264_CQM_CUSTOM;
b_error |= parse_cqm( value, p->cqm_8py, 64 );
+ b_error |= parse_cqm( value, p->cqm_8pc, 64 );
}
OPT("log")
p->i_log_level = atoi(value);
#define X264_BFRAME_MAX 16
#define X264_REF_MAX 16
#define X264_THREAD_MAX 128
-#define X264_PCM_COST (384*BIT_DEPTH+16)
+#define X264_PCM_COST ((384<<CHROMA444)*BIT_DEPTH+16)
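+/* PCM cost sketch: an I_PCM macroblock stores 256 luma + 128 chroma samples
+ * (384) in 4:2:0, or 256*3 = 768 = 384<<1 in 4:4:4, at BIT_DEPTH bits each,
+ * plus roughly 16 bits of mb_type/alignment overhead. */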
#define X264_LOOKAHEAD_MAX 250
#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
#define QP_MAX_SPEC (51+QP_BD_OFFSET)
# define PARAM_INTERLACED 0
#endif
+#define CHROMA444 (h->sps->i_chroma_format_idc == 3)
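+/* Note: like most x264 macros, CHROMA444 assumes an x264_t *h is in scope at
+ * the point of use; chroma_format_idc == 3 is the SPS signalling for 4:4:4. */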
+
/* Unions for type-punning.
* Mn: load or store n bits, aligned, native-endian
* CPn: copy n bits, aligned, native-endian
#define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src)
-#define X264_SCAN8_SIZE (6*8)
#define X264_SCAN8_LUMA_SIZE (5*8)
+#define X264_SCAN8_SIZE (X264_SCAN8_LUMA_SIZE*3)
#define X264_SCAN8_0 (4+1*8)
-static const unsigned x264_scan8[16+2*4+3] =
+/* Scan8 organization:
+ * 0 1 2 3 4 5 6 7
+ * 0 DY y y y y y
+ * 1 y Y Y Y Y
+ * 2 y Y Y Y Y
+ * 3 y Y Y Y Y
+ * 4 y Y Y Y Y
+ * 5 DU u u u u u
+ * 6 u U U U U
+ * 7 u U U U U
+ * 8 u U U U U
+ * 9 u U U U U
+ * 10 DV v v v v v
+ * 11 v V V V V
+ * 12 v V V V V
+ * 13 v V V V V
+ * 14 v V V V V
+ * DY/DU/DV are for luma/chroma DC.
+ */
+
+#define LUMA_DC 48
+#define CHROMA_DC 49
+
+static const uint8_t x264_scan8[16*3 + 3] =
{
- /* Luma */
- 4+1*8, 5+1*8, 4+2*8, 5+2*8,
- 6+1*8, 7+1*8, 6+2*8, 7+2*8,
- 4+3*8, 5+3*8, 4+4*8, 5+4*8,
- 6+3*8, 7+3*8, 6+4*8, 7+4*8,
-
- /* Cb */
- 1+1*8, 2+1*8,
- 1+2*8, 2+2*8,
-
- /* Cr */
- 1+4*8, 2+4*8,
- 1+5*8, 2+5*8,
-
- /* Luma DC */
- 4+5*8,
-
- /* Chroma DC */
- 6+5*8, 7+5*8
+ 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
+ 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
+ 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
+ 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
+ 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
+ 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
+ 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
+ 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
+ 4+11*8, 5+11*8, 4+12*8, 5+12*8,
+ 6+11*8, 7+11*8, 6+12*8, 7+12*8,
+ 4+13*8, 5+13*8, 4+14*8, 5+14*8,
+ 6+13*8, 7+13*8, 6+14*8, 7+14*8,
+ 0+ 0*8, 0+ 5*8, 0+10*8
};
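+/* Usage sketch: the caches addressed through scan8 keep a stride of 8, so for
+ * a 4x4 block b the left neighbour sits at x264_scan8[b] - 1 and the top
+ * neighbour at x264_scan8[b] - 8 -- now uniformly for luma and both chroma
+ * planes, since each plane occupies its own 5-row band of the layout. */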
-/*
- 0 1 2 3 4 5 6 7
- 0
- 1 B B L L L L
- 2 B B L L L L
- 3 L L L L
- 4 R R L L L L
- 5 R R Dy DuDv
-*/
#include "x264.h"
#include "bitstream.h"
void x264_reduce_fraction( uint32_t *n, uint32_t *d );
void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
void x264_cavlc_init( void );
-void x264_cabac_init( void );
+void x264_cabac_init( x264_t *h );
static ALWAYS_INLINE pixel x264_clip_pixel( int x )
{
/* quantization matrix for decoding, [cqm][qp%6][coef] */
int (*dequant4_mf[4])[16]; /* [4][6][16] */
- int (*dequant8_mf[2])[64]; /* [2][6][64] */
+ int (*dequant8_mf[4])[64]; /* [4][6][64] */
/* quantization matrix for trellis, [cqm][qp][coef] */
int (*unquant4_mf[4])[16]; /* [4][52][16] */
- int (*unquant8_mf[2])[64]; /* [2][52][64] */
+ int (*unquant8_mf[4])[64]; /* [4][52][64] */
/* quantization matrix for deadzone */
udctcoef (*quant4_mf[4])[16]; /* [4][52][16] */
- udctcoef (*quant8_mf[2])[64]; /* [2][52][64] */
+ udctcoef (*quant8_mf[4])[64]; /* [4][52][64] */
udctcoef (*quant4_bias[4])[16]; /* [4][52][16] */
- udctcoef (*quant8_bias[2])[64]; /* [2][52][64] */
- udctcoef (*nr_offset_emergency)[3][64];
+ udctcoef (*quant8_bias[4])[64]; /* [4][52][64] */
+ udctcoef (*nr_offset_emergency)[4][64];
/* mv/ref cost arrays. */
uint16_t *cost_mv[QP_MAX+1];
/* Current MB DCT coeffs */
struct
{
- ALIGNED_16( dctcoef luma16x16_dc[16] );
+ ALIGNED_16( dctcoef luma16x16_dc[3][16] );
ALIGNED_16( dctcoef chroma_dc[2][4] );
// FIXME share memory?
- ALIGNED_16( dctcoef luma8x8[4][64] );
- ALIGNED_16( dctcoef luma4x4[16+8][16] );
+ ALIGNED_16( dctcoef luma8x8[12][64] );
+ ALIGNED_16( dctcoef luma4x4[16*3][16] );
} dct;
/* MB table and cache for current frame/mb */
int16_t *cbp; /* mb cbp: 0x0?: luma, 0x?0: chroma, 0x100: luma dc, 0x0200 and 0x0400: chroma dc (all set for PCM)*/
int8_t (*intra4x4_pred_mode)[8]; /* intra4x4 pred mode. for non I4x4 set to I_PRED_4x4_DC(2) */
/* actually has only 7 entries; set to 8 for write-combining optimizations */
- uint8_t (*non_zero_count)[16+4+4]; /* nzc. for I_PCM set to 16 */
+ uint8_t (*non_zero_count)[16*3]; /* nzc. for I_PCM set to 16 */
int8_t *chroma_pred_mode; /* chroma_pred_mode. cabac only. for non intra I_PRED_CHROMA_DC(0) */
int16_t (*mv[2])[2]; /* mb mv. set to 0 for intra mb */
uint8_t (*mvd[2])[8][2]; /* absolute value of mb mv difference with predict, clipped to [0,33]. set to 0 if intra. cabac only */
/* space for p_fenc and p_fdec */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
- ALIGNED_16( pixel fenc_buf[24*FENC_STRIDE] );
- ALIGNED_16( pixel fdec_buf[27*FDEC_STRIDE] );
+ ALIGNED_16( pixel fenc_buf[48*FENC_STRIDE] );
+ ALIGNED_16( pixel fdec_buf[52*FDEC_STRIDE] );
/* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
/* pointer over mb of the frame to be compressed */
pixel *p_fenc[3]; /* y,u,v */
/* pointer to the actual source frame, not a block copy */
- pixel *p_fenc_plane[2]; /* y,uv */
+ pixel *p_fenc_plane[3];
/* pointer over mb of the frame to be reconstructed */
pixel *p_fdec[3];
/* pointer over mb of the references */
int i_fref[2];
- pixel *p_fref[2][X264_REF_MAX*2][4+1]; /* last: yN, yH, yV, yHV, uv */
+ /* [12]: yN, yH, yV, yHV, (NV12 ? uv : I444 ? (uN, uH, uV, uHV, vN, ...)) */
+ pixel *p_fref[2][X264_REF_MAX*2][12];
pixel *p_fref_w[X264_REF_MAX*2]; /* weighted fullpel luma */
uint16_t *p_integral[2][X264_REF_MAX];
} stat;
- /* 0 = luma 4x4, 1 = luma 8x8, 2 = chroma 4x4 */
+ /* 0 = luma 4x4, 1 = luma 8x8, 2 = chroma 4x4, 3 = chroma 8x8 */
udctcoef (*nr_offset)[64];
uint32_t (*nr_residual_sum)[64];
uint32_t *nr_count;
- ALIGNED_16( udctcoef nr_offset_denoise[3][64] );
- ALIGNED_16( uint32_t nr_residual_sum_buf[2][3][64] );
- uint32_t nr_count_buf[2][3];
+ ALIGNED_16( udctcoef nr_offset_denoise[4][64] );
+ ALIGNED_16( uint32_t nr_residual_sum_buf[2][4][64] );
+ uint32_t nr_count_buf[2][4];
/* Buffers that are allocated per-thread even in sliced threads. */
void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
- pixel *intra_border_backup[5][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
+ pixel *intra_border_backup[5][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
/* Deblock strength values are stored for each 4x4 partition. In MBAFF
* there are four extra values that need to be stored, located in [4][i]. */
uint8_t (*deblock_strength[2])[2][8][4];
else
{
const uint8_t *off = offset[MB_INTERLACED][h->mb.i_mb_y&1];
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
+ uint8_t (*nnz)[48] = h->mb.non_zero_count;
for( int i = 0; i < 8; i++ )
{
for( int j = 0; j < 2; j++, mbn_xy += h->mb.i_mb_stride )
{
int mbn_intra = IS_INTRA( h->mb.type[mbn_xy] );
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
+ uint8_t (*nnz)[48] = h->mb.non_zero_count;
uint32_t nnz_top[4];
nnz_top[0] = nnz[mbn_xy][3*4+0];
uint8_t (*bs)[8][4] = h->deblock_strength[mb_y&1][mb_x];
pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
- pixel *pixuv = h->fdec->plane[1] + 8*mb_y*strideuv + 16*mb_x;
+ pixel *pixuv = h->fdec->plane[1] + (8<<CHROMA444)*mb_y*strideuv + 16*mb_x;
+ intptr_t uvdiff = CHROMA444 ? h->fdec->plane[2] - h->fdec->plane[1] : 1;
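+ /* uvdiff = offset from a U sample to its V counterpart: NV12 interleaves
+ * U/V so V is 1 pixel away; I444 keeps separate planes, so it is the
+ * plane[2] - plane[1] pointer difference. */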
if( mb_y & MB_INTERLACED )
{
pixy -= 15*stridey;
- pixuv -= 7*strideuv;
+ pixuv -= ((8<<CHROMA444)-1)*strideuv;
}
int stride2y = stridey << MB_INTERLACED;
deblock_edge##intra( h, pixy + 4*edge*(dir?stride2y:1),\
stride2y, bs[dir][edge], qp, 0,\
h->loopf.deblock_luma##intra[dir] );\
- if( !(edge & 1) )\
+ if( CHROMA444 )\
+ {\
+ deblock_edge##intra( h, pixuv + 4*edge*(dir?stride2uv:1),\
+ stride2uv, bs[dir][edge], chroma_qp, 0,\
+ h->loopf.deblock_luma##intra[dir] );\
+ deblock_edge##intra( h, pixuv + uvdiff + 4*edge*(dir?stride2uv:1),\
+ stride2uv, bs[dir][edge], chroma_qp, 0,\
+ h->loopf.deblock_luma##intra[dir] );\
+ }\
+ else if( !(edge & 1) )\
deblock_edge##intra( h, pixuv + 2*edge*(dir?stride2uv:2),\
stride2uv, bs[dir][edge], chroma_qp, 1,\
h->loopf.deblock_chroma##intra[dir] );\
int chroma_qp[2];
int left_qp[2];
int current_qp = h->mb.qp[mb_xy];
+ x264_deblock_inter_t luma_deblock = deblock_v_luma_mbaff_c;
+ x264_deblock_inter_t chroma_deblock = CHROMA444 ? deblock_v_luma_mbaff_c : deblock_v_chroma_mbaff_c;
+ x264_deblock_intra_t luma_intra_deblock = deblock_v_luma_intra_mbaff_c;
+ x264_deblock_intra_t chroma_intra_deblock = CHROMA444 ? deblock_v_luma_intra_mbaff_c : deblock_v_chroma_intra_mbaff_c;
+ int c = CHROMA444 ? 0 : 1;
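+ /* In 4:4:4 the chroma planes are full resolution, so they take the luma
+ * deblocking path, with c = 0 so edges are handled as luma; 4:2:0 keeps
+ * the dedicated chroma filters. */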
+
left_qp[0] = h->mb.qp[h->mb.i_mb_left_xy[0]];
luma_qp[0] = (current_qp + left_qp[0] + 1) >> 1;
chroma_qp[0] = (h->chroma_qp_table[current_qp] + h->chroma_qp_table[left_qp[0]] + 1) >> 1;
if( bs[0][0][0] == 4)
{
- deblock_edge_intra( h, pixy, 2*stridey, bs[0][0], luma_qp[0], 0, deblock_v_luma_intra_mbaff_c );
- deblock_edge_intra( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], 1, deblock_v_chroma_intra_mbaff_c );
- deblock_edge_intra( h, pixuv + 1, 2*strideuv, bs[0][0], chroma_qp[0], 1, deblock_v_chroma_intra_mbaff_c );
+ deblock_edge_intra( h, pixy, 2*stridey, bs[0][0], luma_qp[0], 0, luma_intra_deblock );
+ deblock_edge_intra( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], c, chroma_intra_deblock );
+ deblock_edge_intra( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], c, chroma_intra_deblock );
}
else
{
- deblock_edge( h, pixy, 2*stridey, bs[0][0], luma_qp[0], 0, deblock_v_luma_mbaff_c );
- deblock_edge( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], 1, deblock_v_chroma_mbaff_c );
- deblock_edge( h, pixuv + 1, 2*strideuv, bs[0][0], chroma_qp[0], 1, deblock_v_chroma_mbaff_c );
+ deblock_edge( h, pixy, 2*stridey, bs[0][0], luma_qp[0], 0, luma_deblock );
+ deblock_edge( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], c, chroma_deblock );
+ deblock_edge( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], c, chroma_deblock );
}
int offy = MB_INTERLACED ? 4 : 0;
int offuv = MB_INTERLACED ? 3 : 0;
+ if( CHROMA444 ) offuv = offy;
left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]];
luma_qp[1] = (current_qp + left_qp[1] + 1) >> 1;
chroma_qp[1] = (h->chroma_qp_table[current_qp] + h->chroma_qp_table[left_qp[1]] + 1) >> 1;
if( bs[0][4][0] == 4)
{
- deblock_edge_intra( h, pixy + (stridey<<offy), 2*stridey, bs[0][4], luma_qp[1], 0, deblock_v_luma_intra_mbaff_c );
- deblock_edge_intra( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], 1, deblock_v_chroma_intra_mbaff_c );
- deblock_edge_intra( h, pixuv + 1 + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], 1, deblock_v_chroma_intra_mbaff_c );
+ deblock_edge_intra( h, pixy + (stridey<<offy), 2*stridey, bs[0][4], luma_qp[1], 0, luma_intra_deblock );
+ deblock_edge_intra( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], c, chroma_intra_deblock );
+ deblock_edge_intra( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], c, chroma_intra_deblock );
}
else
{
- deblock_edge( h, pixy + (stridey<<offy), 2*stridey, bs[0][4], luma_qp[1], 0, deblock_v_luma_mbaff_c );
- deblock_edge( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], 1, deblock_v_chroma_mbaff_c );
- deblock_edge( h, pixuv + 1 + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], 1, deblock_v_chroma_mbaff_c );
+ deblock_edge( h, pixy + (stridey<<offy), 2*stridey, bs[0][4], luma_qp[1], 0, luma_deblock );
+ deblock_edge( h, pixuv + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], c, chroma_deblock );
+ deblock_edge( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], c, chroma_deblock );
}
}
else
// deblock the first horizontal edge of the even rows, then the first horizontal edge of the odd rows
deblock_edge( h, pixy + j*stridey, 2* stridey, bs[1][4*j], qp_top, 0, deblock_v_luma_c );
- deblock_edge( h, pixuv + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, 1, deblock_v_chroma_c );
+ if( CHROMA444 )
+ {
+ deblock_edge( h, pixuv + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, 0, deblock_v_luma_c );
+ deblock_edge( h, pixuv + uvdiff + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, 0, deblock_v_luma_c );
+ }
+ else
+ deblock_edge( h, pixuv + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, 1, deblock_v_chroma_c );
}
}
else
* TODO:
* deblock macroblock edges
* support analysis partitions smaller than 16x16
- * deblock chroma
+ * deblock chroma for 4:2:0
* handle duplicate refs correctly
* handle cavlc+8x8dct correctly
*/
{
int qp_thresh = 15 - X264_MIN( h->sh.i_alpha_c0_offset, h->sh.i_beta_offset ) - X264_MAX( 0, h->pps->i_chroma_qp_index_offset );
int qp = h->mb.i_qp;
+ int qpc = h->mb.i_chroma_qp;
if( qp <= qp_thresh || h->mb.i_type == P_SKIP )
return;
bs, 4 >> SLICE_MBAFF, h->sh.i_type == SLICE_TYPE_B, h );
int transform_8x8 = h->mb.b_transform_8x8;
- pixel *fdec = h->mb.pic.p_fdec[0];
#define FILTER( dir, edge )\
do\
{\
- deblock_edge( h, fdec + 4*edge*(dir?FDEC_STRIDE:1),\
+ deblock_edge( h, h->mb.pic.p_fdec[0] + 4*edge*(dir?FDEC_STRIDE:1),\
FDEC_STRIDE, bs[dir][edge], qp, 0,\
h->loopf.deblock_luma[dir] );\
+ if( CHROMA444 )\
+ {\
+ deblock_edge( h, h->mb.pic.p_fdec[1] + 4*edge*(dir?FDEC_STRIDE:1),\
+ FDEC_STRIDE, bs[dir][edge], qpc, 0,\
+ h->loopf.deblock_luma[dir] );\
+ deblock_edge( h, h->mb.pic.p_fdec[2] + 4*edge*(dir?FDEC_STRIDE:1),\
+ FDEC_STRIDE, bs[dir][edge], qpc, 0,\
+ h->loopf.deblock_luma[dir] );\
+ }\
} while(0)
if( !transform_8x8 ) FILTER( 0, 1 );
return x;
}
-x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
+static int x264_frame_internal_csp( int external_csp )
{
- x264_frame_t *frame;
+ switch( external_csp & X264_CSP_MASK )
+ {
+ case X264_CSP_NV12:
+ case X264_CSP_I420:
+ case X264_CSP_YV12:
+ return X264_CSP_NV12;
+ case X264_CSP_I444:
+ case X264_CSP_YV24:
+ return X264_CSP_I444;
+ default:
+ return X264_CSP_NONE;
+ }
+}
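+/* i.e. every supported 4:2:0-family input is normalized to NV12 internally
+ * and every 4:4:4 input to planar I444; anything else makes x264_frame_new
+ * fail below. */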
+static x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
+{
+ x264_frame_t *frame;
+ int i_csp = x264_frame_internal_csp( h->param.i_csp );
int i_mb_count = h->mb.i_mb_count;
int i_stride, i_width, i_lines;
int i_padv = PADV << PARAM_INTERLACED;
- int luma_plane_size, chroma_plane_size;
int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
int disalign = h->param.cpu&X264_CPU_ALTIVEC ? 1<<9 : 1<<10;
+ int luma_plane_count = i_csp == X264_CSP_NV12 ? 1 : 3;
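+ /* In 4:4:4 all three planes are luma-sized, so each takes the full luma
+ * allocation path below, including the 4 hpel-filtered copies when subpel
+ * refinement is enabled on fdec frames. */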
CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
i_lines = h->mb.i_mb_height*16;
i_stride = align_stride( i_width + 2*PADH, align, disalign );
- frame->i_plane = 2;
- for( int i = 0; i < 2; i++ )
+ if( i_csp == X264_CSP_NV12 )
{
- frame->i_width[i] = i_width >> i;
- frame->i_lines[i] = i_lines >> i;
- frame->i_stride[i] = i_stride;
+ frame->i_plane = 2;
+ for( int i = 0; i < 2; i++ )
+ {
+ frame->i_width[i] = i_width >> i;
+ frame->i_lines[i] = i_lines >> i;
+ frame->i_stride[i] = i_stride;
+ }
}
+ else if( i_csp == X264_CSP_I444 )
+ {
+ frame->i_plane = 3;
+ for( int i = 0; i < 3; i++ )
+ {
+ frame->i_width[i] = i_width;
+ frame->i_lines[i] = i_lines;
+ frame->i_stride[i] = i_stride;
+ }
+ }
+ else
+ goto fail;
frame->i_width_lowres = frame->i_width[0]/2;
frame->i_lines_lowres = frame->i_lines[0]/2;
frame->orig = frame;
- luma_plane_size = align_plane_size( frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv), disalign );
- chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + i_padv));
-
- CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
- frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * i_padv/2 + PADH;
- if( PARAM_INTERLACED )
+ if( i_csp == X264_CSP_NV12 )
{
- CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
- frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * i_padv/2 + PADH;
+ int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + i_padv));
+ CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
+ frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * i_padv/2 + PADH;
+ if( PARAM_INTERLACED )
+ {
+ CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
+ frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * i_padv/2 + PADH;
+ }
}
/* all 4 luma planes allocated together, since the cacheline split code
* requires them to be in-phase wrt cacheline alignment. */
- if( h->param.analyse.i_subpel_refine && b_fdec )
+
+ for( int p = 0; p < luma_plane_count; p++ )
{
- /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
- CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size * sizeof(pixel) );
- if( PARAM_INTERLACED )
- CHECKED_MALLOC( frame->buffer_fld[0], 4*luma_plane_size * sizeof(pixel) );
- for( int i = 0; i < 4; i++ )
+ int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
+ if( h->param.analyse.i_subpel_refine && b_fdec )
{
- frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
- frame->filtered_fld[i] = frame->buffer_fld[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
+ /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
+ CHECKED_MALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
+ if( PARAM_INTERLACED )
+ CHECKED_MALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
+ for( int i = 0; i < 4; i++ )
+ {
+ frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+ frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+ }
+ frame->plane[p] = frame->filtered[p][0];
+ frame->plane_fld[p] = frame->filtered_fld[p][0];
+ }
+ else
+ {
+ CHECKED_MALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
+ if( PARAM_INTERLACED )
+ CHECKED_MALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
+ frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
+ frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
}
- frame->plane[0] = frame->filtered[0];
- frame->plane_fld[0] = frame->filtered_fld[0];
- }
- else
- {
- CHECKED_MALLOC( frame->buffer[0], luma_plane_size * sizeof(pixel) );
- if( PARAM_INTERLACED )
- CHECKED_MALLOC( frame->buffer_fld[0], luma_plane_size * sizeof(pixel) );
- frame->filtered[0] = frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
- frame->filtered_fld[0] = frame->plane_fld[0] = frame->buffer_fld[0] + frame->i_stride[0] * i_padv + PADH;
}
frame->b_duplicate = 0;
{
if( h->frames.b_have_lowres )
{
- luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
+ int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
for( int i = 0; i < 4; i++ )
h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height>>1 );
}
- else
+ else if( i_csp == X264_CSP_I420 || i_csp == X264_CSP_YV12 )
{
get_plane_ptr( h, src, &pix[1], &stride[1], i_csp==X264_CSP_I420 ? 1 : 2, 1, 1 );
get_plane_ptr( h, src, &pix[2], &stride[2], i_csp==X264_CSP_I420 ? 2 : 1, 1, 1 );
(pixel*)pix[2], stride[2]/sizeof(pixel),
h->param.i_width>>1, h->param.i_height>>1 );
}
+ else //if( i_csp == X264_CSP_I444 || i_csp == X264_CSP_YV24 )
+ {
+ get_plane_ptr( h, src, &pix[1], &stride[1], i_csp==X264_CSP_I444 ? 1 : 2, 0, 0 );
+ get_plane_ptr( h, src, &pix[2], &stride[2], i_csp==X264_CSP_I444 ? 2 : 1, 0, 0 );
+ h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
+ stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height );
+ h->mc.plane_copy( dst->plane[2], dst->i_stride[2], (pixel*)pix[2],
+ stride[2]/sizeof(pixel), h->param.i_width, h->param.i_height );
+ }
return 0;
}
return;
for( int i = 0; i < frame->i_plane; i++ )
{
+ int shift = i && !CHROMA444;
int stride = frame->i_stride[i];
- int width = 16*h->sps->i_mb_width;
- int height = (b_end ? 16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF : 16) >> !!i;
+ int width = 16*h->mb.i_mb_width;
+ int height = (b_end ? 16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF : 16) >> shift;
int padh = PADH;
- int padv = PADV >> !!i;
+ int padv = PADV >> shift;
// buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
if( b_end && !b_start )
- height += 4 >> (!!i + SLICE_MBAFF);
+ height += 4 >> (shift + SLICE_MBAFF);
pixel *pix;
if( SLICE_MBAFF )
{
// border samples for each field are extended separately
- pix = frame->plane_fld[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
- plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, i );
- plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, i );
+ pix = frame->plane_fld[i] + X264_MAX(0, (16*mb_y-4)*stride >> shift);
+ plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, shift );
+ plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, shift );
- height = (b_end ? 16*(h->mb.i_mb_height - mb_y) : 32) >> !!i;
+ height = (b_end ? 16*(h->mb.i_mb_height - mb_y) : 32) >> shift;
if( b_end && !b_start )
- height += 4 >> (!!i);
- pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
- plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, i );
+ height += 4 >> shift;
+ pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> shift);
+ plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, shift );
}
else
{
- pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
- plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, i );
+ pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> shift);
+ plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, shift );
}
}
}
* but up to 3 of the horizontal ones may be wrong.
we want to expand border from the last filtered pixel */
int b_start = !mb_y;
- int stride = frame->i_stride[0];
int width = 16*h->mb.i_mb_width + 8;
int height = b_end ? (16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF) + 16 : 16;
int padh = PADH - 4;
int padv = PADV - 8;
- for( int i = 1; i < 4; i++ )
- {
- // buffer: 8 luma, to match the hpel filter
- pixel *pix;
- if( SLICE_MBAFF )
+ for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
+ for( int i = 1; i < 4; i++ )
{
- pix = frame->filtered_fld[i] + (16*mb_y - 16) * stride - 4;
- plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, 0 );
- plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, 0 );
- }
+ int stride = frame->i_stride[p];
+ // buffer: 8 luma, to match the hpel filter
+ pixel *pix;
+ if( SLICE_MBAFF )
+ {
+ pix = frame->filtered_fld[p][i] + (16*mb_y - 16) * stride - 4;
+ plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, 0 );
+ plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, 0 );
+ }
- pix = frame->filtered[i] + (16*mb_y - 8) * stride - 4;
- plane_expand_border( pix, stride, width, height << SLICE_MBAFF, padh, padv, b_start, b_end, 0 );
- }
+ pix = frame->filtered[p][i] + (16*mb_y - 8) * stride - 4;
+ plane_expand_border( pix, stride, width, height << SLICE_MBAFF, padh, padv, b_start, b_end, 0 );
+ }
}
void x264_frame_expand_border_lowres( x264_frame_t *frame )
for( int i = 0; i < frame->i_plane; i++ )
{
int i_width = h->param.i_width;
- int i_height = h->param.i_height >> !!i;
+ int shift = i && !CHROMA444;
+ int i_height = h->param.i_height >> shift;
int i_padx = (h->mb.i_mb_width * 16 - h->param.i_width);
- int i_pady = (h->mb.i_mb_height * 16 - h->param.i_height) >> !!i;
+ int i_pady = (h->mb.i_mb_height * 16 - h->param.i_height) >> shift;
if( i_padx )
{
for( int y = 0; y < i_height; y++ )
pixel_memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
- &frame->plane[i][y*frame->i_stride[i] + i_width - 1-i],
- i_padx>>i, sizeof(pixel)<<i );
+ &frame->plane[i][y*frame->i_stride[i] + i_width - 1-shift],
+ i_padx>>shift, sizeof(pixel)<<shift );
}
if( i_pady )
{
{
for( int i = 0; i < h->fenc->i_plane; i++ )
{
+ int shift = i && !CHROMA444;
int stride = h->fenc->i_stride[i];
- int height = h->param.i_height >> !!i;
- int pady = (h->mb.i_mb_height * 16 - h->param.i_height) >> !!i;
- int mbsize = (16>>!!i);
+ int height = h->param.i_height >> shift;
+ int pady = (h->mb.i_mb_height * 16 - h->param.i_height) >> shift;
+ int mbsize = 16>>shift;
pixel *fenc = h->fenc->plane[i] + mbsize * mb_x;
for( int y = height; y < height + pady; y++ )
memcpy( fenc + y*stride,
/* YUV buffer */
int i_plane;
- int i_stride[2];
- int i_width[2];
- int i_lines[2];
+ int i_stride[3];
+ int i_width[3];
+ int i_lines[3];
int i_stride_lowres;
int i_width_lowres;
int i_lines_lowres;
- pixel *plane[2];
- pixel *plane_fld[2];
- pixel *filtered[4]; /* plane[0], H, V, HV */
- pixel *filtered_fld[4];
+ pixel *plane[3];
+ pixel *plane_fld[3];
+ pixel *filtered[3][4]; /* plane[0], H, V, HV */
+ pixel *filtered_fld[3][4];
pixel *lowres[4]; /* half-size copy of input frame: Orig, H, V, HV */
uint16_t *integral;
int bframe, x264_t *h );
} x264_deblock_function_t;
-x264_frame_t *x264_frame_new( x264_t *h, int b_fdec );
void x264_frame_delete( x264_frame_t *frame );
int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src );
#include "common.h"
#include "encoder/me.h"
+#define MC_LUMA(list,p) \
+ h->mc.mc_luma( &h->mb.pic.p_fdec[p][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE, \
+ &h->mb.pic.p_fref[list][i_ref][p*4], h->mb.pic.i_stride[p], \
+ mvx, mvy, 4*width, 4*height, \
+ list ? weight_none : &h->sh.weight[i_ref][p] );
+
static NOINLINE void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int height )
{
int i8 = x264_scan8[0]+x+8*y;
int mvx = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] ) + 4*4*x;
int mvy = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
- h->mc.mc_luma( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
- h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
- mvx, mvy, 4*width, 4*height, &h->sh.weight[i_ref][0] );
-
- // chroma is offset if MCing from a field of opposite parity
- if( MB_INTERLACED & i_ref )
- mvy += (h->mb.i_mb_y & 1)*4 - 2;
-
- h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x],
- &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
- h->mb.pic.p_fref[0][i_ref][4], h->mb.pic.i_stride[1],
- mvx, mvy, 2*width, 2*height );
-
- if( h->sh.weight[i_ref][1].weightfn )
- h->sh.weight[i_ref][1].weightfn[width>>1]( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
- &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
- &h->sh.weight[i_ref][1], height*2 );
- if( h->sh.weight[i_ref][2].weightfn )
- h->sh.weight[i_ref][2].weightfn[width>>1]( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
- &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
- &h->sh.weight[i_ref][2],height*2 );
+ MC_LUMA( 0, 0 );
+ if( CHROMA444 )
+ {
+ MC_LUMA( 0, 1 );
+ MC_LUMA( 0, 2 );
+ }
+ else
+ {
+ // chroma is offset if MCing from a field of opposite parity
+ if( MB_INTERLACED & i_ref )
+ mvy += (h->mb.i_mb_y & 1)*4 - 2;
+
+ h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x],
+ &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ h->mb.pic.p_fref[0][i_ref][4], h->mb.pic.i_stride[1],
+ mvx, mvy, 2*width, 2*height );
+
+ if( h->sh.weight[i_ref][1].weightfn )
+ h->sh.weight[i_ref][1].weightfn[width>>1]( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ &h->sh.weight[i_ref][1], height*2 );
+ if( h->sh.weight[i_ref][2].weightfn )
+ h->sh.weight[i_ref][2].weightfn[width>>1]( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ &h->sh.weight[i_ref][2],height*2 );
+ }
}
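
The field-parity fixup kept in the non-4:4:4 branch is worth a worked example: motion vectors are in quarter-pel units, and when chroma is MCed from a field of opposite parity the two fields' chroma sampling grids are vertically offset, so the vertical MV gets a 2 quarter-pel correction whose sign depends on the current macroblock's field. A standalone sketch of the arithmetic (chroma_mvy_fixup is an illustrative name):

    #include <stdio.h>

    /* (mb_y & 1)*4 - 2: -2 qpel for top-field MBs, +2 for bottom-field MBs */
    static int chroma_mvy_fixup( int mvy, int mb_y )
    {
        return mvy + (mb_y & 1)*4 - 2;
    }

    int main( void )
    {
        printf( "top field (mb_y=2): %d\n", chroma_mvy_fixup( 0, 2 ) );    /* -2 */
        printf( "bottom field (mb_y=3): %d\n", chroma_mvy_fixup( 0, 3 ) ); /* +2 */
        return 0;
    }
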
static NOINLINE void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int height )
{
int mvx = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] ) + 4*4*x;
int mvy = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
- h->mc.mc_luma( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
- h->mb.pic.p_fref[1][i_ref], h->mb.pic.i_stride[0],
- mvx, mvy, 4*width, 4*height, weight_none );
+ MC_LUMA( 1, 0 );
- if( MB_INTERLACED & i_ref )
- mvy += (h->mb.i_mb_y & 1)*4 - 2;
+ if( CHROMA444 )
+ {
+ MC_LUMA( 1, 1 );
+ MC_LUMA( 1, 2 );
+ }
+ else
+ {
+ if( MB_INTERLACED & i_ref )
+ mvy += (h->mb.i_mb_y & 1)*4 - 2;
- h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x],
- &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
- h->mb.pic.p_fref[1][i_ref][4], h->mb.pic.i_stride[1],
- mvx, mvy, 2*width, 2*height );
+ h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x],
+ &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
+ h->mb.pic.p_fref[1][i_ref][4], h->mb.pic.i_stride[1],
+ mvx, mvy, 2*width, 2*height );
+ }
}
+#define MC_LUMA_BI(p) \
+ src0 = h->mc.get_ref( tmp0, &i_stride0, &h->mb.pic.p_fref[0][i_ref0][p*4], h->mb.pic.i_stride[p], \
+ mvx0, mvy0, 4*width, 4*height, weight_none ); \
+ src1 = h->mc.get_ref( tmp1, &i_stride1, &h->mb.pic.p_fref[1][i_ref1][p*4], h->mb.pic.i_stride[p], \
+ mvx1, mvy1, 4*width, 4*height, weight_none ); \
+ h->mc.avg[i_mode]( &h->mb.pic.p_fdec[p][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE, \
+ src0, i_stride0, src1, i_stride1, weight );
+
static NOINLINE void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int height )
{
int i8 = x264_scan8[0]+x+8*y;
ALIGNED_ARRAY_16( pixel, tmp1,[16*16] );
pixel *src0, *src1;
- src0 = h->mc.get_ref( tmp0, &i_stride0, h->mb.pic.p_fref[0][i_ref0], h->mb.pic.i_stride[0],
- mvx0, mvy0, 4*width, 4*height, weight_none );
- src1 = h->mc.get_ref( tmp1, &i_stride1, h->mb.pic.p_fref[1][i_ref1], h->mb.pic.i_stride[0],
- mvx1, mvy1, 4*width, 4*height, weight_none );
- h->mc.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
- src0, i_stride0, src1, i_stride1, weight );
+ MC_LUMA_BI( 0 );
- if( MB_INTERLACED & i_ref0 )
- mvy0 += (h->mb.i_mb_y & 1)*4 - 2;
- if( MB_INTERLACED & i_ref1 )
- mvy1 += (h->mb.i_mb_y & 1)*4 - 2;
-
- h->mc.mc_chroma( tmp0, tmp0+8, 16, h->mb.pic.p_fref[0][i_ref0][4], h->mb.pic.i_stride[1],
- mvx0, mvy0, 2*width, 2*height );
- h->mc.mc_chroma( tmp1, tmp1+8, 16, h->mb.pic.p_fref[1][i_ref1][4], h->mb.pic.i_stride[1],
- mvx1, mvy1, 2*width, 2*height );
- h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp0, 16, tmp1, 16, weight );
- h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp0+8, 16, tmp1+8, 16, weight );
+ if( CHROMA444 )
+ {
+ MC_LUMA_BI( 1 );
+ MC_LUMA_BI( 2 );
+ }
+ else
+ {
+ if( MB_INTERLACED & i_ref0 )
+ mvy0 += (h->mb.i_mb_y & 1)*4 - 2;
+ if( MB_INTERLACED & i_ref1 )
+ mvy1 += (h->mb.i_mb_y & 1)*4 - 2;
+
+ h->mc.mc_chroma( tmp0, tmp0+8, 16, h->mb.pic.p_fref[0][i_ref0][4], h->mb.pic.i_stride[1],
+ mvx0, mvy0, 2*width, 2*height );
+ h->mc.mc_chroma( tmp1, tmp1+8, 16, h->mb.pic.p_fref[1][i_ref1][4], h->mb.pic.i_stride[1],
+ mvx1, mvy1, 2*width, 2*height );
+ h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp0, 16, tmp1, 16, weight );
+ h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp0+8, 16, tmp1+8, 16, weight );
+ }
}
+#undef MC_LUMA
+#undef MC_LUMA_BI
+
void x264_mb_mc_8x8( x264_t *h, int i8 )
{
int x = 2*(i8&1);
CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
/* all coeffs */
- CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 24 * sizeof(uint8_t) );
+ CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
if( h->param.b_cabac )
{
if( !b_lookahead )
{
for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
- for( int j = 0; j < 2; j++ )
+ for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
{
/* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
for( int i = 0; i <= PARAM_INTERLACED; i++ )
x264_free( h->deblock_strength[i] );
for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
- for( int j = 0; j < 2; j++ )
+ for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
x264_free( h->intra_border_backup[i][j] - 16 );
}
x264_free( h->scratch_buffer );
(h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I);
h->mb.i_mb_prev_xy = -1;
+ h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
+ h->mb.pic.p_fdec[0] = h->mb.pic.fdec_buf + 2*FDEC_STRIDE;
+ h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
+ h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE;
+ if( CHROMA444 )
+ {
+ h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 32*FENC_STRIDE;
+ h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 36*FDEC_STRIDE;
+ }
/* fdec: fenc:
* yyyyyyy
* yYYYY YYYY
* uUU vVV UUVV
* uUU vVV
*/
- h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
- h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
- h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8;
- h->mb.pic.p_fdec[0] = h->mb.pic.fdec_buf + 2*FDEC_STRIDE;
- h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE;
- h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE + 16;
+ else
+ {
+ h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8;
+ h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE + 16;
+ }
}
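
The offsets chosen here follow from the diagram above: fenc stacks the planes back to back, while fdec keeps border rows above each plane for intra prediction. In 4:4:4 every plane is 16 rows tall, so U starts at fenc row 16 and V at row 32 (fdec rows 19 and 36, one border row below the previous plane); in 4:2:0 the 8x8 U and V blocks share rows side by side. A compile-time restatement, assuming x264's fixed strides of 16 (fenc) and 32 (fdec):

    #define FENC_STRIDE 16
    #define FDEC_STRIDE 32

    enum {
        FENC_U_444 = 16*FENC_STRIDE,      /* below the 16 luma rows          */
        FENC_V_444 = 32*FENC_STRIDE,      /* below the 16 full-width U rows  */
        FENC_U_420 = 16*FENC_STRIDE,      /* 8x8 U ...                       */
        FENC_V_420 = 16*FENC_STRIDE + 8,  /* ... with 8x8 V beside it        */
        FDEC_Y     =  2*FDEC_STRIDE,      /* headroom rows above luma        */
        FDEC_U_444 = 19*FDEC_STRIDE,      /* 2 + 16 luma rows + 1 border row */
        FDEC_V_444 = 36*FDEC_STRIDE       /* 19 + 16 U rows + 1 border row   */
    };
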
void x264_prefetch_fenc( x264_t *h, x264_frame_t *fenc, int i_mb_x, int i_mb_y )
dst[i*FDEC_STRIDE] = src[i*FDEC_STRIDE];
}
-static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_mbaff )
+static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
{
- int w = (i ? 8 : 16);
+ int mb_interlaced = b_mbaff && MB_INTERLACED;
+ int w = b_chroma ? 8 : 16;
int i_stride = h->fdec->i_stride[i];
- int i_stride2 = i_stride << MB_INTERLACED;
- int i_pix_offset = MB_INTERLACED
+ int i_stride2 = i_stride << mb_interlaced;
+ int i_pix_offset = mb_interlaced
? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
: 16 * mb_x + w * mb_y * i_stride;
pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
- int fdec_idx = b_mbaff ? (MB_INTERLACED ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0;
+ int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0;
pixel *intra_fdec = &h->intra_border_backup[fdec_idx][i][mb_x*16];
int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
/* ref_pix_offset[0] references the current field and [1] the opposite field. */
- if( MB_INTERLACED )
+ if( mb_interlaced )
ref_pix_offset[1] += (1-2*(mb_y&1)) * i_stride;
h->mb.pic.i_stride[i] = i_stride2;
h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
- if( i )
+ if( b_chroma )
{
h->mc.load_deinterleave_8x8x2_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2 );
memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
}
else
{
- h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, h->mb.pic.p_fenc_plane[0], i_stride2, 16 );
- memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE, h->mb.pic.p_fenc_plane[i], i_stride2, 16 );
+ memcpy( h->mb.pic.p_fdec[i]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
if( b_mbaff )
- h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = intra_fdec[-1];
+ h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1];
}
if( b_mbaff )
{
for( int j = 0; j < w; j++ )
- if( i )
+ if( b_chroma )
{
h->mb.pic.p_fdec[1][-1+j*FDEC_STRIDE] = plane_fdec[-2+j*i_stride2];
h->mb.pic.p_fdec[2][-1+j*FDEC_STRIDE] = plane_fdec[-1+j*i_stride2];
}
else
- h->mb.pic.p_fdec[0][-1+j*FDEC_STRIDE] = plane_fdec[-1+j*i_stride2];
+ h->mb.pic.p_fdec[i][-1+j*FDEC_STRIDE] = plane_fdec[-1+j*i_stride2];
}
pixel *plane_src, **filtered_src;
for( int j = 0; j < h->mb.pic.i_fref[0]; j++ )
{
// Interpolate between pixels in same field.
- if( MB_INTERLACED )
+ if( mb_interlaced )
{
plane_src = h->fref[0][j>>1]->plane_fld[i];
- filtered_src = h->fref[0][j>>1]->filtered_fld;
+ filtered_src = h->fref[0][j>>1]->filtered_fld[i];
}
else
{
plane_src = h->fref[0][j]->plane[i];
- filtered_src = h->fref[0][j]->filtered;
+ filtered_src = h->fref[0][j]->filtered[i];
}
- h->mb.pic.p_fref[0][j][i?4:0] = plane_src + ref_pix_offset[j&1];
+ h->mb.pic.p_fref[0][j][i*4] = plane_src + ref_pix_offset[j&1];
- if( !i )
+ if( !b_chroma )
{
for( int k = 1; k < 4; k++ )
- h->mb.pic.p_fref[0][j][k] = filtered_src[k] + ref_pix_offset[j&1];
- if( h->sh.weight[j][0].weightfn )
- h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> MB_INTERLACED][ref_pix_offset[j&1]];
- else
- h->mb.pic.p_fref_w[j] = h->mb.pic.p_fref[0][j][0];
+ h->mb.pic.p_fref[0][j][i*4+k] = filtered_src[k] + ref_pix_offset[j&1];
+ if( !i )
+ {
+ if( h->sh.weight[j][0].weightfn )
+ h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> mb_interlaced][ref_pix_offset[j&1]];
+ else
+ h->mb.pic.p_fref_w[j] = h->mb.pic.p_fref[0][j][0];
+ }
}
}
if( h->sh.i_type == SLICE_TYPE_B )
for( int j = 0; j < h->mb.pic.i_fref[1]; j++ )
{
- if( MB_INTERLACED )
+ if( mb_interlaced )
{
plane_src = h->fref[1][j>>1]->plane_fld[i];
- filtered_src = h->fref[1][j>>1]->filtered_fld;
+ filtered_src = h->fref[1][j>>1]->filtered_fld[i];
}
else
{
plane_src = h->fref[1][j]->plane[i];
- filtered_src = h->fref[1][j]->filtered;
+ filtered_src = h->fref[1][j]->filtered[i];
}
- h->mb.pic.p_fref[1][j][i?4:0] = plane_src + ref_pix_offset[j&1];
+ h->mb.pic.p_fref[1][j][i*4] = plane_src + ref_pix_offset[j&1];
- if( !i )
+ if( !b_chroma )
for( int k = 1; k < 4; k++ )
- h->mb.pic.p_fref[1][j][k] = filtered_src[k] + ref_pix_offset[j&1];
+ h->mb.pic.p_fref[1][j][i*4+k] = filtered_src[k] + ref_pix_offset[j&1];
}
}
static const x264_left_table_t left_indices[4] =
{
/* Current is progressive */
- {{ 4, 4, 5, 5}, { 3, 3, 7, 7}, {16+1, 16+1, 16+4+1, 16+4+1}, {0, 0, 1, 1}, {0, 0, 0, 0}},
- {{ 6, 6, 3, 3}, {11, 11, 15, 15}, {16+3, 16+3, 16+4+3, 16+4+3}, {2, 2, 3, 3}, {1, 1, 1, 1}},
+ {{ 4, 4, 5, 5}, { 3, 3, 7, 7}, {16+1, 16+1, 32+1, 32+1}, {0, 0, 1, 1}, {0, 0, 0, 0}},
+ {{ 6, 6, 3, 3}, {11, 11, 15, 15}, {16+5, 16+5, 32+5, 32+5}, {2, 2, 3, 3}, {1, 1, 1, 1}},
/* Current is interlaced */
- {{ 4, 6, 4, 6}, { 3, 11, 3, 11}, {16+1, 16+1, 16+4+1, 16+4+1}, {0, 2, 0, 2}, {0, 1, 0, 1}},
+ {{ 4, 6, 4, 6}, { 3, 11, 3, 11}, {16+1, 16+1, 32+1, 32+1}, {0, 2, 0, 2}, {0, 1, 0, 1}},
/* Both same */
- {{ 4, 5, 6, 3}, { 3, 7, 11, 15}, {16+1, 16+3, 16+4+1, 16+4+3}, {0, 1, 2, 3}, {0, 0, 1, 1}}
+ {{ 4, 5, 6, 3}, { 3, 7, 11, 15}, {16+1, 16+5, 32+1, 32+5}, {0, 1, 2, 3}, {0, 0, 1, 1}}
};
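
The nnz columns in this table change because the per-MB nnz array grows from 24 to 48 entries: each plane's rows are now 4 wide and Cr moves from offset 16+4 to 32. The left neighbour contributes its rightmost column, which is how the new 4:2:0 entries are derived; a tautological but documenting check:

    #include <assert.h>

    /* New 48-entry layout: rows of 4 per plane, U at 16, V at 32.
     * The right column of a 2-wide chroma row is column 1. */
    int main( void )
    {
        assert( 16 + 4*0 + 1 == 16+1 );  /* U row 0 -> table entry 16+1 */
        assert( 16 + 4*1 + 1 == 16+5 );  /* U row 1 -> table entry 16+5 */
        assert( 32 + 4*0 + 1 == 32+1 );  /* V row 0 -> table entry 32+1 */
        assert( 32 + 4*1 + 1 == 32+5 );  /* V row 1 -> table entry 32+5 */
        return 0;
    }
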
static void ALWAYS_INLINE x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y, int b_interlaced )
/* GCC pessimizes direct loads from heap-allocated arrays due to aliasing. */
/* By only dereferencing them once, we avoid this issue. */
int8_t (*i4x4)[8] = h->mb.intra4x4_pred_mode;
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
+ uint8_t (*nnz)[48] = h->mb.non_zero_count;
int16_t *cbp = h->mb.cbp;
const x264_left_table_t *left_index_table = h->mb.left_index_table;
CP32( &h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8], &i4x4[top][0] );
/* load non_zero_count */
- CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[top][12] );
- /* shift because x264_scan8[16] is misaligned */
- M32( &h->mb.cache.non_zero_count[x264_scan8[16+0] - 9] ) = M16( &nnz[top][18] ) << 8;
- M32( &h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] ) = M16( &nnz[top][22] ) << 8;
+ CP32( &h->mb.cache.non_zero_count[x264_scan8[ 0] - 8], &nnz[top][12] );
+ CP32( &h->mb.cache.non_zero_count[x264_scan8[16] - 8], &nnz[top][16+4 + 8*CHROMA444] );
+ CP32( &h->mb.cache.non_zero_count[x264_scan8[32] - 8], &nnz[top][32+4 + 8*CHROMA444] );
/* Finish the prefetching */
for( int l = 0; l < lists; l++ )
M32( &h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] ) = 0xFFFFFFFFU;
/* load non_zero_count */
- M32( &h->mb.cache.non_zero_count[x264_scan8[ 0] - 8] ) = 0x80808080U;
- M32( &h->mb.cache.non_zero_count[x264_scan8[16+0] - 9] ) = 0x80808080U;
- M32( &h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] ) = 0x80808080U;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 0] - 8] ) = 0x80808080U;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16] - 8] ) = 0x80808080U;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[32] - 8] ) = 0x80808080U;
}
if( h->mb.i_neighbour & MB_LEFT )
{
+ int ltop = left[LTOP];
+ int lbot = b_mbaff ? left[LBOT] : ltop;
if( b_mbaff )
{
- const int16_t top_luma = (cbp[left[LTOP]] >> (left_index_table->mv[0]&(~1))) & 2;
- const int16_t bot_luma = (cbp[left[LBOT]] >> (left_index_table->mv[2]&(~1))) & 2;
- h->mb.cache.i_cbp_left = (cbp[left[LTOP]] & 0xfff0) | (bot_luma<<2) | top_luma;
+ const int16_t top_luma = (cbp[ltop] >> (left_index_table->mv[0]&(~1))) & 2;
+ const int16_t bot_luma = (cbp[lbot] >> (left_index_table->mv[2]&(~1))) & 2;
+ h->mb.cache.i_cbp_left = (cbp[ltop] & 0xfff0) | (bot_luma<<2) | top_luma;
}
else
- h->mb.cache.i_cbp_left = cbp[left[0]];
- if( b_mbaff )
+ h->mb.cache.i_cbp_left = cbp[ltop];
+
+ /* load intra4x4 */
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[ 0] - 1] = i4x4[ltop][left_index_table->intra[0]];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[ 2] - 1] = i4x4[ltop][left_index_table->intra[1]];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[ 8] - 1] = i4x4[lbot][left_index_table->intra[2]];
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[lbot][left_index_table->intra[3]];
+
+ /* load non_zero_count */
+ h->mb.cache.non_zero_count[x264_scan8[ 0] - 1] = nnz[ltop][left_index_table->nnz[0]];
+ h->mb.cache.non_zero_count[x264_scan8[ 2] - 1] = nnz[ltop][left_index_table->nnz[1]];
+ h->mb.cache.non_zero_count[x264_scan8[ 8] - 1] = nnz[lbot][left_index_table->nnz[2]];
+ h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[lbot][left_index_table->nnz[3]];
+
+ if( CHROMA444 )
{
- /* load intra4x4 */
- h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left[LTOP]][left_index_table->intra[0]];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left[LTOP]][left_index_table->intra[1]];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left[LBOT]][left_index_table->intra[2]];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left[LBOT]][left_index_table->intra[3]];
-
- /* load non_zero_count */
- h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[LTOP]][left_index_table->nnz[0]];
- h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[LTOP]][left_index_table->nnz[1]];
- h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[LBOT]][left_index_table->nnz[2]];
- h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[LBOT]][left_index_table->nnz[3]];
-
- h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left[LTOP]][left_index_table->nnz_chroma[0]];
- h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left[LBOT]][left_index_table->nnz_chroma[1]];
-
- h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left[LTOP]][left_index_table->nnz_chroma[2]];
- h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left[LBOT]][left_index_table->nnz_chroma[3]];
+ h->mb.cache.non_zero_count[x264_scan8[16+ 0] - 1] = nnz[ltop][left_index_table->nnz[0]+16];
+ h->mb.cache.non_zero_count[x264_scan8[16+ 2] - 1] = nnz[ltop][left_index_table->nnz[1]+16];
+ h->mb.cache.non_zero_count[x264_scan8[16+ 8] - 1] = nnz[lbot][left_index_table->nnz[2]+16];
+ h->mb.cache.non_zero_count[x264_scan8[16+10] - 1] = nnz[lbot][left_index_table->nnz[3]+16];
+ h->mb.cache.non_zero_count[x264_scan8[32+ 0] - 1] = nnz[ltop][left_index_table->nnz[0]+32];
+ h->mb.cache.non_zero_count[x264_scan8[32+ 2] - 1] = nnz[ltop][left_index_table->nnz[1]+32];
+ h->mb.cache.non_zero_count[x264_scan8[32+ 8] - 1] = nnz[lbot][left_index_table->nnz[2]+32];
+ h->mb.cache.non_zero_count[x264_scan8[32+10] - 1] = nnz[lbot][left_index_table->nnz[3]+32];
}
else
{
- int l = left[0];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[l][left_index_table->intra[0]];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[l][left_index_table->intra[1]];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[l][left_index_table->intra[2]];
- h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[l][left_index_table->intra[3]];
-
- h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[l][left_index_table->nnz[0]];
- h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[l][left_index_table->nnz[1]];
- h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[l][left_index_table->nnz[2]];
- h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[l][left_index_table->nnz[3]];
-
- h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[l][left_index_table->nnz_chroma[0]];
- h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[l][left_index_table->nnz_chroma[1]];
-
- h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[l][left_index_table->nnz_chroma[2]];
- h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[l][left_index_table->nnz_chroma[3]];
+ h->mb.cache.non_zero_count[x264_scan8[16+ 0] - 1] = nnz[ltop][left_index_table->nnz_chroma[0]];
+ h->mb.cache.non_zero_count[x264_scan8[16+ 2] - 1] = nnz[lbot][left_index_table->nnz_chroma[1]];
+ h->mb.cache.non_zero_count[x264_scan8[32+ 0] - 1] = nnz[ltop][left_index_table->nnz_chroma[2]];
+ h->mb.cache.non_zero_count[x264_scan8[32+ 2] - 1] = nnz[lbot][left_index_table->nnz_chroma[3]];
}
}
else
{
h->mb.cache.i_cbp_left = -1;
- h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] =
- h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] =
- h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] =
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[ 0] - 1] =
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[ 2] - 1] =
+ h->mb.cache.intra4x4_pred_mode[x264_scan8[ 8] - 1] =
h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = -1;
/* load non_zero_count */
- h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] =
+ h->mb.cache.non_zero_count[x264_scan8[ 0] - 1] =
+ h->mb.cache.non_zero_count[x264_scan8[ 2] - 1] =
+ h->mb.cache.non_zero_count[x264_scan8[ 8] - 1] =
h->mb.cache.non_zero_count[x264_scan8[10] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] =
- h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = 0x80;
+ h->mb.cache.non_zero_count[x264_scan8[16+ 0] - 1] =
+ h->mb.cache.non_zero_count[x264_scan8[16+ 2] - 1] =
+ h->mb.cache.non_zero_count[x264_scan8[32+ 0] - 1] =
+ h->mb.cache.non_zero_count[x264_scan8[32+ 2] - 1] = 0x80;
+ if( CHROMA444 )
+ {
+ h->mb.cache.non_zero_count[x264_scan8[16+ 8] - 1] =
+ h->mb.cache.non_zero_count[x264_scan8[16+10] - 1] =
+ h->mb.cache.non_zero_count[x264_scan8[32+ 8] - 1] =
+ h->mb.cache.non_zero_count[x264_scan8[32+10] - 1] = 0x80;
+ }
}
if( h->pps->b_transform_8x8_mode )
{
x264_copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE );
x264_copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE );
- x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+ 7+ 4*FDEC_STRIDE );
- x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+ 4*FDEC_STRIDE );
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0 );
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0 );
+ x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 0 );
+ if( CHROMA444 )
+ {
+ x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+15+ 4*FDEC_STRIDE );
+ x264_copy_column8( h->mb.pic.p_fdec[1]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[1]+15+12*FDEC_STRIDE );
+ x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+15+ 4*FDEC_STRIDE );
+ x264_copy_column8( h->mb.pic.p_fdec[2]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[2]+15+12*FDEC_STRIDE );
+ x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 0 );
+ x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 0 );
+ }
+ else
+ {
+ x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+ 7+ 4*FDEC_STRIDE );
+ x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+ 4*FDEC_STRIDE );
+ x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 0 );
+ }
}
else
{
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 1 );
- x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1 );
+ x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 1 );
+ if( CHROMA444 )
+ {
+ x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 1 );
+ x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 1 );
+ }
+ else
+ x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 1 );
}
if( h->fdec->integral )
int s8x8 = h->mb.i_b8_stride;
int s4x4 = h->mb.i_b4_stride;
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
+ uint8_t (*nnz)[48] = h->mb.non_zero_count;
const x264_left_table_t *left_index_table = SLICE_MBAFF ? h->mb.left_index_table : &left_indices[3];
if( h->mb.i_neighbour & MB_TOP )
/* Munge NNZ for cavlc + 8x8dct */
if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
{
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
+ uint8_t (*nnz)[48] = h->mb.non_zero_count;
int top = h->mb.i_mb_top_xy;
int *left = h->mb.i_mb_left_xy;
bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B, h );
}
-static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_mbaff )
+static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
{
- int w = i ? 8 : 16;
+ int w = b_chroma ? 8 : 16;
int i_stride = h->fdec->i_stride[i];
int i_stride2 = i_stride << (b_mbaff && MB_INTERLACED);
int i_pix_offset = (b_mbaff && MB_INTERLACED)
? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
: 16 * mb_x + w * mb_y * i_stride;
- if( i )
+ if( b_chroma )
h->mc.store_interleave_8x8x2( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
else
- h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
+ h->mc.copy[PIXEL_16x16]( &h->fdec->plane[i][i_pix_offset], i_stride2, h->mb.pic.p_fdec[i], FDEC_STRIDE, 16 );
}
static void ALWAYS_INLINE x264_macroblock_backup_intra( x264_t *h, int mb_x, int mb_y, int b_mbaff )
* mbpair in intra_border_backup[2]. */
int backup_dst = !b_mbaff ? 0 : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2;
memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
- memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
- memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
+ if( CHROMA444 )
+ {
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+FDEC_STRIDE*15, 16*sizeof(pixel) );
+ memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16 ], h->mb.pic.p_fdec[2]+FDEC_STRIDE*15, 16*sizeof(pixel) );
+ }
+ else
+ {
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
+ }
if( b_mbaff )
{
if( mb_y&1 )
int backup_src = (MB_INTERLACED ? 7 : 14) * FDEC_STRIDE;
backup_dst = MB_INTERLACED ? 2 : 0;
memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+backup_src, 16*sizeof(pixel) );
- backup_src = (MB_INTERLACED ? 3 : 6) * FDEC_STRIDE;
- memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*sizeof(pixel) );
- memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+backup_src, 8*sizeof(pixel) );
+ if( CHROMA444 )
+ {
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 16*sizeof(pixel) );
+ memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16 ], h->mb.pic.p_fdec[2]+backup_src, 16*sizeof(pixel) );
+ }
+ else
+ {
+ backup_src = (MB_INTERLACED ? 3 : 6) * FDEC_STRIDE;
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*sizeof(pixel) );
+ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+backup_src, 8*sizeof(pixel) );
+ }
}
}
else
/* In progressive we update intra_border_backup in-place, so the topleft neighbor will
* no longer exist there when load_pic_pointers wants it. Move it within p_fdec instead. */
h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[0][-FDEC_STRIDE+15];
- h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[1][-FDEC_STRIDE+7];
- h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[2][-FDEC_STRIDE+7];
+ h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[1][-FDEC_STRIDE+7 + 8*CHROMA444];
+ h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[2][-FDEC_STRIDE+7 + 8*CHROMA444];
}
}
if( SLICE_MBAFF )
{
x264_macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 1 );
- x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 1 );
- x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1 );
+ x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0, 1 );
+ if( CHROMA444 )
+ {
+ x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 1 );
+ x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 1 );
+ }
+ else
+ x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 1 );
}
else
{
x264_macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 0 );
- x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0 );
- x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0 );
+ x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0, 0 );
+ if( CHROMA444 )
+ {
+ x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 0 );
+ x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 0 );
+ }
+ else
+ x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 0 );
}
x264_prefetch_fenc( h, h->fdec, h->mb.i_mb_x, h->mb.i_mb_y );
{
h->mb.qp[i_mb_xy] = 0;
h->mb.i_last_dqp = 0;
- h->mb.i_cbp_chroma = 2;
+ h->mb.i_cbp_chroma = CHROMA444 ? 0 : 2;
h->mb.i_cbp_luma = 0xf;
- h->mb.cbp[i_mb_xy] = 0x72f; /* all set */
+ h->mb.cbp[i_mb_xy] = (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma | 0x700;
h->mb.b_transform_8x8 = 0;
- for( int i = 0; i < 24; i++ )
+ for( int i = 0; i < 48; i++ )
h->mb.cache.non_zero_count[x264_scan8[i]] = h->param.b_cabac ? 1 : 16;
}
else
}
/* save non zero count */
- CP32( &nnz[0*4], &h->mb.cache.non_zero_count[x264_scan8[0]+0*8] );
- CP32( &nnz[1*4], &h->mb.cache.non_zero_count[x264_scan8[0]+1*8] );
- CP32( &nnz[2*4], &h->mb.cache.non_zero_count[x264_scan8[0]+2*8] );
- CP32( &nnz[3*4], &h->mb.cache.non_zero_count[x264_scan8[0]+3*8] );
- M16( &nnz[16+0*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+0*2]-1] ) >> 8;
- M16( &nnz[16+1*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+1*2]-1] ) >> 8;
- M16( &nnz[16+2*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+2*2]-1] ) >> 8;
- M16( &nnz[16+3*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+3*2]-1] ) >> 8;
+ CP32( &nnz[ 0+0*4], &h->mb.cache.non_zero_count[x264_scan8[ 0]] );
+ CP32( &nnz[ 0+1*4], &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
+ CP32( &nnz[ 0+2*4], &h->mb.cache.non_zero_count[x264_scan8[ 8]] );
+ CP32( &nnz[ 0+3*4], &h->mb.cache.non_zero_count[x264_scan8[10]] );
+ CP32( &nnz[16+0*4], &h->mb.cache.non_zero_count[x264_scan8[16+0]] );
+ CP32( &nnz[16+1*4], &h->mb.cache.non_zero_count[x264_scan8[16+2]] );
+ CP32( &nnz[32+0*4], &h->mb.cache.non_zero_count[x264_scan8[32+0]] );
+ CP32( &nnz[32+1*4], &h->mb.cache.non_zero_count[x264_scan8[32+2]] );
+ if( CHROMA444 )
+ {
+ CP32( &nnz[16+2*4], &h->mb.cache.non_zero_count[x264_scan8[16+ 8]] );
+ CP32( &nnz[16+3*4], &h->mb.cache.non_zero_count[x264_scan8[16+10]] );
+ CP32( &nnz[32+2*4], &h->mb.cache.non_zero_count[x264_scan8[32+ 8]] );
+ CP32( &nnz[32+3*4], &h->mb.cache.non_zero_count[x264_scan8[32+10]] );
+ }
if( h->mb.i_cbp_luma == 0 && h->mb.i_type != I_8x8 )
h->mb.b_transform_8x8 = 0;
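
The stores above pin down the widened nnz layout: luma 4x4 counts in raster rows at offsets 0/4/8/12, Cb from 16, Cr from 32. 4:2:0 populates only the first two 4-entry rows of each chroma range (with two valid counts per row), while 4:4:4 fills all four. Named constants for illustration only; x264 itself indexes with raw offsets:

    enum {
        NNZ_LUMA  =  0,   /* 16 entries: 4 raster rows of 4 blocks     */
        NNZ_CB    = 16,   /* rows 0-1 used in 4:2:0, rows 0-3 in 4:4:4 */
        NNZ_CR    = 32,
        NNZ_COUNT = 48
    };
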
enum cabac_ctx_block_cat_e
{
- DCT_LUMA_DC = 0,
- DCT_LUMA_AC = 1,
- DCT_LUMA_4x4 = 2,
- DCT_CHROMA_DC = 3,
- DCT_CHROMA_AC = 4,
- DCT_LUMA_8x8 = 5,
+ DCT_LUMA_DC = 0,
+ DCT_LUMA_AC = 1,
+ DCT_LUMA_4x4 = 2,
+ DCT_CHROMA_DC = 3,
+ DCT_CHROMA_AC = 4,
+ DCT_LUMA_8x8 = 5,
+ DCT_CHROMAU_DC = 6,
+ DCT_CHROMAU_AC = 7,
+ DCT_CHROMAU_4x4 = 8,
+ DCT_CHROMAU_8x8 = 9,
+ DCT_CHROMAV_DC = 10,
+ DCT_CHROMAV_AC = 11,
+ DCT_CHROMAV_4x4 = 12,
+ DCT_CHROMAV_8x8 = 13,
+};
+
+static const uint8_t ctx_cat_plane[6][3] =
+{
+ { DCT_LUMA_DC, DCT_CHROMAU_DC, DCT_CHROMAV_DC},
+ { DCT_LUMA_AC, DCT_CHROMAU_AC, DCT_CHROMAV_AC},
+ {DCT_LUMA_4x4, DCT_CHROMAU_4x4, DCT_CHROMAV_4x4},
+ {0},
+ {0},
+ {DCT_LUMA_8x8, DCT_CHROMAU_8x8, DCT_CHROMAV_8x8}
};
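
In 4:4:4 each chroma plane's residual is coded with luma-style block categories, and ctx_cat_plane maps a luma category to its U/V variant; rows 3 and 4 stay zeroed because the 4:2:0 chroma DC/AC categories have no per-plane form. A plausible usage sketch following the table above (the helper name is illustrative):

    /* p: 0 = luma, 1 = Cb, 2 = Cr; valid for the DC, AC, 4x4 and 8x8 rows */
    static inline int ctx_cat_for_plane( int cat, int p )
    {
        return ctx_cat_plane[cat][p];
    }
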
/* Per-frame allocation: is allocated per-thread only in frame-threads mode. */
void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
const int b_interlaced = PARAM_INTERLACED;
- int stride = frame->i_stride[0];
- const int width = frame->i_width[0];
int start = mb_y*16 - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
int height = (b_end ? frame->i_lines[0] + 16*PARAM_INTERLACED : (mb_y+b_interlaced)*16) + 8;
- int offs = start*stride - 8; // buffer = 3 for 6tap, aligned to 8 for simd
if( mb_y & b_interlaced )
return;
- if( !b_interlaced || h->mb.b_adaptive_mbaff )
- h->mc.hpel_filter(
- frame->filtered[1] + offs,
- frame->filtered[2] + offs,
- frame->filtered[3] + offs,
- frame->plane[0] + offs,
- stride, width + 16, height - start,
- h->scratch_buffer );
-
- if( b_interlaced )
+ for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
{
- /* MC must happen between pixels in the same field. */
- stride = frame->i_stride[0] << 1;
- start = (mb_y*16 >> 1) - 8;
- int height_fld = ((b_end ? frame->i_lines[0] : mb_y*16) >> 1) + 8;
- offs = start*stride - 8;
- for( int i = 0; i < 2; i++, offs += frame->i_stride[0] )
- {
+ int stride = frame->i_stride[p];
+ const int width = frame->i_width[p];
+ int offs = start*stride - 8; // buffer = 3 for 6tap, aligned to 8 for simd
+
+ if( !b_interlaced || h->mb.b_adaptive_mbaff )
h->mc.hpel_filter(
- frame->filtered_fld[1] + offs,
- frame->filtered_fld[2] + offs,
- frame->filtered_fld[3] + offs,
- frame->plane_fld[0] + offs,
- stride, width + 16, height_fld - start,
+ frame->filtered[p][1] + offs,
+ frame->filtered[p][2] + offs,
+ frame->filtered[p][3] + offs,
+ frame->plane[p] + offs,
+ stride, width + 16, height - start,
h->scratch_buffer );
+
+ if( b_interlaced )
+ {
+ /* MC must happen between pixels in the same field. */
+ stride = frame->i_stride[p] << 1;
+ start = (mb_y*16 >> 1) - 8;
+ int height_fld = ((b_end ? frame->i_lines[p] : mb_y*16) >> 1) + 8;
+ offs = start*stride - 8;
+ for( int i = 0; i < 2; i++, offs += frame->i_stride[p] )
+ {
+ h->mc.hpel_filter(
+ frame->filtered_fld[p][1] + offs,
+ frame->filtered_fld[p][2] + offs,
+ frame->filtered_fld[p][3] + offs,
+ frame->plane_fld[p] + offs,
+ stride, width + 16, height_fld - start,
+ h->scratch_buffer );
+ }
}
}
if( frame->integral )
{
+ int stride = frame->i_stride[0];
if( start < 0 )
{
memset( frame->integral - PADV * stride - PADH, 0, stride * sizeof(uint16_t) );
}
#endif
#endif // HIGH_BIT_DEPTH
- pf->coeff_last[ DCT_LUMA_DC] = pf->coeff_last[DCT_LUMA_4x4];
- pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[ DCT_LUMA_AC];
- pf->coeff_level_run[ DCT_LUMA_DC] = pf->coeff_level_run[DCT_LUMA_4x4];
- pf->coeff_level_run[DCT_CHROMA_AC] = pf->coeff_level_run[ DCT_LUMA_AC];
+ pf->coeff_last[DCT_LUMA_DC] = pf->coeff_last[DCT_CHROMAU_DC] = pf->coeff_last[DCT_CHROMAV_DC] =
+ pf->coeff_last[DCT_CHROMAU_4x4] = pf->coeff_last[DCT_CHROMAV_4x4] = pf->coeff_last[DCT_LUMA_4x4];
+ pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[DCT_CHROMAU_AC] =
+ pf->coeff_last[DCT_CHROMAV_AC] = pf->coeff_last[DCT_LUMA_AC];
+ pf->coeff_last[DCT_CHROMAU_8x8] = pf->coeff_last[DCT_CHROMAV_8x8] = pf->coeff_last[DCT_LUMA_8x8];
+
+ pf->coeff_level_run[DCT_LUMA_DC] = pf->coeff_level_run[DCT_CHROMAU_DC] = pf->coeff_level_run[DCT_CHROMAV_DC] =
+ pf->coeff_level_run[DCT_CHROMAU_4x4] = pf->coeff_level_run[DCT_CHROMAV_4x4] = pf->coeff_level_run[DCT_LUMA_4x4];
+ pf->coeff_level_run[DCT_CHROMA_AC] = pf->coeff_level_run[DCT_CHROMAU_AC] =
+ pf->coeff_level_run[DCT_CHROMAV_AC] = pf->coeff_level_run[DCT_LUMA_AC];
}
int (*decimate_score15)( dctcoef *dct );
int (*decimate_score16)( dctcoef *dct );
int (*decimate_score64)( dctcoef *dct );
- int (*coeff_last[6])( dctcoef *dct );
- int (*coeff_level_run[5])( dctcoef *dct, x264_run_level_t *runlevel );
+ int (*coeff_last[14])( dctcoef *dct );
+ int (*coeff_level_run[13])( dctcoef *dct, x264_run_level_t *runlevel );
} x264_quant_function_t;
void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf );
int def_dequant4[6][16];
int def_dequant8[6][64];
int quant4_mf[4][6][16];
- int quant8_mf[2][6][64];
+ int quant8_mf[4][6][64];
int deadzone[4] = { 32 - h->param.analyse.i_luma_deadzone[1],
32 - h->param.analyse.i_luma_deadzone[0],
32 - 11, 32 - 21 };
int max_qp_err = -1;
int max_chroma_qp_err = -1;
int min_qp_err = QP_MAX+1;
+ int num_8x8_lists = CHROMA444 ? 4 : 2;
- for( int i = 0; i < 6; i++ )
+ for( int i = 0; i < 4 + num_8x8_lists; i++ )
{
int size = i<4 ? 16 : 64;
int j;
h->dequant4_mf[i_list][q][i] = def_dequant4[q][i] * h->pps->scaling_list[i_list][i];
quant4_mf[i_list][q][i] = DIV(def_quant4[q][i] * 16, h->pps->scaling_list[i_list][i]);
}
- for( int i_list = 0; i_list < 2; i_list++ )
+ for( int i_list = 0; i_list < num_8x8_lists; i_list++ )
for( int i = 0; i < 64; i++ )
{
h->dequant8_mf[i_list][q][i] = def_dequant8[q][i] * h->pps->scaling_list[4+i_list][i];
max_chroma_qp_err = q;
}
if( h->param.analyse.b_transform_8x8 )
- for( int i_list = 0; i_list < 2; i_list++ )
+ for( int i_list = 0; i_list < num_8x8_lists; i_list++ )
for( int i = 0; i < 64; i++ )
{
h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
continue;
}
h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
- if( j > 0xffff && q > max_qp_err )
+ if( j > 0xffff && q > max_qp_err && (i_list == CQM_8IY || i_list == CQM_8PY) )
max_qp_err = q;
+ if( j > 0xffff && q > max_chroma_qp_err && (i_list == CQM_8IC || i_list == CQM_8PC) )
+ max_chroma_qp_err = q;
}
}
x264_emms();
CHECKED_MALLOC( h->nr_offset_emergency, sizeof(*h->nr_offset_emergency)*(QP_MAX-QP_MAX_SPEC) );
for( int q = 0; q < QP_MAX - QP_MAX_SPEC; q++ )
- for( int cat = 0; cat <= 2; cat++ )
+ for( int cat = 0; cat < 3 + CHROMA444; cat++ )
{
- int dct8x8 = cat == 1;
+ int dct8x8 = cat&1;
int size = dct8x8 ? 64 : 16;
udctcoef *nr_offset = h->nr_offset_emergency[q][cat];
/* Denoise chroma first (due to h264's chroma QP offset), then luma, then DC. */
continue;
}
- int thresh = i == 0 ? dc_threshold : cat == 2 ? chroma_threshold : luma_threshold;
+ int thresh = i == 0 ? dc_threshold : cat >= 2 ? chroma_threshold : luma_threshold;
if( q < thresh )
{
nr_offset[i] = 0;
}
#define CQM_DELETE( n, max )\
- for( int i = 0; i < max; i++ )\
+ for( int i = 0; i < (max); i++ )\
{\
int j;\
for( j = 0; j < i; j++ )\
void x264_cqm_delete( x264_t *h )
{
CQM_DELETE( 4, 4 );
- CQM_DELETE( 8, 2 );
+ CQM_DELETE( 8, CHROMA444 ? 4 : 2 );
x264_free( h->nr_offset_emergency );
}
static int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name,
- uint8_t *cqm, const uint8_t *jvt, int length )
+ uint8_t *cqm, const uint8_t *jvt, int length )
{
int i;
memset( p, ' ', strcspn( p, "\n" ) );
b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_LUMA", h->param.cqm_4iy, x264_cqm_jvt4i, 16 );
- b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_CHROMA", h->param.cqm_4ic, x264_cqm_jvt4i, 16 );
b_error |= x264_cqm_parse_jmlist( h, buf, "INTER4X4_LUMA", h->param.cqm_4py, x264_cqm_jvt4p, 16 );
+ b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_CHROMA", h->param.cqm_4ic, x264_cqm_jvt4i, 16 );
b_error |= x264_cqm_parse_jmlist( h, buf, "INTER4X4_CHROMA", h->param.cqm_4pc, x264_cqm_jvt4p, 16 );
b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA8X8_LUMA", h->param.cqm_8iy, x264_cqm_jvt8i, 64 );
b_error |= x264_cqm_parse_jmlist( h, buf, "INTER8X8_LUMA", h->param.cqm_8py, x264_cqm_jvt8p, 64 );
+ if( CHROMA444 )
+ {
+ b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA8X8_CHROMA", h->param.cqm_8ic, x264_cqm_jvt8i, 64 );
+ b_error |= x264_cqm_parse_jmlist( h, buf, "INTER8X8_CHROMA", h->param.cqm_8pc, x264_cqm_jvt8p, 64 );
+ }
x264_free( buf );
return b_error;
enum cqm8_e
{
CQM_8IY = 0,
- CQM_8PY = 1
+ CQM_8PY = 1,
+ CQM_8IC = 2,
+ CQM_8PC = 3,
};
typedef struct
} vui;
int b_qpprime_y_zero_transform_bypass;
+ int i_chroma_format_idc;
} x264_sps_t;
int b_transform_8x8_mode;
int i_cqm_preset;
- const uint8_t *scaling_list[6]; /* could be 8, but we don't allow separate Cb/Cr lists */
+ const uint8_t *scaling_list[8]; /* could be 12, but we don't allow separate Cb/Cr lists */
} x264_pps_t;
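
The widened scaling_list lines up with the cqm8_e additions above: the four 4x4 lists occupy slots 0-3 and the 8x8 lists slots 4-7, which is why the dequant setup indexes scaling_list[4+i_list] with num_8x8_lists raised to 4 in 4:4:4. A restating sketch (illustrative helper, assuming the full x264_pps_t definition):

    static const uint8_t *scaling_list_for( const x264_pps_t *pps, int is_8x8, int i_list )
    {
        return pps->scaling_list[is_8x8 ? 4 + i_list : i_list];
    }
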
16,16,16,16,16,16,16,16,
16,16,16,16,16,16,16,16
};
-static const uint8_t * const x264_cqm_jvt[6] =
+static const uint8_t * const x264_cqm_jvt[8] =
{
x264_cqm_jvt4i, x264_cqm_jvt4p,
x264_cqm_jvt4i, x264_cqm_jvt4p,
+ x264_cqm_jvt8i, x264_cqm_jvt8p,
x264_cqm_jvt8i, x264_cqm_jvt8p
};
.end: pointer 1
align 16, resb 1
.bits_encoded: resd 1
- .state: resb 460
+ .state: resb 1024
endstruc
%macro LOAD_GLOBAL 4
int width = frame->i_width[0] + 2*PADH;
int i_padv = PADV << PARAM_INTERLACED;
int offset, height;
- pixel *src = frame->filtered[0] - frame->i_stride[0]*i_padv - PADH;
+ pixel *src = frame->filtered[0][0] - frame->i_stride[0]*i_padv - PADH;
height = X264_MIN( 16 + end + i_padv, h->fref[0][j]->i_lines[0] + i_padv*2 ) - h->fenc->i_lines_weighted;
offset = h->fenc->i_lines_weighted*frame->i_stride[0];
h->fenc->i_lines_weighted += height;
if( a->i_satd_i8x8chroma < COST_MAX )
return;
+ if( CHROMA444 )
+ {
+ if( !h->mb.b_chroma_me )
+ {
+ a->i_satd_i8x8chroma = 0;
+ return;
+ }
+
+ /* Cheap approximation of chroma costs to avoid a full i4x4/i8x8 analysis. */
+ h->predict_16x16[a->i_predict16x16]( h->mb.pic.p_fdec[1] );
+ h->predict_16x16[a->i_predict16x16]( h->mb.pic.p_fdec[2] );
+ a->i_satd_i8x8chroma = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE )
+ + h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE );
+ return;
+ }
+
const int8_t *predict_mode = predict_8x8chroma_mode_available( h->mb.i_neighbour_intra );
/* 8x8 prediction selection for chroma */
h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
}
+/* FIXME: should we do any sort of merged chroma analysis with 4:4:4? */
static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
{
const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
int i_mode = *predict_mode;
if( h->mb.b_lossless )
- x264_predict_lossless_16x16( h, i_mode );
+ x264_predict_lossless_16x16( h, 0, i_mode );
else
h->predict_16x16[i_mode]( p_dst );
int i_mode = *predict_mode;
if( h->mb.b_lossless )
- x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge );
+ x264_predict_lossless_8x8( h, p_dst_by, 0, idx, i_mode, edge );
else
h->predict_8x8[i_mode]( p_dst_by, edge );
break;
/* we need to encode this block now (for next ones) */
- h->predict_8x8[a->i_predict8x8[idx]]( p_dst_by, edge );
- x264_mb_encode_i8x8( h, idx, a->i_qp );
+ x264_mb_encode_i8x8( h, 0, idx, a->i_qp, a->i_predict8x8[idx], edge );
x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[idx] );
}
int i_mode = *predict_mode;
if( h->mb.b_lossless )
- x264_predict_lossless_4x4( h, p_dst_by, idx, i_mode );
+ x264_predict_lossless_4x4( h, p_dst_by, 0, idx, i_mode );
else
h->predict_4x4[i_mode]( p_dst_by );
break;
/* we need to encode this block now (for next ones) */
- h->predict_4x4[a->i_predict4x4[idx]]( p_dst_by );
- x264_mb_encode_i4x4( h, idx, a->i_qp );
+ x264_mb_encode_i4x4( h, 0, idx, a->i_qp, a->i_predict4x4[idx] );
h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx];
}
static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
{
- pixel *p_dst = h->mb.pic.p_fdec[0];
uint64_t i_satd, i_best;
+ int plane_count = CHROMA444 ? 3 : 1;
h->mb.i_skip_intra = 0;
if( h->mb.i_type == I_16x16 )
}
/* RD selection for chroma prediction */
- const int8_t *predict_mode = predict_8x8chroma_mode_available( h->mb.i_neighbour_intra );
- if( predict_mode[1] >= 0 )
+ if( !CHROMA444 )
{
- int8_t predict_mode_sorted[4];
- int i_max;
- int i_thresh = a->i_satd_i8x8chroma * 5/4;
-
- for( i_max = 0; *predict_mode >= 0; predict_mode++ )
+ const int8_t *predict_mode = predict_8x8chroma_mode_available( h->mb.i_neighbour_intra );
+ if( predict_mode[1] >= 0 )
{
- int i_mode = *predict_mode;
- if( a->i_satd_i8x8chroma_dir[i_mode] < i_thresh && i_mode != a->i_predict8x8chroma )
- predict_mode_sorted[i_max++] = i_mode;
- }
+ int8_t predict_mode_sorted[4];
+ int i_max;
+ int i_thresh = a->i_satd_i8x8chroma * 5/4;
- if( i_max > 0 )
- {
- int i_cbp_chroma_best = h->mb.i_cbp_chroma;
- int i_chroma_lambda = x264_lambda2_tab[h->mb.i_chroma_qp];
- /* the previous thing encoded was x264_intra_rd(), so the pixels and
- * coefs for the current chroma mode are still around, so we only
- * have to recount the bits. */
- i_best = x264_rd_cost_i8x8_chroma( h, i_chroma_lambda, a->i_predict8x8chroma, 0 );
- for( int i = 0; i < i_max; i++ )
+ for( i_max = 0; *predict_mode >= 0; predict_mode++ )
{
- int i_mode = predict_mode_sorted[i];
- if( h->mb.b_lossless )
- x264_predict_lossless_8x8_chroma( h, i_mode );
- else
+ int i_mode = *predict_mode;
+ if( a->i_satd_i8x8chroma_dir[i_mode] < i_thresh && i_mode != a->i_predict8x8chroma )
+ predict_mode_sorted[i_max++] = i_mode;
+ }
+
+ if( i_max > 0 )
+ {
+ int i_cbp_chroma_best = h->mb.i_cbp_chroma;
+ int i_chroma_lambda = x264_lambda2_tab[h->mb.i_chroma_qp];
+ /* the previous thing encoded was x264_intra_rd(), so the pixels and
+ * coefs for the current chroma mode are still around, so we only
+ * have to recount the bits. */
+ i_best = x264_rd_cost_i8x8_chroma( h, i_chroma_lambda, a->i_predict8x8chroma, 0 );
+ for( int i = 0; i < i_max; i++ )
{
- h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
- h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
+ int i_mode = predict_mode_sorted[i];
+ if( h->mb.b_lossless )
+ x264_predict_lossless_8x8_chroma( h, i_mode );
+ else
+ {
+ h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
+ h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
+ }
+ /* if we've already found a mode that needs no residual, then
+ * probably any mode with a residual will be worse.
+ * so avoid dct on the remaining modes to improve speed. */
+ i_satd = x264_rd_cost_i8x8_chroma( h, i_chroma_lambda, i_mode, h->mb.i_cbp_chroma != 0x00 );
+ COPY3_IF_LT( i_best, i_satd, a->i_predict8x8chroma, i_mode, i_cbp_chroma_best, h->mb.i_cbp_chroma );
}
- /* if we've already found a mode that needs no residual, then
- * probably any mode with a residual will be worse.
- * so avoid dct on the remaining modes to improve speed. */
- i_satd = x264_rd_cost_i8x8_chroma( h, i_chroma_lambda, i_mode, h->mb.i_cbp_chroma != 0x00 );
- COPY3_IF_LT( i_best, i_satd, a->i_predict8x8chroma, i_mode, i_cbp_chroma_best, h->mb.i_cbp_chroma );
+ h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
+ h->mb.i_cbp_chroma = i_cbp_chroma_best;
}
- h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
- h->mb.i_cbp_chroma = i_cbp_chroma_best;
}
}
if( h->mb.i_type == I_4x4 )
{
- pixel4 pels[4] = {0}; // doesn't need initting, just shuts up a gcc warning
- int i_nnz = 0;
+ pixel4 pels[3][4] = {{0}}; // doesn't need initting, just shuts up a gcc warning
+ int nnz[3] = {0};
for( int idx = 0; idx < 16; idx++ )
{
- pixel *p_dst_by = p_dst + block_idx_xy_fdec[idx];
+ pixel *dst[3] = {h->mb.pic.p_fdec[0] + block_idx_xy_fdec[idx],
+ h->mb.pic.p_fdec[1] + block_idx_xy_fdec[idx],
+ h->mb.pic.p_fdec[2] + block_idx_xy_fdec[idx]};
i_best = COST_MAX64;
- predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
+ const int8_t *predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
- /* emulate missing topright samples */
- MPIXEL_X4( &p_dst_by[4 - FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst_by[3 - FDEC_STRIDE] );
+ for( int p = 0; p < plane_count; p++ )
+ /* emulate missing topright samples */
+ MPIXEL_X4( dst[p]+4-FDEC_STRIDE ) = PIXEL_SPLAT_X4( dst[p][3-FDEC_STRIDE] );
for( ; *predict_mode >= 0; predict_mode++ )
{
int i_mode = *predict_mode;
- if( h->mb.b_lossless )
- x264_predict_lossless_4x4( h, p_dst_by, idx, i_mode );
- else
- h->predict_4x4[i_mode]( p_dst_by );
i_satd = x264_rd_cost_i4x4( h, a->i_lambda2, idx, i_mode );
if( i_best > i_satd )
{
a->i_predict4x4[idx] = i_mode;
i_best = i_satd;
- pels[0] = MPIXEL_X4( p_dst_by+0*FDEC_STRIDE );
- pels[1] = MPIXEL_X4( p_dst_by+1*FDEC_STRIDE );
- pels[2] = MPIXEL_X4( p_dst_by+2*FDEC_STRIDE );
- pels[3] = MPIXEL_X4( p_dst_by+3*FDEC_STRIDE );
- i_nnz = h->mb.cache.non_zero_count[x264_scan8[idx]];
+ for( int p = 0; p < plane_count; p++ )
+ {
+ pels[p][0] = MPIXEL_X4( dst[p]+0*FDEC_STRIDE );
+ pels[p][1] = MPIXEL_X4( dst[p]+1*FDEC_STRIDE );
+ pels[p][2] = MPIXEL_X4( dst[p]+2*FDEC_STRIDE );
+ pels[p][3] = MPIXEL_X4( dst[p]+3*FDEC_STRIDE );
+ nnz[p] = h->mb.cache.non_zero_count[x264_scan8[idx+p*16]];
+ }
}
}
- MPIXEL_X4( p_dst_by+0*FDEC_STRIDE ) = pels[0];
- MPIXEL_X4( p_dst_by+1*FDEC_STRIDE ) = pels[1];
- MPIXEL_X4( p_dst_by+2*FDEC_STRIDE ) = pels[2];
- MPIXEL_X4( p_dst_by+3*FDEC_STRIDE ) = pels[3];
- h->mb.cache.non_zero_count[x264_scan8[idx]] = i_nnz;
+ for( int p = 0; p < plane_count; p++ )
+ {
+ MPIXEL_X4( dst[p]+0*FDEC_STRIDE ) = pels[p][0];
+ MPIXEL_X4( dst[p]+1*FDEC_STRIDE ) = pels[p][1];
+ MPIXEL_X4( dst[p]+2*FDEC_STRIDE ) = pels[p][2];
+ MPIXEL_X4( dst[p]+3*FDEC_STRIDE ) = pels[p][3];
+ h->mb.cache.non_zero_count[x264_scan8[idx+p*16]] = nnz[p];
+ }
h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx];
}
}
else if( h->mb.i_type == I_8x8 )
{
- ALIGNED_ARRAY_16( pixel, edge,[33] );
+ ALIGNED_ARRAY_16( pixel, edge,[3],[48] );
+ pixel4 pels_h[3][2] = {{0}};
+ pixel pels_v[3][7] = {{0}};
+ uint16_t nnz[3][2] = {{0}}; //shut up gcc
for( int idx = 0; idx < 4; idx++ )
{
- pixel4 pels_h[2] = {0};
- pixel pels_v[7] = {0};
- uint16_t i_nnz[2] = {0}; //shut up gcc
- pixel *p_dst_by;
+ int x = idx&1;
+ int y = idx>>1;
+ int s8 = X264_SCAN8_0 + 2*x + 16*y;
+ pixel *dst[3] = {h->mb.pic.p_fdec[0] + 8*x + 8*y*FDEC_STRIDE,
+ h->mb.pic.p_fdec[1] + 8*x + 8*y*FDEC_STRIDE,
+ h->mb.pic.p_fdec[2] + 8*x + 8*y*FDEC_STRIDE};
int cbp_luma_new = 0;
int i_thresh = a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8;
i_best = COST_MAX64;
- int x = idx&1;
- int y = idx>>1;
- int s8 = X264_SCAN8_0 + 2*x + 16*y;
- p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
- predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
- h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
+ const int8_t *predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
+ for( int p = 0; p < plane_count; p++ )
+ h->predict_8x8_filter( dst[p], edge[p], h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
for( ; *predict_mode >= 0; predict_mode++ )
{
if( a->i_satd_i8x8_dir[i_mode][idx] > i_thresh )
continue;
- if( h->mb.b_lossless )
- x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge );
- else
- h->predict_8x8[i_mode]( p_dst_by, edge );
h->mb.i_cbp_luma = a->i_cbp_i8x8_luma;
- i_satd = x264_rd_cost_i8x8( h, a->i_lambda2, idx, i_mode );
+ i_satd = x264_rd_cost_i8x8( h, a->i_lambda2, idx, i_mode, edge );
if( i_best > i_satd )
{
cbp_luma_new = h->mb.i_cbp_luma;
i_best = i_satd;
- pels_h[0] = MPIXEL_X4( p_dst_by+7*FDEC_STRIDE+0 );
- pels_h[1] = MPIXEL_X4( p_dst_by+7*FDEC_STRIDE+4 );
- if( !(idx&1) )
- for( int j = 0; j < 7; j++ )
- pels_v[j] = p_dst_by[7+j*FDEC_STRIDE];
- i_nnz[0] = M16( &h->mb.cache.non_zero_count[s8 + 0*8] );
- i_nnz[1] = M16( &h->mb.cache.non_zero_count[s8 + 1*8] );
+ for( int p = 0; p < plane_count; p++ )
+ {
+ pels_h[p][0] = MPIXEL_X4( dst[p]+7*FDEC_STRIDE+0 );
+ pels_h[p][1] = MPIXEL_X4( dst[p]+7*FDEC_STRIDE+4 );
+ if( !(idx&1) )
+ for( int j = 0; j < 7; j++ )
+ pels_v[p][j] = dst[p][7+j*FDEC_STRIDE];
+ nnz[p][0] = M16( &h->mb.cache.non_zero_count[s8 + 0*8 + p*40] );
+ nnz[p][1] = M16( &h->mb.cache.non_zero_count[s8 + 1*8 + p*40] );
+ }
}
}
a->i_cbp_i8x8_luma = cbp_luma_new;
- MPIXEL_X4( p_dst_by+7*FDEC_STRIDE+0 ) = pels_h[0];
- MPIXEL_X4( p_dst_by+7*FDEC_STRIDE+4 ) = pels_h[1];
- if( !(idx&1) )
- for( int j = 0; j < 7; j++ )
- p_dst_by[7+j*FDEC_STRIDE] = pels_v[j];
- M16( &h->mb.cache.non_zero_count[s8 + 0*8] ) = i_nnz[0];
- M16( &h->mb.cache.non_zero_count[s8 + 1*8] ) = i_nnz[1];
+ for( int p = 0; p < plane_count; p++ )
+ {
+ MPIXEL_X4( dst[p]+7*FDEC_STRIDE+0 ) = pels_h[p][0];
+ MPIXEL_X4( dst[p]+7*FDEC_STRIDE+4 ) = pels_h[p][1];
+ if( !(idx&1) )
+ for( int j = 0; j < 7; j++ )
+ dst[p][7+j*FDEC_STRIDE] = pels_v[p][j];
+ M16( &h->mb.cache.non_zero_count[s8 + 0*8 + p*40] ) = nnz[p][0];
+ M16( &h->mb.cache.non_zero_count[s8 + 1*8 + p*40] ) = nnz[p][1];
+ }
x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[idx] );
}
}
}
-#define LOAD_FENC( m, src, xoff, yoff) \
+#define LOAD_FENC(m, src, xoff, yoff) \
+{ \
+ int s = !CHROMA444; \
(m)->p_cost_mv = a->p_cost_mv; \
(m)->i_stride[0] = h->mb.pic.i_stride[0]; \
(m)->i_stride[1] = h->mb.pic.i_stride[1]; \
+ (m)->i_stride[2] = h->mb.pic.i_stride[2]; \
(m)->p_fenc[0] = &(src)[0][(xoff)+(yoff)*FENC_STRIDE]; \
- (m)->p_fenc[1] = &(src)[1][((xoff)>>1)+((yoff)>>1)*FENC_STRIDE]; \
- (m)->p_fenc[2] = &(src)[2][((xoff)>>1)+((yoff)>>1)*FENC_STRIDE];
+ (m)->p_fenc[1] = &(src)[1][((xoff)>>s)+((yoff)>>s)*FENC_STRIDE]; \
+ (m)->p_fenc[2] = &(src)[2][((xoff)>>s)+((yoff)>>s)*FENC_STRIDE]; \
+}
#define LOAD_HPELS(m, src, list, ref, xoff, yoff) \
+{ \
(m)->p_fref_w = (m)->p_fref[0] = &(src)[0][(xoff)+(yoff)*(m)->i_stride[0]]; \
(m)->p_fref[1] = &(src)[1][(xoff)+(yoff)*(m)->i_stride[0]]; \
(m)->p_fref[2] = &(src)[2][(xoff)+(yoff)*(m)->i_stride[0]]; \
(m)->p_fref[3] = &(src)[3][(xoff)+(yoff)*(m)->i_stride[0]]; \
- (m)->p_fref[4] = &(src)[4][(xoff)+((yoff)>>1)*(m)->i_stride[1]]; \
+ if( CHROMA444 ) \
+ { \
+ (m)->p_fref[ 4] = &(src)[ 4][(xoff)+(yoff)*(m)->i_stride[1]]; \
+ (m)->p_fref[ 5] = &(src)[ 5][(xoff)+(yoff)*(m)->i_stride[1]]; \
+ (m)->p_fref[ 6] = &(src)[ 6][(xoff)+(yoff)*(m)->i_stride[1]]; \
+ (m)->p_fref[ 7] = &(src)[ 7][(xoff)+(yoff)*(m)->i_stride[1]]; \
+ (m)->p_fref[ 8] = &(src)[ 8][(xoff)+(yoff)*(m)->i_stride[2]]; \
+ (m)->p_fref[ 9] = &(src)[ 9][(xoff)+(yoff)*(m)->i_stride[2]]; \
+ (m)->p_fref[10] = &(src)[10][(xoff)+(yoff)*(m)->i_stride[2]]; \
+ (m)->p_fref[11] = &(src)[11][(xoff)+(yoff)*(m)->i_stride[2]]; \
+ } \
+ else \
+ (m)->p_fref[4] = &(src)[4][(xoff)+((yoff)>>1)*(m)->i_stride[1]]; \
(m)->integral = &h->mb.pic.p_integral[list][ref][(xoff)+(yoff)*(m)->i_stride[0]]; \
(m)->weight = weight_none; \
- (m)->i_ref = ref;
+ (m)->i_ref = ref; \
+}
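
LOAD_HPELS now fills up to 12 reference pointers: a {full, H, V, HV} quadruple per colour plane in 4:4:4, versus the luma quadruple plus a single interleaved CbCr pointer in 4:2:0. The slot convention, restated as an illustrative helper (pixel as in x264):

    /* 4:4:4: p_fref[p*4 + 0..3] = {full, H, V, HV} planes of colour plane p.
     * 4:2:0 keeps slots 0-3 for luma and slot 4 for the interleaved chroma. */
    static inline pixel *fref_plane( pixel *p_fref[12], int p, int hpel )
    {
        return p_fref[p*4 + hpel];
    }
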
#define LOAD_WPELS(m, src, list, ref, xoff, yoff) \
(m)->p_fref_w = &(src)[(xoff)+(yoff)*(m)->i_stride[0]]; \
pixel *pix2 = pix1+8;
const int i_stride = h->mb.pic.i_stride[1];
const int or = 8*(i8x8&1) + 2*(i8x8&2)*i_stride;
- const int oe = 4*(i8x8&1) + 2*(i8x8&2)*FENC_STRIDE;
const int i_ref = a->l0.me8x8[i8x8].i_ref;
const int mvy_offset = MB_INTERLACED & i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
x264_weight_t *weight = h->sh.weight[i_ref];
// FIXME weight can be done on 4x4 blocks even if mc is smaller
#define CHROMA4x4MC( width, height, me, x, y ) \
- h->mc.mc_chroma( &pix1[x+y*16], &pix2[x+y*16], 16, &p_fref[4][or+x*2+y*i_stride], i_stride, (me).mv[0], (me).mv[1]+mvy_offset, width, height ); \
- if( weight[1].weightfn ) \
- weight[1].weightfn[width>>2]( &pix1[x+y*16], 16, &pix1[x+y*16], 16, &weight[1], height ); \
- if( weight[2].weightfn ) \
- weight[2].weightfn[width>>2]( &pix2[x+y*16], 16, &pix2[x+y*16], 16, &weight[2], height );
+ if( CHROMA444 ) \
+ { \
+ int mvx = (me).mv[0] + 4*2*x; \
+ int mvy = (me).mv[1] + 4*2*y; \
+ h->mc.mc_luma( &pix1[2*x+2*y*16], 16, &h->mb.pic.p_fref[0][i_ref][4], i_stride, \
+ mvx, mvy, 2*width, 2*height, &h->sh.weight[i_ref][1] ); \
+ h->mc.mc_luma( &pix2[2*x+2*y*16], 16, &h->mb.pic.p_fref[0][i_ref][8], i_stride, \
+ mvx, mvy, 2*width, 2*height, &h->sh.weight[i_ref][2] ); \
+ } \
+ else \
+ { \
+ h->mc.mc_chroma( &pix1[x+y*16], &pix2[x+y*16], 16, &p_fref[4][or+x*2+y*i_stride], i_stride, (me).mv[0], (me).mv[1]+mvy_offset, width, height ); \
+ if( weight[1].weightfn ) \
+ weight[1].weightfn[width>>2]( &pix1[x+y*16], 16, &pix1[x+y*16], 16, &weight[1], height ); \
+ if( weight[2].weightfn ) \
+ weight[2].weightfn[width>>2]( &pix2[x+y*16], 16, &pix2[x+y*16], 16, &weight[2], height ); \
+ }
if( size == PIXEL_4x4 )
CHROMA4x4MC( 2,4, m[1], 2,0 );
}
- return h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][oe], FENC_STRIDE, pix1, 16 )
- + h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 16 );
+ int oe = (8*(i8x8&1) + 4*(i8x8&2)*FENC_STRIDE) >> !CHROMA444;
+ int chromapix = CHROMA444 ? PIXEL_8x8 : PIXEL_4x4;
+ return h->pixf.mbcmp[chromapix]( &h->mb.pic.p_fenc[1][oe], FENC_STRIDE, pix1, 16 )
+ + h->pixf.mbcmp[chromapix]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 16 );
}
static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
static ALWAYS_INLINE int x264_analyse_bi_chroma( x264_t *h, x264_mb_analysis_t *a, int idx, int i_pixel )
{
- ALIGNED_ARRAY_16( pixel, pix, [4],[8*8] );
- ALIGNED_ARRAY_16( pixel, bi, [2],[8*8] );
+ ALIGNED_ARRAY_16( pixel, pix, [4],[16*16] );
+ ALIGNED_ARRAY_16( pixel, bi, [2],[16*16] );
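+    /* Buffers sized for the worst case: full 16x16 chroma planes in 4:4:4. */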
int l0_mvy_offset, l1_mvy_offset;
int i_chroma_cost = 0;
#define COST_BI_CHROMA( m0, m1, width, height ) \
{ \
- l0_mvy_offset = MB_INTERLACED & m0.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
- l1_mvy_offset = MB_INTERLACED & m1.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
- h->mc.mc_chroma( pix[0], pix[1], 8, m0.p_fref[4], m0.i_stride[1], m0.mv[0], m0.mv[1] + l0_mvy_offset, width, height ); \
- h->mc.mc_chroma( pix[2], pix[3], 8, m1.p_fref[4], m1.i_stride[1], m1.mv[0], m1.mv[1] + l1_mvy_offset, width, height ); \
- h->mc.avg[i_pixel+3]( bi[0], 8, pix[0], 8, pix[2], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
- h->mc.avg[i_pixel+3]( bi[1], 8, pix[1], 8, pix[3], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
- i_chroma_cost = h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[1], FENC_STRIDE, bi[0], 8 ); \
- i_chroma_cost += h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[2], FENC_STRIDE, bi[1], 8 ); \
+ if( CHROMA444 ) \
+ { \
+ h->mc.mc_luma( pix[0], 16, &m0.p_fref[4], m0.i_stride[1], \
+ m0.mv[0], m0.mv[1], 2*width, 2*height, weight_none ); \
+ h->mc.mc_luma( pix[1], 16, &m0.p_fref[8], m0.i_stride[2], \
+ m0.mv[0], m0.mv[1], 2*width, 2*height, weight_none ); \
+ h->mc.mc_luma( pix[2], 16, &m1.p_fref[4], m1.i_stride[1], \
+ m1.mv[0], m1.mv[1], 2*width, 2*height, weight_none ); \
+ h->mc.mc_luma( pix[3], 16, &m1.p_fref[8], m1.i_stride[2], \
+ m1.mv[0], m1.mv[1], 2*width, 2*height, weight_none ); \
+ h->mc.avg[i_pixel]( bi[0], 16, pix[0], 16, pix[2], 16, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
+ h->mc.avg[i_pixel]( bi[1], 16, pix[1], 16, pix[3], 16, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
+ i_chroma_cost = h->pixf.mbcmp[i_pixel]( m0.p_fenc[1], FENC_STRIDE, bi[0], 16 ); \
+ i_chroma_cost += h->pixf.mbcmp[i_pixel]( m0.p_fenc[2], FENC_STRIDE, bi[1], 16 ); \
+ } \
+ else \
+ { \
+ l0_mvy_offset = MB_INTERLACED & m0.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
+ l1_mvy_offset = MB_INTERLACED & m1.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
+ h->mc.mc_chroma( pix[0], pix[1], 16, m0.p_fref[4], m0.i_stride[1], m0.mv[0], m0.mv[1] + l0_mvy_offset, width, height ); \
+ h->mc.mc_chroma( pix[2], pix[3], 16, m1.p_fref[4], m1.i_stride[1], m1.mv[0], m1.mv[1] + l1_mvy_offset, width, height ); \
+ h->mc.avg[i_pixel+3]( bi[0], 16, pix[0], 16, pix[2], 16, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
+ h->mc.avg[i_pixel+3]( bi[1], 16, pix[1], 16, pix[3], 16, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
+ i_chroma_cost = h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[1], FENC_STRIDE, bi[0], 16 ); \
+ i_chroma_cost += h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[2], FENC_STRIDE, bi[1], 16 ); \
+ } \
}
if( i_pixel == PIXEL_16x16 )
pixel *p_fenc = h->mb.pic.p_fenc[0];
pixel *p_fdec = h->mb.pic.p_fdec[0];
+ int s = !CHROMA444;
a->i_cost16x16direct = a->i_lambda * i_mb_b_cost_table[B_DIRECT];
if( h->param.analyse.inter & X264_ANALYSE_BSUB16x16 )
+ {
+ int chromapix = CHROMA444 ? PIXEL_8x8 : PIXEL_4x4;
for( int i = 0; i < 4; i++ )
{
const int x = (i&1)*8;
&p_fdec[x+y*FDEC_STRIDE], FDEC_STRIDE );
if( h->mb.b_chroma_me )
{
- a->i_cost8x8direct[i] += h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][(x>>1)+(y>>1)*FENC_STRIDE], FENC_STRIDE,
- &h->mb.pic.p_fdec[1][(x>>1)+(y>>1)*FDEC_STRIDE], FDEC_STRIDE )
- + h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][(x>>1)+(y>>1)*FENC_STRIDE], FENC_STRIDE,
- &h->mb.pic.p_fdec[2][(x>>1)+(y>>1)*FDEC_STRIDE], FDEC_STRIDE );
+ a->i_cost8x8direct[i] += h->pixf.mbcmp[chromapix]( &h->mb.pic.p_fenc[1][(x>>s)+(y>>s)*FENC_STRIDE], FENC_STRIDE,
+ &h->mb.pic.p_fdec[1][(x>>s)+(y>>s)*FDEC_STRIDE], FDEC_STRIDE )
+ + h->pixf.mbcmp[chromapix]( &h->mb.pic.p_fenc[2][(x>>s)+(y>>s)*FENC_STRIDE], FENC_STRIDE,
+ &h->mb.pic.p_fdec[2][(x>>s)+(y>>s)*FDEC_STRIDE], FDEC_STRIDE );
}
a->i_cost16x16direct += a->i_cost8x8direct[i];
/* mb type cost */
a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
}
+ }
else
{
+ int chromapix = CHROMA444 ? PIXEL_16x16 : PIXEL_8x8;
a->i_cost16x16direct += h->pixf.mbcmp[PIXEL_16x16]( p_fenc, FENC_STRIDE, p_fdec, FDEC_STRIDE );
if( h->mb.b_chroma_me )
{
- a->i_cost16x16direct += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE )
- + h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE );
+ a->i_cost16x16direct += h->pixf.mbcmp[chromapix]( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE )
+ + h->pixf.mbcmp[chromapix]( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE );
}
}
}
if( h->mb.b_chroma_me )
{
ALIGNED_ARRAY_16( pixel, pixuv, [2],[8*FENC_STRIDE] );
- ALIGNED_ARRAY_16( pixel, bi, [8*FENC_STRIDE] );
+ ALIGNED_ARRAY_16( pixel, bi, [16*FENC_STRIDE] );
- if( MB_INTERLACED & a->l0.bi16x16.i_ref )
+ if( CHROMA444 )
{
- int l0_mvy_offset = MB_INTERLACED & a->l0.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
- h->mc.mc_chroma( pixuv[0], pixuv[0]+8, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4],
- h->mb.pic.i_stride[1], 0, 0 + l0_mvy_offset, 8, 8 );
+ h->mc.avg[PIXEL_16x16]( bi, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4], h->mb.pic.i_stride[1],
+ h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4], h->mb.pic.i_stride[1],
+ h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
+ cost00 += h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[1], FENC_STRIDE, bi, FENC_STRIDE );
+ h->mc.avg[PIXEL_16x16]( bi, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][8], h->mb.pic.i_stride[2],
+ h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][8], h->mb.pic.i_stride[2],
+ h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
+ cost00 += h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[2], FENC_STRIDE, bi, FENC_STRIDE );
}
else
- h->mc.load_deinterleave_8x8x2_fenc( pixuv[0], h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
-
- if( MB_INTERLACED & a->l1.bi16x16.i_ref )
{
- int l1_mvy_offset = MB_INTERLACED & a->l1.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
- h->mc.mc_chroma( pixuv[1], pixuv[1]+8, FENC_STRIDE, h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4],
- h->mb.pic.i_stride[1], 0, 0 + l1_mvy_offset, 8, 8 );
- }
- else
- h->mc.load_deinterleave_8x8x2_fenc( pixuv[1], h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
+ if( MB_INTERLACED & a->l0.bi16x16.i_ref )
+ {
+ int l0_mvy_offset = MB_INTERLACED & a->l0.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+ h->mc.mc_chroma( pixuv[0], pixuv[0]+8, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4],
+ h->mb.pic.i_stride[1], 0, 0 + l0_mvy_offset, 8, 8 );
+ }
+ else
+ h->mc.load_deinterleave_8x8x2_fenc( pixuv[0], h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
+
+ if( MB_INTERLACED & a->l1.bi16x16.i_ref )
+ {
+ int l1_mvy_offset = MB_INTERLACED & a->l1.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+ h->mc.mc_chroma( pixuv[1], pixuv[1]+8, FENC_STRIDE, h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4],
+ h->mb.pic.i_stride[1], 0, 0 + l1_mvy_offset, 8, 8 );
+ }
+ else
+ h->mc.load_deinterleave_8x8x2_fenc( pixuv[1], h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
- h->mc.avg[PIXEL_8x8]( bi, FENC_STRIDE, pixuv[0], FENC_STRIDE, pixuv[1], FENC_STRIDE,
- h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
- h->mc.avg[PIXEL_8x8]( bi+8, FENC_STRIDE, pixuv[0]+8, FENC_STRIDE, pixuv[1]+8, FENC_STRIDE,
- h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
+ h->mc.avg[PIXEL_8x8]( bi, FENC_STRIDE, pixuv[0], FENC_STRIDE, pixuv[1], FENC_STRIDE,
+ h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
+ h->mc.avg[PIXEL_8x8]( bi+8, FENC_STRIDE, pixuv[0]+8, FENC_STRIDE, pixuv[1]+8, FENC_STRIDE,
+ h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
- cost00 += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, bi, FENC_STRIDE )
- + h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, bi+8, FENC_STRIDE );
+ cost00 += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, bi, FENC_STRIDE )
+ + h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, bi+8, FENC_STRIDE );
+ }
}
if( cost00 < a->i_cost16x16bi )
{
if( x264_mb_transform_8x8_allowed( h ) && h->param.analyse.b_transform_8x8 && !h->mb.b_lossless )
{
- /* Only luma MC is really needed, but the full MC is re-used in macroblock_encode. */
+ /* Only luma MC is really needed for 4:2:0, but the full MC is re-used in macroblock_encode. */
x264_mb_mc( h );
- int i_cost8 = h->pixf.sa8d[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,
- h->mb.pic.p_fdec[0], FDEC_STRIDE );
- int i_cost4 = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,
- h->mb.pic.p_fdec[0], FDEC_STRIDE );
+ int plane_count = CHROMA444 && h->mb.b_chroma_me ? 3 : 1;
+ int i_cost8 = 0, i_cost4 = 0;
+ for( int p = 0; p < plane_count; p++ )
+ {
+ i_cost8 += h->pixf.sa8d[PIXEL_16x16]( h->mb.pic.p_fenc[p], FENC_STRIDE,
+ h->mb.pic.p_fdec[p], FDEC_STRIDE );
+ i_cost4 += h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[p], FENC_STRIDE,
+ h->mb.pic.p_fdec[p], FDEC_STRIDE );
+ }
h->mb.b_transform_8x8 = i_cost8 < i_cost4;
h->mb.b_skip_mc = 1;
{
x264_analyse_update_cache( h, a );
h->mb.b_transform_8x8 ^= 1;
- /* FIXME only luma is needed, but the score for comparison already includes chroma */
+ /* FIXME only luma is needed for 4:2:0, but the score for comparison already includes chroma */
int i_rd8 = x264_rd_cost_mb( h, a->i_lambda2 );
if( *i_rd >= i_rd8 )
if( h->mb.b_chroma_me )
{
- x264_mb_analyse_intra_chroma( h, &analysis );
- x264_mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_i8x8chroma );
+ if( CHROMA444 )
+ {
+ x264_mb_analyse_intra( h, &analysis, i_cost );
+ x264_mb_analyse_intra_chroma( h, &analysis );
+ }
+ else
+ {
+ x264_mb_analyse_intra_chroma( h, &analysis );
+ x264_mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_i8x8chroma );
+ }
analysis.i_satd_i16x16 += analysis.i_satd_i8x8chroma;
analysis.i_satd_i8x8 += analysis.i_satd_i8x8chroma;
analysis.i_satd_i4x4 += analysis.i_satd_i8x8chroma;
* it was an inter block. */
x264_analyse_update_cache( h, &analysis );
x264_macroblock_encode( h );
- h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
- h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE, 8 );
- h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE, 8 );
+ for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[p], FENC_STRIDE, h->mb.pic.p_fdec[p], FDEC_STRIDE, 16 );
+ if( !CHROMA444 )
+ {
+ h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE, 8 );
+ h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE, 8 );
+ }
x264_mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) );
goto intra_analysis;
}
if( h->mb.b_chroma_me )
{
- x264_mb_analyse_intra_chroma( h, &analysis );
- x264_mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_i8x8chroma );
+ if( CHROMA444 )
+ {
+ x264_mb_analyse_intra( h, &analysis, i_satd_inter );
+ x264_mb_analyse_intra_chroma( h, &analysis );
+ }
+ else
+ {
+ x264_mb_analyse_intra_chroma( h, &analysis );
+ x264_mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_i8x8chroma );
+ }
analysis.i_satd_i16x16 += analysis.i_satd_i8x8chroma;
analysis.i_satd_i8x8 += analysis.i_satd_i8x8chroma;
analysis.i_satd_i4x4 += analysis.i_satd_i8x8chroma;
static int ALWAYS_INLINE x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra )
{
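+    /* CBF context base per ctx_block_cat; replaces the old 85 + 4*i_cat
+     * arithmetic, which breaks once the 4:4:4 categories (ctx >= 460) exist. */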
+ static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020};
int i_nza;
int i_nzb;
switch( i_cat )
{
+ case DCT_LUMA_8x8:
+ case DCT_CHROMAU_8x8:
+ case DCT_CHROMAV_8x8:
case DCT_LUMA_AC:
case DCT_LUMA_4x4:
case DCT_CHROMA_AC:
- /* no need to test for skip/pcm */
+ case DCT_CHROMAU_AC:
+ case DCT_CHROMAU_4x4:
+ case DCT_CHROMAV_AC:
+ case DCT_CHROMAV_4x4:
i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
if( x264_constant_p(b_intra) && !b_intra )
- return 85 + 4*i_cat + ((2*i_nzb + i_nza)&0x7f);
+ return base_ctx[i_cat] + ((2*i_nzb + i_nza)&0x7f);
else
{
i_nza &= 0x7f + (b_intra << 7);
i_nzb &= 0x7f + (b_intra << 7);
- return 85 + 4*i_cat + 2*!!i_nzb + !!i_nza;
+ return base_ctx[i_cat] + 2*!!i_nzb + !!i_nza;
}
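+        /* DC nnz flags are cached in bits 8-10 of the CBP (Y, U, V DC). */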
case DCT_LUMA_DC:
- i_nza = (h->mb.cache.i_cbp_left >> 8) & 1;
- i_nzb = (h->mb.cache.i_cbp_top >> 8) & 1;
- return 85 + 4*i_cat + 2*i_nzb + i_nza;
+ case DCT_CHROMAU_DC:
+ case DCT_CHROMAV_DC:
+ i_idx -= LUMA_DC;
+ i_nza = (h->mb.cache.i_cbp_left >> (8 + i_idx)) & 1;
+ i_nzb = (h->mb.cache.i_cbp_top >> (8 + i_idx)) & 1;
+ return base_ctx[i_cat] + 2*i_nzb + i_nza;
case DCT_CHROMA_DC:
- /* no need to test skip/pcm */
- i_idx -= 25;
- i_nza = h->mb.cache.i_cbp_left != -1 ? (h->mb.cache.i_cbp_left >> (9 + i_idx)) & 1 : b_intra;
- i_nzb = h->mb.cache.i_cbp_top != -1 ? (h->mb.cache.i_cbp_top >> (9 + i_idx)) & 1 : b_intra;
- return 85 + 4*i_cat + 2*i_nzb + i_nza;
+ i_idx -= LUMA_DC;
+ i_nza = h->mb.cache.i_cbp_left != -1 ? (h->mb.cache.i_cbp_left >> (8 + i_idx)) & 1 : b_intra;
+ i_nzb = h->mb.cache.i_cbp_top != -1 ? (h->mb.cache.i_cbp_top >> (8 + i_idx)) & 1 : b_intra;
+ return base_ctx[i_cat] + 2*i_nzb + i_nza;
default:
return 0;
}
}
-
-static const uint16_t significant_coeff_flag_offset[2][6] =
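+/* Indexed [MB_INTERLACED][ctx_block_cat]; entries 6-13 are the 4:4:4
+ * chroma U/V categories. */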
+static const uint16_t significant_coeff_flag_offset[2][14] =
+{
+ { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
+ { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
+};
+static const uint16_t last_coeff_flag_offset[2][14] =
{
- { 105, 120, 134, 149, 152, 402 },
- { 277, 292, 306, 321, 324, 436 }
+ { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748 },
+ { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757 }
};
-static const uint16_t last_coeff_flag_offset[2][6] =
+static const uint16_t coeff_abs_level_m1_offset[14] =
{
- { 166, 181, 195, 210, 213, 417 },
- { 338, 353, 367, 382, 385, 451 }
+ 227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
};
-static const uint16_t coeff_abs_level_m1_offset[6] =
- { 227, 237, 247, 257, 266, 426 };
static const uint8_t significant_coeff_flag_offset_8x8[2][63] =
{{
0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
/* update node ctx after coding a level>1 */
{ 4, 4, 4, 4, 5, 6, 7, 7 }
};
-static const uint8_t count_cat_m1[5] = {15, 14, 15, 3, 14};
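+/* Coefficient count minus one per category; 63 marks the 64-coefficient
+ * (8x8 transform) categories. */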
+static const uint8_t count_cat_m1[14] = {15, 14, 15, 3, 14, 63, 15, 14, 15, 63, 15, 14, 15, 63};
#if !RDO_SKIP_BS
static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
last = h->quantf.coeff_last[ctx_block_cat]( l );
#define WRITE_SIGMAP( l8x8 )\
+{\
int i = 0;\
while( 1 )\
{\
coeffs[++coeff_idx] = l[i];\
break;\
}\
- }
+ }\
+}
- if( ctx_block_cat == DCT_LUMA_8x8 )
- {
- int count_m1 = 63;
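+    /* 64-coefficient categories (the 8x8 transforms) take the 8x8
+     * significance-map path. */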
+ int count_m1 = count_cat_m1[ctx_block_cat];
+ if( count_m1 == 63 )
WRITE_SIGMAP( 1 )
- }
else
- {
- int count_m1 = count_cat_m1[ctx_block_cat];
WRITE_SIGMAP( 0 )
- }
do
{
x264_cabac_encode_bypass( cb, coeff_sign );
} while( --coeff_idx >= 0 );
}
-#define block_residual_write_cabac_8x8( h, cb, l ) block_residual_write_cabac( h, cb, DCT_LUMA_8x8, l )
+#define block_residual_write_cabac_8x8( h, cb, cat, l ) block_residual_write_cabac( h, cb, cat, l )
#else
}
}
-static void block_residual_write_cabac_8x8( x264_t *h, x264_cabac_t *cb, dctcoef *l )
+static void block_residual_write_cabac_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
- block_residual_write_cabac_internal( h, cb, DCT_LUMA_8x8, l, 1 );
+ block_residual_write_cabac_internal( h, cb, ctx_block_cat, l, 1 );
}
static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
{
#endif
#define block_residual_write_cabac_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
+do\
{\
int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, ctx_block_cat, i_idx, b_intra );\
if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
}\
else\
x264_cabac_encode_decision( cb, ctxidxinc, 0 );\
-}
+} while(0)
-void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
+#define block_residual_write_cabac_8x8_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
+do\
+{\
+ int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, ctx_block_cat, i_idx, b_intra );\
+ if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
+ {\
+ x264_cabac_encode_decision( cb, ctxidxinc, 1 );\
+ block_residual_write_cabac_8x8( h, cb, ctx_block_cat, l );\
+ }\
+ else\
+ x264_cabac_encode_decision( cb, ctxidxinc, 0 );\
+} while(0)
+
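+/* plane_count/chroma pick the coding mode: 4:4:4 writes three luma planes
+ * (3,0) with no separate chroma syntax; 4:2:0 writes one plus chroma (1,1). */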
+static ALWAYS_INLINE void x264_macroblock_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int plane_count, int chroma )
{
const int i_mb_type = h->mb.i_type;
int i_list;
bs_t s;
bs_init( &s, cb->p, cb->p_end - cb->p );
- for( int i = 0; i < 256; i++ )
- bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
- for( int ch = 1; ch < 3; ch++ )
- for( int i = 0; i < 8; i++ )
- for( int j = 0; j < 8; j++ )
- bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
+ for( int p = 0; p < plane_count; p++ )
+ for( int i = 0; i < 256; i++ )
+ bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[p][i] );
+ if( chroma )
+ for( int ch = 1; ch < 3; ch++ )
+ for( int i = 0; i < 8; i++ )
+ for( int j = 0; j < 8; j++ )
+ bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
bs_flush( &s );
cb->p = s.p;
}
}
- x264_cabac_mb_intra_chroma_pred_mode( h, cb );
+ if( chroma )
+ x264_cabac_mb_intra_chroma_pred_mode( h, cb );
}
else if( i_mb_type == P_L0 )
{
if( i_mb_type != I_16x16 )
{
x264_cabac_mb_cbp_luma( h, cb );
- x264_cabac_mb_cbp_chroma( h, cb );
+ if( chroma )
+ x264_cabac_mb_cbp_chroma( h, cb );
}
if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
x264_cabac_mb_transform_size( h, cb );
}
- if( h->mb.i_cbp_luma > 0 || h->mb.i_cbp_chroma > 0 || i_mb_type == I_16x16 )
+ if( h->mb.i_cbp_luma > 0 || (chroma && h->mb.i_cbp_chroma > 0) || i_mb_type == I_16x16 )
{
const int b_intra = IS_INTRA( i_mb_type );
x264_cabac_mb_qp_delta( h, cb );
if( i_mb_type == I_16x16 )
{
/* DC Luma */
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_DC, 24, h->dct.luma16x16_dc, 1 );
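+            /* ctx_cat_plane[][] maps each luma category to its U/V variant
+             * for 4:4:4 (e.g. DCT_LUMA_DC -> DCT_CHROMAU_DC when p == 1). */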
+ for( int p = 0; p < plane_count; p++ )
+ {
+ block_residual_write_cabac_cbf( h, cb, ctx_cat_plane[DCT_LUMA_DC][p], LUMA_DC+p, h->dct.luma16x16_dc[p], 1 );
- /* AC Luma */
- if( h->mb.i_cbp_luma != 0 )
- for( int i = 0; i < 16; i++ )
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 1 );
+ /* AC Luma */
+ if( h->mb.i_cbp_luma )
+ for( int i = p*16; i < p*16+16; i++ )
+ block_residual_write_cabac_cbf( h, cb, ctx_cat_plane[DCT_LUMA_AC][p], i, h->dct.luma4x4[i]+1, 1 );
+ }
}
else if( h->mb.b_transform_8x8 )
{
- for( int i = 0; i < 4; i++ )
- if( h->mb.i_cbp_luma & ( 1 << i ) )
- block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i] );
+ if( plane_count == 3 )
+ {
+ ALIGNED_4( uint8_t nnzbak[3][8] );
+
+/* Stupid nnz munging: the 8x8 CBF contexts treat a neighboring 8x8 block as
+ * unavailable (0x80) unless that neighbor was also coded with the 8x8
+ * transform, so patch the cached nnz values here and restore them after. */
+#define BACKUP( dst, src, res )\
+ dst = src;\
+ src = res;
+
+#define RESTORE( dst, src, res )\
+ src = dst;
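+/* BACKUP and RESTORE share one parameter list so either can instantiate
+ * MUNGE_8x8_NNZ; RESTORE simply ignores the replacement value. */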
+
+#define MUNGE_8x8_NNZ( MUNGE )\
+if( (h->mb.i_neighbour & MB_LEFT) && !h->mb.mb_transform_size[h->mb.i_mb_left_xy[0]] )\
+{\
+ MUNGE( nnzbak[0][0], h->mb.cache.non_zero_count[x264_scan8[16*0+ 0] - 1], 0x80 )\
+ MUNGE( nnzbak[0][1], h->mb.cache.non_zero_count[x264_scan8[16*0+ 2] - 1], 0x80 )\
+ MUNGE( nnzbak[1][0], h->mb.cache.non_zero_count[x264_scan8[16*1+ 0] - 1], 0x80 )\
+ MUNGE( nnzbak[1][1], h->mb.cache.non_zero_count[x264_scan8[16*1+ 2] - 1], 0x80 )\
+ MUNGE( nnzbak[2][0], h->mb.cache.non_zero_count[x264_scan8[16*2+ 0] - 1], 0x80 )\
+ MUNGE( nnzbak[2][1], h->mb.cache.non_zero_count[x264_scan8[16*2+ 2] - 1], 0x80 )\
+}\
+if( (h->mb.i_neighbour & MB_LEFT) && !h->mb.mb_transform_size[h->mb.i_mb_left_xy[1]] )\
+{\
+ MUNGE( nnzbak[0][2], h->mb.cache.non_zero_count[x264_scan8[16*0+ 8] - 1], 0x80 )\
+ MUNGE( nnzbak[0][3], h->mb.cache.non_zero_count[x264_scan8[16*0+10] - 1], 0x80 )\
+ MUNGE( nnzbak[1][2], h->mb.cache.non_zero_count[x264_scan8[16*1+ 8] - 1], 0x80 )\
+ MUNGE( nnzbak[1][3], h->mb.cache.non_zero_count[x264_scan8[16*1+10] - 1], 0x80 )\
+ MUNGE( nnzbak[2][2], h->mb.cache.non_zero_count[x264_scan8[16*2+ 8] - 1], 0x80 )\
+ MUNGE( nnzbak[2][3], h->mb.cache.non_zero_count[x264_scan8[16*2+10] - 1], 0x80 )\
+}\
+if( (h->mb.i_neighbour & MB_TOP) && !h->mb.mb_transform_size[h->mb.i_mb_top_xy] )\
+{\
+ MUNGE( M32( &nnzbak[0][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*0] - 8] ), 0x80808080U )\
+ MUNGE( M32( &nnzbak[1][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*1] - 8] ), 0x80808080U )\
+ MUNGE( M32( &nnzbak[2][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*2] - 8] ), 0x80808080U )\
+}
+
+ MUNGE_8x8_NNZ( BACKUP )
+
+ for( int p = 0; p < 3; p++ )
+ for( int i = 0; i < 4; i++ )
+ if( h->mb.i_cbp_luma & ( 1 << i ) )
+ block_residual_write_cabac_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i*4+p*16, h->dct.luma8x8[i+p*4], b_intra );
+
+ MUNGE_8x8_NNZ( RESTORE )
+ }
+ else
+ {
+ for( int i = 0; i < 4; i++ )
+ if( h->mb.i_cbp_luma & ( 1 << i ) )
+ block_residual_write_cabac_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i] );
+ }
}
else
{
- for( int i = 0; i < 16; i++ )
- if( h->mb.i_cbp_luma & ( 1 << ( i >> 2 ) ) )
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], b_intra );
+ for( int p = 0; p < plane_count; p++ )
+ for( int i = 0; i < 16; i++ )
+ if( h->mb.i_cbp_luma & ( 1 << ( i >> 2 ) ) )
+ block_residual_write_cabac_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i+p*16, h->dct.luma4x4[i+p*16], b_intra );
}
- if( h->mb.i_cbp_chroma ) /* Chroma DC residual present */
+ if( chroma && h->mb.i_cbp_chroma ) /* Chroma DC residual present */
{
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], b_intra );
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], b_intra );
+ block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], b_intra );
+ block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], b_intra );
if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
- for( int i = 16; i < 24; i++ )
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, b_intra );
+ for( int ch = 1; ch < 3; ch++ )
+ for( int i = ch*16; i < ch*16+4; i++ )
+ block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, b_intra );
}
}
#endif
}
+void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
+{
+ if( CHROMA444 )
+ x264_macroblock_write_cabac_internal( h, cb, 3, 0 );
+ else
+ x264_macroblock_write_cabac_internal( h, cb, 1, 1 );
+}
+
#if RDO_SKIP_BS
/*****************************************************************************
* RD only; doesn't generate a valid bitstream
{
const int i_mb_type = h->mb.i_type;
int b_8x16 = h->mb.i_partition == D_8x16;
+ int plane_count = CHROMA444 ? 3 : 1;
if( i_mb_type == P_8x8 )
{
if( h->mb.i_cbp_luma & (1 << i8) )
{
if( h->mb.b_transform_8x8 )
- block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i8] );
+ {
+ if( CHROMA444 )
+ for( int p = 0; p < 3; p++ )
+ block_residual_write_cabac_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 0 );
+ else
+ block_residual_write_cabac_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
+ }
else
- for( int i4 = 0; i4 < 4; i4++ )
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 0 );
+ for( int p = 0; p < plane_count; p++ )
+ for( int i4 = 0; i4 < 4; i4++ )
+ block_residual_write_cabac_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16], 0 );
}
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 );
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1, 0 );
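+    /* U and V 4x4 blocks are now numbered 16-31 and 32-47 respectively. */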
+ if( h->mb.i_cbp_chroma )
+ {
+ block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 );
+ block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, 32+i8, h->dct.luma4x4[32+i8]+1, 0 );
+ }
i8 += x264_pixel_size[i_pixel].h >> 3;
}
static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel )
{
int b_8x4 = i_pixel == PIXEL_8x4;
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 0 );
+ int plane_count = CHROMA444 ? 3 : 1;
if( i_pixel == PIXEL_4x4 )
- {
x264_cabac_mb_mvd( h, cb, 0, i4, 1, 1 );
- }
else
- {
x264_cabac_mb_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4+2-b_8x4, h->dct.luma4x4[i4+2-b_8x4], 0 );
+ for( int p = 0; p < plane_count; p++ )
+ {
+ block_residual_write_cabac_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4, h->dct.luma4x4[p*16+i4], 0 );
+ if( i_pixel != PIXEL_4x4 )
+ block_residual_write_cabac_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4+2-b_8x4, h->dct.luma4x4[p*16+i4+2-b_8x4], 0 );
}
}
x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
x264_cabac_mb_cbp_luma( h, cb );
if( h->mb.i_cbp_luma & (1 << i8) )
- block_residual_write_cabac_8x8( h, cb, h->dct.luma8x8[i8] );
+ {
+ if( CHROMA444 )
+ for( int p = 0; p < 3; p++ )
+ block_residual_write_cabac_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 1 );
+ else
+ block_residual_write_cabac_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
+ }
}
static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode )
{
const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 );
+ int plane_count = CHROMA444 ? 3 : 1;
i_mode = x264_mb_pred_mode4x4_fix( i_mode );
x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 1 );
+ for( int p = 0; p < plane_count; p++ )
+ block_residual_write_cabac_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+p*16, h->dct.luma4x4[i4+p*16], 1 );
}
static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
x264_cabac_mb_cbp_chroma( h, cb );
if( h->mb.i_cbp_chroma > 0 )
{
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], 1 );
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], 1 );
+ block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], 1 );
+ block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], 1 );
if( h->mb.i_cbp_chroma == 2 )
- for( int i = 16; i < 24; i++ )
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 1 );
+ for( int ch = 1; ch < 3; ch++ )
+ for( int i = ch*16; i < ch*16+4; i++ )
+ block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 1 );
}
}
#endif
#define RDO_SKIP_BS 0
#endif
-static const uint8_t intra4x4_cbp_to_golomb[48]=
+/* [400,420][inter,intra]: the 4:0:0 (luma-only) tables are also used for
+ * 4:4:4, whose chroma is coded as extra luma planes. */
+static const uint8_t cbp_to_golomb[2][2][48] =
{
- 3, 29, 30, 17, 31, 18, 37, 8, 32, 38, 19, 9, 20, 10, 11, 2,
- 16, 33, 34, 21, 35, 22, 39, 4, 36, 40, 23, 5, 24, 6, 7, 1,
- 41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15, 0
-};
-static const uint8_t inter_cbp_to_golomb[48]=
-{
- 0, 2, 3, 7, 4, 8, 17, 13, 5, 18, 9, 14, 10, 15, 16, 11,
- 1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
- 6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12
+ {{ 0, 1, 2, 5, 3, 6, 14, 10, 4, 15, 7, 11, 8, 12, 13, 9 },
+ { 1, 10, 11, 6, 12, 7, 14, 2, 13, 15, 8, 3, 9, 4, 5, 0 }},
+ {{ 0, 2, 3, 7, 4, 8, 17, 13, 5, 18, 9, 14, 10, 15, 16, 11,
+ 1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
+ 6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12 },
+ { 3, 29, 30, 17, 31, 18, 37, 8, 32, 38, 19, 9, 20, 10, 11, 2,
+ 16, 33, 34, 21, 35, 22, 39, 4, 36, 40, 23, 5, 24, 6, 7, 1,
+ 41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15, 0 }}
};
+
static const uint8_t mb_type_b_to_golomb[3][9]=
{
{ 4, 8, 12, 10, 6, 14, 16, 18, 20 }, /* D_16x8 */
{ 5, 9, 13, 11, 7, 15, 17, 19, 21 }, /* D_8x16 */
{ 1, -1, -1, -1, 2, -1, -1, -1, 3 } /* D_16x16 */
};
+
static const uint8_t sub_mb_type_p_to_golomb[4]=
{
3, 1, 2, 0
};
+
static const uint8_t sub_mb_type_b_to_golomb[13]=
{
10, 4, 5, 1, 11, 6, 7, 2, 12, 8, 9, 3, 0
{
bs_t *s = &h->out.bs;
static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0};
- static const uint8_t count_cat[5] = {16, 15, 16, 4, 15};
+ static const uint8_t count_cat[14] = {16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64};
x264_run_level_t runlevel;
int i_trailing, i_total_zero, i_suffix_length;
int i_total = 0;
#define block_residual_write_cavlc(h,cat,idx,l)\
{\
- int nC = cat == DCT_CHROMA_DC ? 4 : ct_index[x264_mb_predict_non_zero_code( h, cat == DCT_LUMA_DC ? 0 : idx )];\
+ int nC = cat == DCT_CHROMA_DC ? 4 : ct_index[x264_mb_predict_non_zero_code( h, cat == DCT_LUMA_DC ? (idx - LUMA_DC)*16 : idx )];\
uint8_t *nnz = &h->mb.cache.non_zero_count[x264_scan8[idx]];\
if( !*nnz )\
bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );\
/* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
- && !h->mb.cache.non_zero_count[x264_scan8[24]] )
+ && !h->mb.cache.non_zero_count[x264_scan8[LUMA_DC]]
+ && !h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+0]]
+ && !h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+1]] )
{
#if !RDO_SKIP_BS
h->mb.i_qp = h->mb.i_last_qp;
{
/* shuffle 8x8 dct coeffs into 4x4 lists */
for( int i8 = i8start; i8 <= i8end; i8++ )
- if( h->mb.i_cbp_luma & (1 << i8) )
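+        /* The luma CBP has only four bits, but i8 can now index any of the
+         * three planes, so test the per-block nnz cache directly. */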
+ if( h->mb.cache.non_zero_count[x264_scan8[i8*4]] )
h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[i8*4], h->dct.luma8x8[i8], &h->mb.cache.non_zero_count[x264_scan8[i8*4]] );
}
for( int i8 = i8start; i8 <= i8end; i8++ )
- if( h->mb.i_cbp_luma & (1 << i8) )
+ if( h->mb.i_cbp_luma & (1 << (i8&3)) )
for( int i4 = 0; i4 < 4; i4++ )
block_residual_write_cavlc( h, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4] );
}
const int i_mb_type = h->mb.i_type;
static const uint8_t i_offsets[3] = {5,23,0};
int i_mb_i_offset = i_offsets[h->sh.i_type];
+ int plane_count = CHROMA444 ? 3 : 1;
+ int chroma = !CHROMA444;
#if RDO_SKIP_BS
s->i_bits_encoded = 0;
bs_align_0( s );
- for( int i = 0; i < 256; i++ )
- bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
- for( int ch = 1; ch < 3; ch++ )
- for( int i = 0; i < 8; i++ )
- for( int j = 0; j < 8; j++ )
- bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
+ for( int p = 0; p < plane_count; p++ )
+ for( int i = 0; i < 256; i++ )
+ bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[p][i] );
+ if( chroma )
+ for( int ch = 1; ch < 3; ch++ )
+ for( int i = 0; i < 8; i++ )
+ for( int j = 0; j < 8; j++ )
+ bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
bs_init( s, s->p, s->p_end - s->p );
s->p_start = p_start;
else
bs_write( s, 4, i_mode - (i_mode > i_pred) );
}
- bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
+ if( chroma )
+ bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
}
else if( i_mb_type == I_16x16 )
{
bs_write_ue( s, i_mb_i_offset + 1 + x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode] +
h->mb.i_cbp_chroma * 4 + ( h->mb.i_cbp_luma == 0 ? 0 : 12 ) );
- bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
+ if( chroma )
+ bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
}
else if( i_mb_type == P_L0 )
{
h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
#endif
- /* Coded block patern */
- if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
- bs_write_ue( s, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
- else if( i_mb_type != I_16x16 )
- bs_write_ue( s, inter_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
+ /* Coded block pattern */
+ if( i_mb_type != I_16x16 )
+ bs_write_ue( s, cbp_to_golomb[chroma][IS_INTRA(i_mb_type)][(h->mb.i_cbp_chroma << 4)|h->mb.i_cbp_luma] );
/* transform size 8x8 flag */
if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
bs_write1( s, h->mb.b_transform_8x8 );
- /* write residual */
if( i_mb_type == I_16x16 )
{
cavlc_qp_delta( h );
/* DC Luma */
- block_residual_write_cavlc( h, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
+ for( int p = 0; p < plane_count; p++ )
+ {
+ block_residual_write_cavlc( h, DCT_LUMA_DC, LUMA_DC+p, h->dct.luma16x16_dc[p] );
- /* AC Luma */
- if( h->mb.i_cbp_luma )
- for( int i = 0; i < 16; i++ )
- block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
+ /* AC Luma */
+ if( h->mb.i_cbp_luma )
+ for( int i = p*16; i < p*16+16; i++ )
+ block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
+ }
}
else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
{
cavlc_qp_delta( h );
- x264_macroblock_luma_write_cavlc( h, 0, 3 );
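+        /* 8x8 block indices run 0-3 per plane, so 4:4:4 covers 0-11 here. */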
+ x264_macroblock_luma_write_cavlc( h, 0, plane_count*4-1 );
}
if( h->mb.i_cbp_chroma )
{
/* Chroma DC residual present */
- block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
- block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
+ block_residual_write_cavlc( h, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0] );
+ block_residual_write_cavlc( h, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1] );
if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
- for( int i = 16; i < 24; i++ )
- block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
+ for( int ch = 1; ch < 3; ch++ )
+ for( int i = ch*16; i < ch*16+4; i++ )
+ block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
}
#if !RDO_SKIP_BS
bs_t *s = &h->out.bs;
const int i_mb_type = h->mb.i_type;
int b_8x16 = h->mb.i_partition == D_8x16;
+ int plane_count = CHROMA444 ? 3 : 1;
int j;
if( i_mb_type == P_8x8 )
for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
{
- x264_macroblock_luma_write_cavlc( h, i8, i8 );
- block_residual_write_cavlc( h, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1 );
- block_residual_write_cavlc( h, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1 );
+ for( int p = 0; p < plane_count; p++ )
+ x264_macroblock_luma_write_cavlc( h, p*4+i8, p*4+i8 );
+ if( h->mb.i_cbp_chroma )
+ {
+ block_residual_write_cavlc( h, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1 );
+ block_residual_write_cavlc( h, DCT_CHROMA_AC, 32+i8, h->dct.luma4x4[32+i8]+1 );
+ }
i8 += x264_pixel_size[i_pixel].h >> 3;
}
static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
{
+ int plane_count = CHROMA444 ? 3 : 1;
int b_8x4 = i_pixel == PIXEL_8x4;
h->out.bs.i_bits_encoded = 0;
cavlc_mb_mvd( h, 0, i4, 1+b_8x4 );
- block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
- if( i_pixel != PIXEL_4x4 )
+ for( int p = 0; p < plane_count; p++ )
{
- i4 += 2-b_8x4;
- block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
+ block_residual_write_cavlc( h, DCT_LUMA_4x4, p*16+i4, h->dct.luma4x4[p*16+i4] );
+ if( i_pixel != PIXEL_4x4 )
+ block_residual_write_cavlc( h, DCT_LUMA_4x4, p*16+i4+2-b_8x4, h->dct.luma4x4[p*16+i4+2-b_8x4] );
}
return h->out.bs.i_bits_encoded;
static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
{
+ int plane_count = CHROMA444 ? 3 : 1;
h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
- bs_write_ue( &h->out.bs, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
- x264_macroblock_luma_write_cavlc( h, i8, i8 );
+ bs_write_ue( &h->out.bs, cbp_to_golomb[!CHROMA444][1][(h->mb.i_cbp_chroma << 4)|h->mb.i_cbp_luma] );
+ for( int p = 0; p < plane_count; p++ )
+ x264_macroblock_luma_write_cavlc( h, p*4+i8, p*4+i8 );
return h->out.bs.i_bits_encoded;
}
static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode )
{
+ int plane_count = CHROMA444 ? 3 : 1;
h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode );
- block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
+ for( int p = 0; p < plane_count; p++ )
+ block_residual_write_cavlc( h, DCT_LUMA_4x4, p*16+i4, h->dct.luma4x4[p*16+i4] );
return h->out.bs.i_bits_encoded;
}
h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
if( h->mb.i_cbp_chroma )
{
- block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
- block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
+ block_residual_write_cavlc( h, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0] );
+ block_residual_write_cavlc( h, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1] );
if( h->mb.i_cbp_chroma == 2 )
- {
- for( int i = 16; i < 24; i++ )
- block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
- }
+ for( int ch = 1; ch < 3; ch++ )
+ for( int i = ch*16; i < ch*16+4; i++ )
+ block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
}
return h->out.bs.i_bits_encoded;
}
FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
if( !f )
return;
+
/* Write the frame in display order */
- fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2 * sizeof(pixel), SEEK_SET );
- for( int y = 0; y < h->param.i_height; y++ )
- fwrite( &h->fdec->plane[0][y*h->fdec->i_stride[0]], sizeof(pixel), h->param.i_width, f );
- int cw = h->param.i_width>>1;
- int ch = h->param.i_height>>1;
- pixel *planeu = x264_malloc( (cw*ch*2+32)*sizeof(pixel) );
- pixel *planev = planeu + cw*ch + 16;
- h->mc.plane_copy_deinterleave( planeu, cw, planev, cw, h->fdec->plane[1], h->fdec->i_stride[1], cw, ch );
- fwrite( planeu, 1, cw*ch*sizeof(pixel), f );
- fwrite( planev, 1, cw*ch*sizeof(pixel), f );
- x264_free( planeu );
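+    /* A 4:2:0 frame is 3/2 the luma plane size; a 4:4:4 frame is 3x. */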
+ int frame_size = h->param.i_height * h->param.i_width * (3<<CHROMA444)/2 * sizeof(pixel);
+ fseek( f, (uint64_t)h->fdec->i_frame * frame_size, SEEK_SET );
+ for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
+ for( int y = 0; y < h->param.i_height; y++ )
+ fwrite( &h->fdec->plane[p][y*h->fdec->i_stride[p]], sizeof(pixel), h->param.i_width, f );
+ if( !CHROMA444 )
+ {
+ int cw = h->param.i_width>>1;
+ int ch = h->param.i_height>>1;
+ pixel *planeu = x264_malloc( (cw*ch*2+32)*sizeof(pixel) );
+ pixel *planev = planeu + cw*ch + 16;
+ h->mc.plane_copy_deinterleave( planeu, cw, planev, cw, h->fdec->plane[1], h->fdec->i_stride[1], cw, ch );
+ fwrite( planeu, 1, cw*ch*sizeof(pixel), f );
+ fwrite( planev, 1, cw*ch*sizeof(pixel), f );
+ x264_free( planeu );
+ }
fclose( f );
}
return -1;
}
- if( h->param.i_width % 2 || h->param.i_height % 2 )
+ int i_csp = h->param.i_csp & X264_CSP_MASK;
+ if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
{
- x264_log( h, X264_LOG_ERROR, "width or height not divisible by 2 (%dx%d)\n",
- h->param.i_width, h->param.i_height );
+ x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/I444/YV24 supported)\n" );
return -1;
}
- int i_csp = h->param.i_csp & X264_CSP_MASK;
- if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
+
+ if( i_csp < X264_CSP_I444 && (h->param.i_width % 2 || h->param.i_height % 2) )
{
- x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12 supported)\n" );
+ x264_log( h, X264_LOG_ERROR, "width or height not divisible by 2 (%dx%d)\n",
+ h->param.i_width, h->param.i_height );
return -1;
}
h->param.analyse.f_psy_trellis = x264_clip3f( h->param.analyse.f_psy_trellis, 0, 10 );
h->mb.i_psy_rd = h->param.analyse.i_subpel_refine >= 6 ? FIX8( h->param.analyse.f_psy_rd ) : 0;
h->mb.i_psy_trellis = h->param.analyse.i_trellis ? FIX8( h->param.analyse.f_psy_trellis / 4 ) : 0;
+ /* In 4:4:4 mode, chroma gets twice as much resolution, so we can halve its quality. */
+ if( b_open && i_csp >= X264_CSP_I444 && h->param.analyse.b_psy )
+ h->param.analyse.i_chroma_qp_offset += 6;
/* Psy RDO increases overall quantizers to improve the quality of luma--this indirectly hurts chroma quality */
/* so we lower the chroma QP offset to compensate */
if( b_open && h->mb.i_psy_rd )
x264_predict_8x8_init( h->param.cpu, h->predict_8x8, &h->predict_8x8_filter );
x264_predict_4x4_init( h->param.cpu, h->predict_4x4 );
if( h->param.b_cabac )
- x264_cabac_init();
+ x264_cabac_init( h );
else
x264_cavlc_init();
x264_pixel_init( h->param.cpu, &h->pixf );
h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
h->sps->i_profile_idc == PROFILE_HIGH10 ? (h->sps->b_constraint_set3 == 1 ? "High 10 Intra" : "High 10") :
- "High 4:4:4 Predictive";
+ h->sps->b_constraint_set3 == 1 ? "High 4:4:4 Intra" : "High 4:4:4 Predictive";
char level[4];
snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
- if( h->sps->i_level_idc == 9 || ( h->sps->i_level_idc == 11 && h->sps->b_constraint_set3 ) )
+ if( h->sps->i_level_idc == 9 || ( h->sps->i_level_idc == 11 && h->sps->b_constraint_set3 &&
+ (h->sps->i_profile_idc >= PROFILE_BASELINE && h->sps->i_profile_idc <= PROFILE_EXTENDED) ) )
strcpy( level, "1b" );
if( h->sps->i_profile_idc < PROFILE_HIGH10 )
}
else
{
- x264_log( h, X264_LOG_INFO, "profile %s, level %s, bit depth %d\n",
- profile, level, BIT_DEPTH );
+ x264_log( h, X264_LOG_INFO, "profile %s, level %s, %s %d-bit\n",
+ profile, level, CHROMA444 ? "4:4:4" : "4:2:0", BIT_DEPTH );
}
return h;
/* generate picture parameters */
x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
- x264_pps_write( &h->out.bs, h->pps );
+ x264_pps_write( &h->out.bs, h->sps, h->pps );
if( x264_nal_end( h ) )
return -1;
{
/* for now no analysis and set all weights to nothing */
for( int i_ref = 0; i_ref < h->i_ref[0]; i_ref++ )
- h->fenc->weighted[i_ref] = h->fref[0][i_ref]->filtered[0];
+ h->fenc->weighted[i_ref] = h->fref[0][i_ref]->filtered[0][0];
// FIXME: This only supports weighting of one reference frame
// and duplicates of that frame.
//scale full resolution frame
if( h->param.i_threads == 1 )
{
- pixel *src = h->fref[0][j]->filtered[0] - h->fref[0][j]->i_stride[0]*i_padv - PADH;
+ pixel *src = h->fref[0][j]->filtered[0][0] - h->fref[0][j]->i_stride[0]*i_padv - PADH;
pixel *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
int stride = h->fenc->i_stride[0];
int width = h->fenc->i_width[0] + PADH*2;
* but the actual image data is equivalent. For now, maintain this
* consistency by copying deblocked pixels between planes. */
if( PARAM_INTERLACED )
- for( int p = 0; p < 2; p++ )
- for( int i = minpix_y>>p; i < maxpix_y>>p; i++ )
+ for( int p = 0; p < h->fdec->i_plane; p++ )
+ for( int i = minpix_y>>(!CHROMA444 && p); i < maxpix_y>>(!CHROMA444 && p); i++ )
memcpy( h->fdec->plane_fld[p] + i*h->fdec->i_stride[p],
h->fdec->plane[p] + i*h->fdec->i_stride[p],
h->mb.i_mb_width*16*sizeof(pixel) );
}
if( SLICE_MBAFF )
- for( int i = 0; i < 2; i++ )
+ for( int i = 0; i < 3; i++ )
{
XCHG( pixel *, h->intra_border_backup[0][i], h->intra_border_backup[3][i] );
XCHG( pixel *, h->intra_border_backup[1][i], h->intra_border_backup[4][i] );
maxpix_y = X264_MIN( maxpix_y, h->param.i_height );
if( h->param.analyse.b_psnr )
{
- uint64_t ssd_y = x264_pixel_ssd_wxh( &h->pixf,
- h->fdec->plane[0] + minpix_y * h->fdec->i_stride[0], h->fdec->i_stride[0],
- h->fenc->plane[0] + minpix_y * h->fenc->i_stride[0], h->fenc->i_stride[0],
- h->param.i_width, maxpix_y-minpix_y );
- uint64_t ssd_u, ssd_v;
- x264_pixel_ssd_nv12( &h->pixf,
- h->fdec->plane[1] + (minpix_y>>1) * h->fdec->i_stride[1], h->fdec->i_stride[1],
- h->fenc->plane[1] + (minpix_y>>1) * h->fenc->i_stride[1], h->fenc->i_stride[1],
- h->param.i_width>>1, (maxpix_y-minpix_y)>>1, &ssd_u, &ssd_v );
- h->stat.frame.i_ssd[0] += ssd_y;
- h->stat.frame.i_ssd[1] += ssd_u;
- h->stat.frame.i_ssd[2] += ssd_v;
+ for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
+ h->stat.frame.i_ssd[p] += x264_pixel_ssd_wxh( &h->pixf,
+ h->fdec->plane[p] + minpix_y * h->fdec->i_stride[p], h->fdec->i_stride[p],
+ h->fenc->plane[p] + minpix_y * h->fenc->i_stride[p], h->fenc->i_stride[p],
+ h->param.i_width, maxpix_y-minpix_y );
+ if( !CHROMA444 )
+ {
+ uint64_t ssd_u, ssd_v;
+ x264_pixel_ssd_nv12( &h->pixf,
+ h->fdec->plane[1] + (minpix_y>>1) * h->fdec->i_stride[1], h->fdec->i_stride[1],
+ h->fenc->plane[1] + (minpix_y>>1) * h->fenc->i_stride[1], h->fenc->i_stride[1],
+ h->param.i_width>>1, (maxpix_y-minpix_y)>>1, &ssd_u, &ssd_v );
+ h->stat.frame.i_ssd[1] += ssd_u;
+ h->stat.frame.i_ssd[2] += ssd_v;
+ }
}
if( h->param.analyse.b_ssim )
bs_align_1( &h->out.bs );
/* init cabac */
- x264_cabac_context_init( &h->cabac, h->sh.i_type, x264_clip3( h->sh.i_qp-QP_BD_OFFSET, 0, 51 ), h->sh.i_cabac_init_idc );
+ x264_cabac_context_init( h, &h->cabac, h->sh.i_type, x264_clip3( h->sh.i_qp-QP_BD_OFFSET, 0, 51 ), h->sh.i_cabac_init_idc );
x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
last_emu_check = h->cabac.p;
}
{
if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
{
- int cbpsum = (h->mb.i_cbp_luma&1) + ((h->mb.i_cbp_luma>>1)&1)
- + ((h->mb.i_cbp_luma>>2)&1) + (h->mb.i_cbp_luma>>3);
- h->stat.frame.i_mb_cbp[!b_intra + 0] += cbpsum;
- h->stat.frame.i_mb_cbp[!b_intra + 2] += !!h->mb.i_cbp_chroma;
- h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma >> 1;
+ if( CHROMA444 )
+ {
+ for( int i = 0; i < 4; i++ )
+ if( h->mb.i_cbp_luma & (1 << i) )
+ for( int p = 0; p < 3; p++ )
+ {
+ int s8 = i*4+p*16;
+ int nnz8x8 = M16( &h->mb.cache.non_zero_count[x264_scan8[s8]+0] )
+ | M16( &h->mb.cache.non_zero_count[x264_scan8[s8]+8] );
+ h->stat.frame.i_mb_cbp[!b_intra + p*2] += !!nnz8x8;
+ }
+ }
+ else
+ {
+ int cbpsum = (h->mb.i_cbp_luma&1) + ((h->mb.i_cbp_luma>>1)&1)
+ + ((h->mb.i_cbp_luma>>2)&1) + (h->mb.i_cbp_luma>>3);
+ h->stat.frame.i_mb_cbp[!b_intra + 0] += cbpsum;
+ h->stat.frame.i_mb_cbp[!b_intra + 2] += !!h->mb.i_cbp_chroma;
+ h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma >> 1;
+ }
}
if( h->mb.i_cbp_luma && !b_intra )
{
/* generate picture parameters */
x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
- x264_pps_write( &h->out.bs, h->pps );
+ x264_pps_write( &h->out.bs, h->sps, h->pps );
if( x264_nal_end( h ) )
return -1;
overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
h->stat.frame.i_ssd[1],
h->stat.frame.i_ssd[2],
};
+ int luma_size = h->param.i_width * h->param.i_height;
+ int chroma_size = h->param.i_width * h->param.i_height >> (!CHROMA444 * 2);
+ double psnr_y = x264_psnr( ssd[0], luma_size );
+ double psnr_u = x264_psnr( ssd[1], chroma_size );
+ double psnr_v = x264_psnr( ssd[2], chroma_size );
h->stat.f_ssd_global[h->sh.i_type] += dur * (ssd[0] + ssd[1] + ssd[2]);
- h->stat.f_psnr_average[h->sh.i_type] += dur * x264_psnr( ssd[0] + ssd[1] + ssd[2], 3 * h->param.i_width * h->param.i_height / 2 );
- h->stat.f_psnr_mean_y[h->sh.i_type] += dur * x264_psnr( ssd[0], h->param.i_width * h->param.i_height );
- h->stat.f_psnr_mean_u[h->sh.i_type] += dur * x264_psnr( ssd[1], h->param.i_width * h->param.i_height / 4 );
- h->stat.f_psnr_mean_v[h->sh.i_type] += dur * x264_psnr( ssd[2], h->param.i_width * h->param.i_height / 4 );
+ h->stat.f_psnr_average[h->sh.i_type] += dur * x264_psnr( ssd[0] + ssd[1] + ssd[2], luma_size + chroma_size*2 );
+ h->stat.f_psnr_mean_y[h->sh.i_type] += dur * psnr_y;
+ h->stat.f_psnr_mean_u[h->sh.i_type] += dur * psnr_u;
+ h->stat.f_psnr_mean_v[h->sh.i_type] += dur * psnr_v;
- snprintf( psz_message, 80, " PSNR Y:%5.2f U:%5.2f V:%5.2f",
- x264_psnr( ssd[0], h->param.i_width * h->param.i_height ),
- x264_psnr( ssd[1], h->param.i_width * h->param.i_height / 4),
- x264_psnr( ssd[2], h->param.i_width * h->param.i_height / 4) );
+ snprintf( psz_message, 80, " PSNR Y:%5.2f U:%5.2f V:%5.2f", psnr_y, psnr_u, psnr_v );
}
if( h->param.analyse.b_ssim )
****************************************************************************/
void x264_encoder_close ( x264_t *h )
{
- int64_t i_yuv_size = 3 * h->param.i_width * h->param.i_height / 2;
+ int luma_size = h->param.i_width * h->param.i_height;
+ int chroma_size = h->param.i_width * h->param.i_height >> (!CHROMA444 * 2);
+ int64_t i_yuv_size = luma_size + chroma_size * 2;
int64_t i_mb_count_size[2][7] = {{0}};
char buf[200];
int b_print_pcm = h->stat.i_mb_count[SLICE_TYPE_I][I_PCM]
}
buf[0] = 0;
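+    /* Each 4:4:4 chroma plane contributes four 8x8 blocks to the CBP stats,
+     * so normalize those percentages by 4. */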
+ int csize = CHROMA444 ? 4 : 1;
if( i_mb_count != i_all_intra )
sprintf( buf, " inter: %.1f%% %.1f%% %.1f%%",
h->stat.i_mb_cbp[1] * 100.0 / ((i_mb_count - i_all_intra)*4),
- h->stat.i_mb_cbp[3] * 100.0 / ((i_mb_count - i_all_intra) ),
- h->stat.i_mb_cbp[5] * 100.0 / ((i_mb_count - i_all_intra)) );
- x264_log( h, X264_LOG_INFO, "coded y,uvDC,uvAC intra: %.1f%% %.1f%% %.1f%%%s\n",
+ h->stat.i_mb_cbp[3] * 100.0 / ((i_mb_count - i_all_intra)*csize),
+ h->stat.i_mb_cbp[5] * 100.0 / ((i_mb_count - i_all_intra)*csize) );
+ x264_log( h, X264_LOG_INFO, "coded y,%s,%s intra: %.1f%% %.1f%% %.1f%%%s\n",
+ CHROMA444?"u":"uvDC", CHROMA444?"v":"uvAC",
h->stat.i_mb_cbp[0] * 100.0 / (i_all_intra*4),
- h->stat.i_mb_cbp[2] * 100.0 / (i_all_intra ),
- h->stat.i_mb_cbp[4] * 100.0 / (i_all_intra ), buf );
+ h->stat.i_mb_cbp[2] * 100.0 / (i_all_intra*csize),
+ h->stat.i_mb_cbp[4] * 100.0 / (i_all_intra*csize), buf );
int64_t fixed_pred_modes[4][9] = {{0}};
int64_t sum_pred_modes[4] = {0};
fixed_pred_modes[3][x264_mb_pred_mode8x8c_fix[i]] += h->stat.i_mb_pred_mode[3][i];
sum_pred_modes[3] += h->stat.i_mb_pred_mode[3][i];
}
- if( sum_pred_modes[3] )
+ if( sum_pred_modes[3] && !CHROMA444 )
x264_log( h, X264_LOG_INFO, "i8c dc,h,v,p: %2.0f%% %2.0f%% %2.0f%% %2.0f%%\n",
fixed_pred_modes[3][0] * 100.0 / sum_pred_modes[3],
fixed_pred_modes[3][1] * 100.0 / sum_pred_modes[3],
dct4x4[3][0] = 0;
}
-static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int idx )
+static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int p, int idx )
{
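+    /* p selects the plane: 0 = luma, nonzero = a 4:4:4 chroma plane, which
+     * uses the chroma CQM and the chroma noise-reduction buffers. */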
- int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
- if( h->mb.b_noise_reduction && ctx_block_cat != DCT_LUMA_AC )
- h->quantf.denoise_dct( dct, h->nr_residual_sum[0], h->nr_offset[0], 16 );
+ int i_quant_cat = b_intra ? (p?CQM_4IC:CQM_4IY) : (p?CQM_4PC:CQM_4PY);
+ if( h->mb.b_noise_reduction )
+ h->quantf.denoise_dct( dct, h->nr_residual_sum[0+!!p*2], h->nr_offset[0+!!p*2], 16 );
if( h->mb.b_trellis )
- return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, 0, idx );
+ return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, !!p, idx+p*16 );
else
return h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
}
-static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int b_intra, int idx )
+static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int ctx_block_cat, int b_intra, int p, int idx )
{
- int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
+ int i_quant_cat = b_intra ? (p?CQM_8IC:CQM_8IY) : (p?CQM_8PC:CQM_8PY);
if( h->mb.b_noise_reduction )
- h->quantf.denoise_dct( dct, h->nr_residual_sum[1], h->nr_offset[1], 64 );
+ h->quantf.denoise_dct( dct, h->nr_residual_sum[1+!!p*2], h->nr_offset[1+!!p*2], 64 );
if( h->mb.b_trellis )
- return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx );
+ return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, !!p, idx+p*4 );
else
return h->quantf.quant_8x8( dct, h->quant8_mf[i_quant_cat][i_qp], h->quant8_bias[i_quant_cat][i_qp] );
}
/* This means that decimation can be done merely by adjusting the CBP and NNZ
* rather than memsetting the coefficients. */
-void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
+void x264_mb_encode_i4x4( x264_t *h, int p, int idx, int i_qp, int i_mode )
{
int nz;
- pixel *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
- pixel *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
+ pixel *p_src = &h->mb.pic.p_fenc[p][block_idx_xy_fenc[idx]];
+ pixel *p_dst = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[idx]];
ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
+ if( h->mb.b_lossless )
+ x264_predict_lossless_4x4( h, p_dst, p, idx, i_mode );
+ else
+ h->predict_4x4[i_mode]( p_dst );
+
if( h->mb.b_lossless )
{
- nz = h->zigzagf.sub_4x4( h->dct.luma4x4[idx], p_src, p_dst );
- h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
+ nz = h->zigzagf.sub_4x4( h->dct.luma4x4[p*16+idx], p_src, p_dst );
+ h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = nz;
h->mb.i_cbp_luma |= nz<<(idx>>2);
return;
}
h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );
- nz = x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 1, idx );
- h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
+ nz = x264_quant_4x4( h, dct4x4, i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 1, p, idx );
+ h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = nz;
if( nz )
{
h->mb.i_cbp_luma |= 1<<(idx>>2);
- h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 );
- h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qp );
+ h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+idx], dct4x4 );
+ h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[p?CQM_4IC:CQM_4IY], i_qp );
h->dctf.add4x4_idct( p_dst, dct4x4 );
}
}
-#define STORE_8x8_NNZ( s8, nz )\
+#define STORE_8x8_NNZ( p, idx, nz )\
do\
{\
- M16( &h->mb.cache.non_zero_count[(s8) + 0*8] ) = (nz) * 0x0101;\
- M16( &h->mb.cache.non_zero_count[(s8) + 1*8] ) = (nz) * 0x0101;\
+ M16( &h->mb.cache.non_zero_count[x264_scan8[p*16+idx*4]+0] ) = (nz) * 0x0101;\
+ M16( &h->mb.cache.non_zero_count[x264_scan8[p*16+idx*4]+8] ) = (nz) * 0x0101;\
} while(0)
-#define CLEAR_16x16_NNZ \
+#define CLEAR_16x16_NNZ( p ) \
+do\
{\
- M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = 0;\
- M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = 0;\
- M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = 0;\
- M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = 0;\
-}
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 0]] ) = 0;\
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 2]] ) = 0;\
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 8]] ) = 0;\
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+10]] ) = 0;\
+} while(0)
-void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
+void x264_mb_encode_i8x8( x264_t *h, int p, int idx, int i_qp, int i_mode, pixel *edge )
{
int x = idx&1;
int y = idx>>1;
- int s8 = X264_SCAN8_0 + 2*x + 16*y;
int nz;
- pixel *p_src = &h->mb.pic.p_fenc[0][8*x + 8*y*FENC_STRIDE];
- pixel *p_dst = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE];
+ pixel *p_src = &h->mb.pic.p_fenc[p][8*x + 8*y*FENC_STRIDE];
+ pixel *p_dst = &h->mb.pic.p_fdec[p][8*x + 8*y*FDEC_STRIDE];
ALIGNED_ARRAY_16( dctcoef, dct8x8,[64] );
+ ALIGNED_ARRAY_16( pixel, edge_buf,[33] );
+
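+    /* Analysis may pass in an already-filtered edge; otherwise build one. */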
+ if( !edge )
+ {
+ h->predict_8x8_filter( p_dst, edge_buf, h->mb.i_neighbour8[idx], x264_pred_i4x4_neighbors[i_mode] );
+ edge = edge_buf;
+ }
+
+ if( h->mb.b_lossless )
+ x264_predict_lossless_8x8( h, p_dst, p, idx, i_mode, edge );
+ else
+ h->predict_8x8[i_mode]( p_dst, edge );
if( h->mb.b_lossless )
{
- nz = h->zigzagf.sub_8x8( h->dct.luma8x8[idx], p_src, p_dst );
- STORE_8x8_NNZ( s8, nz );
+ nz = h->zigzagf.sub_8x8( h->dct.luma8x8[p*4+idx], p_src, p_dst );
+ STORE_8x8_NNZ( p, idx, nz );
h->mb.i_cbp_luma |= nz<<idx;
return;
}
h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
- nz = x264_quant_8x8( h, dct8x8, i_qp, 1, idx );
+ nz = x264_quant_8x8( h, dct8x8, i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 1, p, idx );
if( nz )
{
h->mb.i_cbp_luma |= 1<<idx;
- h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
- h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp );
+ h->zigzagf.scan_8x8( h->dct.luma8x8[p*4+idx], dct8x8 );
+ h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[p?CQM_8IC:CQM_8IY], i_qp );
h->dctf.add8x8_idct8( p_dst, dct8x8 );
- STORE_8x8_NNZ( s8, 1 );
+ STORE_8x8_NNZ( p, idx, 1 );
}
else
- STORE_8x8_NNZ( s8, 0 );
+ STORE_8x8_NNZ( p, idx, 0 );
}
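
x264_mb_encode_i8x8 now performs the intra prediction itself instead of relying on the caller, taking an optional prefiltered edge: when the caller passes NULL, the edge is filtered into a local buffer, so RD paths that already hold a filtered edge avoid redundant work. A minimal sketch of that optional-buffer pattern, where filter_edge() and encode_with_edge() are hypothetical stand-ins:

    /* filter_edge() and encode_with_edge() are illustrative stand-ins. */
    static void filter_edge( int *edge )
    {
        for( int i = 0; i < 33; i++ )
            edge[i] = i; /* pretend-filtered neighbor samples */
    }
    static void encode_with_edge( const int *edge ) { (void)edge; }

    void encode_block( const int *edge ) /* edge may be NULL */
    {
        int edge_buf[33];
        if( !edge )
        {
            filter_edge( edge_buf ); /* no caller-supplied edge: build one */
            edge = edge_buf;
        }
        encode_with_edge( edge );    /* identical code path either way */
    }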
-static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
+static void x264_mb_encode_i16x16( x264_t *h, int p, int i_qp )
{
- pixel *p_src = h->mb.pic.p_fenc[0];
- pixel *p_dst = h->mb.pic.p_fdec[0];
+ pixel *p_src = h->mb.pic.p_fenc[p];
+ pixel *p_dst = h->mb.pic.p_fdec[p];
ALIGNED_ARRAY_16( dctcoef, dct4x4,[16],[16] );
ALIGNED_ARRAY_16( dctcoef, dct_dc4x4,[16] );
- int nz;
+ int nz, block_cbp = 0;
int decimate_score = h->mb.b_dct_decimate ? 0 : 9;
+ int i_quant_cat = p ? CQM_4IC : CQM_4IY;
+ int i_mode = h->mb.i_intra16x16_pred_mode;
+
+ if( h->mb.b_lossless )
+ x264_predict_lossless_16x16( h, p, i_mode );
+ else
+ h->predict_16x16[i_mode]( h->mb.pic.p_fdec[p] );
if( h->mb.b_lossless )
{
        for( int i = 0; i < 16; i++ )
        {
int oe = block_idx_xy_fenc[i];
int od = block_idx_xy_fdec[i];
- nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[i], p_src+oe, p_dst+od, &dct_dc4x4[block_idx_yx_1d[i]] );
- h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
- h->mb.i_cbp_luma |= nz;
+ nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16*p+i], p_src+oe, p_dst+od, &dct_dc4x4[block_idx_yx_1d[i]] );
+ h->mb.cache.non_zero_count[x264_scan8[16*p+i]] = nz;
+ block_cbp |= nz;
}
- h->mb.i_cbp_luma *= 0xf;
- h->mb.cache.non_zero_count[x264_scan8[24]] = array_non_zero( dct_dc4x4 );
- h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );
+ h->mb.i_cbp_luma |= block_cbp * 0xf;
+ h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = array_non_zero( dct_dc4x4 );
+ h->zigzagf.scan_4x4( h->dct.luma16x16_dc[p], dct_dc4x4 );
return;
}
dct4x4[i][0] = 0;
/* quant/scan/dequant */
- nz = x264_quant_4x4( h, dct4x4[i], i_qp, DCT_LUMA_AC, 1, i );
- h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
+ if( h->mb.b_trellis )
+ nz = x264_quant_4x4_trellis( h, dct4x4[i], i_quant_cat, i_qp, ctx_cat_plane[DCT_LUMA_AC][p], 1, !!p, i );
+ else
+ nz = h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
+ h->mb.cache.non_zero_count[x264_scan8[16*p+i]] = nz;
if( nz )
{
- h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] );
- h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp );
- if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[i] );
- h->mb.i_cbp_luma = 0xf;
+ h->zigzagf.scan_4x4( h->dct.luma4x4[16*p+i], dct4x4[i] );
+ h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[i_quant_cat], i_qp );
+ if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16*p+i] );
+ block_cbp = 0xf;
}
}
/* More useful with CAVLC, but still useful with CABAC. */
if( decimate_score < 6 )
{
- h->mb.i_cbp_luma = 0;
- CLEAR_16x16_NNZ
+ CLEAR_16x16_NNZ( p );
+ block_cbp = 0;
}
+ else
+ h->mb.i_cbp_luma |= block_cbp;
h->dctf.dct4x4dc( dct_dc4x4 );
if( h->mb.b_trellis )
- nz = x264_quant_dc_trellis( h, dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1, 0 );
+ nz = x264_quant_dc_trellis( h, dct_dc4x4, i_quant_cat, i_qp, ctx_cat_plane[DCT_LUMA_DC][p], 1, 0, LUMA_DC+p );
else
- nz = h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
+ nz = h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[i_quant_cat][i_qp][0]>>1, h->quant4_bias[i_quant_cat][i_qp][0]<<1 );
- h->mb.cache.non_zero_count[x264_scan8[24]] = nz;
+ h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = nz;
if( nz )
{
- h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );
+ h->zigzagf.scan_4x4( h->dct.luma16x16_dc[p], dct_dc4x4 );
/* output samples to fdec */
h->dctf.idct4x4dc( dct_dc4x4 );
- h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp ); /* XXX not inversed */
- if( h->mb.i_cbp_luma )
+ h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[i_quant_cat], i_qp ); /* XXX not inversed */
+ if( block_cbp )
for( int i = 0; i < 16; i++ )
dct4x4[i][0] = dct_dc4x4[block_idx_xy_1d[i]];
}
/* put pixels to fdec */
- if( h->mb.i_cbp_luma )
+ if( block_cbp )
h->dctf.add16x16_idct( p_dst, dct4x4 );
else if( nz )
h->dctf.add16x16_idct_dc( p_dst, dct_dc4x4 );
score += h->pixf.var2_8x8( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE, &ssd[1] );
if( score < thresh*4 )
{
- h->mb.cache.non_zero_count[x264_scan8[16]] = 0;
- h->mb.cache.non_zero_count[x264_scan8[17]] = 0;
- h->mb.cache.non_zero_count[x264_scan8[18]] = 0;
- h->mb.cache.non_zero_count[x264_scan8[19]] = 0;
- h->mb.cache.non_zero_count[x264_scan8[20]] = 0;
- h->mb.cache.non_zero_count[x264_scan8[21]] = 0;
- h->mb.cache.non_zero_count[x264_scan8[22]] = 0;
- h->mb.cache.non_zero_count[x264_scan8[23]] = 0;
- M16( &h->mb.cache.non_zero_count[x264_scan8[25]] ) = 0;
+ M16( &h->mb.cache.non_zero_count[x264_scan8[16]] ) = 0;
+ M16( &h->mb.cache.non_zero_count[x264_scan8[18]] ) = 0;
+ M16( &h->mb.cache.non_zero_count[x264_scan8[32]] ) = 0;
+ M16( &h->mb.cache.non_zero_count[x264_scan8[34]] ) = 0;
+ h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+0]] = 0;
+ h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+1]] = 0;
for( int ch = 0; ch < 2; ch++ )
{
            if( ssd[ch] > thresh )
            {
h->dctf.sub8x8_dct_dc( dct2x2, h->mb.pic.p_fenc[1+ch], h->mb.pic.p_fdec[1+ch] );
if( h->mb.b_trellis )
- nz_dc = x264_quant_dc_trellis( h, dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter, 1 );
+ nz_dc = x264_quant_dc_trellis( h, dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter, 1, CHROMA_DC+ch );
else
nz_dc = h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
            if( nz_dc )
            {
if( !x264_mb_optimize_chroma_dc( h, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp ) )
continue;
- h->mb.cache.non_zero_count[x264_scan8[25]+ch] = 1;
+ h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = 1;
zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
idct_dequant_2x2_dconly( dct2x2, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
h->dctf.add8x8_idct_dc( h->mb.pic.p_fdec[1+ch], dct2x2 );
{
int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
- nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16+i+ch*4], p_src+oe, p_dst+od, &h->dct.chroma_dc[ch][i] );
- h->mb.cache.non_zero_count[x264_scan8[16+i+ch*4]] = nz;
+ nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16+i+ch*16], p_src+oe, p_dst+od, &h->dct.chroma_dc[ch][i] );
+ h->mb.cache.non_zero_count[x264_scan8[16+i+ch*16]] = nz;
h->mb.i_cbp_chroma |= nz;
}
- h->mb.cache.non_zero_count[x264_scan8[25]+ch] = array_non_zero( h->dct.chroma_dc[ch] );
+ h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = array_non_zero( h->dct.chroma_dc[ch] );
continue;
}
nz = x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IC+b_inter, i_qp, DCT_CHROMA_AC, !b_inter, 1, 0 );
else
nz = h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
- h->mb.cache.non_zero_count[x264_scan8[16+i+ch*4]] = nz;
+ h->mb.cache.non_zero_count[x264_scan8[16+i+ch*16]] = nz;
if( nz )
{
nz_ac = 1;
- h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
+ h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*16], dct4x4[i] );
h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qp );
if( b_decimate )
- i_decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16+i+ch*4] );
+ i_decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16+i+ch*16] );
}
}
if( h->mb.b_trellis )
- nz_dc = x264_quant_dc_trellis( h, dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter, 1 );
+ nz_dc = x264_quant_dc_trellis( h, dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter, 1, CHROMA_DC+ch );
else
nz_dc = h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
- h->mb.cache.non_zero_count[x264_scan8[25]+ch] = nz_dc;
+ h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = nz_dc;
if( (b_decimate && i_decimate_score < 7) || !nz_ac )
{
/* Decimate the block */
- h->mb.cache.non_zero_count[x264_scan8[16+0]+24*ch] = 0;
- h->mb.cache.non_zero_count[x264_scan8[16+1]+24*ch] = 0;
- h->mb.cache.non_zero_count[x264_scan8[16+2]+24*ch] = 0;
- h->mb.cache.non_zero_count[x264_scan8[16+3]+24*ch] = 0;
+ M16( &h->mb.cache.non_zero_count[x264_scan8[16+0+16*ch]] ) = 0;
+ M16( &h->mb.cache.non_zero_count[x264_scan8[16+2+16*ch]] ) = 0;
if( !nz_dc ) /* Whole block is empty */
continue;
- if( !x264_mb_optimize_chroma_dc( h, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp ) )
+ if( !x264_mb_optimize_chroma_dc( h, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp ) )
{
- h->mb.cache.non_zero_count[x264_scan8[25]+ch] = 0;
+ h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = 0;
continue;
}
/* DC-only */
}
/* 0 = none, 1 = DC only, 2 = DC+AC */
- h->mb.i_cbp_chroma = ((!!M16( &h->mb.cache.non_zero_count[x264_scan8[25]] )) | h->mb.i_cbp_chroma) + h->mb.i_cbp_chroma;
+ h->mb.i_cbp_chroma += (h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+0]] |
+ h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+1]] | h->mb.i_cbp_chroma);
}
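
The rewritten final statement above packs the chroma DC and AC flags into the 0/1/2 coded_block_pattern values with a single add: i_cbp_chroma enters as 1 if any AC coefficient survived (else 0), and cbp += (dc | cbp) then yields 0 = none, 1 = DC only, 2 = DC+AC. A standalone truth-table check of that arithmetic:

    #include <stdio.h>

    int main(void)
    {
        for( int ac = 0; ac <= 1; ac++ )
            for( int dc = 0; dc <= 1; dc++ )
            {
                int cbp = ac;    /* 1 if any chroma AC coefficient survived */
                cbp += dc | cbp; /* fold in the DC non-zero flag */
                printf( "ac=%d dc=%d -> cbp=%d\n", ac, dc, cbp );
            }
        return 0; /* prints cbp = 0, 1, 2, 2 */
    }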
static void x264_macroblock_encode_skip( x264_t *h )
{
- M32( &h->mb.cache.non_zero_count[x264_scan8[0]+0*8] ) = 0;
- M32( &h->mb.cache.non_zero_count[x264_scan8[0]+1*8] ) = 0;
- M32( &h->mb.cache.non_zero_count[x264_scan8[0]+2*8] ) = 0;
- M32( &h->mb.cache.non_zero_count[x264_scan8[0]+3*8] ) = 0;
- for( int i = 16; i < 24; i++ )
- h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16+ 0]] ) = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16+ 2]] ) = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[32+ 0]] ) = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[32+ 2]] ) = 0;
+ if( CHROMA444 )
+ {
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16+ 8]] ) = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16+10]] ) = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[32+ 8]] ) = 0;
+ M32( &h->mb.cache.non_zero_count[x264_scan8[32+10]] ) = 0;
+ }
h->mb.i_cbp_luma = 0;
h->mb.i_cbp_chroma = 0;
h->mb.cbp[h->mb.i_mb_xy] = 0;
}
-/*****************************************************************************
- * x264_macroblock_encode_pskip:
- * Encode an already marked skip block
- *****************************************************************************/
-static void x264_macroblock_encode_pskip( x264_t *h )
-{
- /* don't do pskip motion compensation if it was already done in macroblock_analyse */
- if( !h->mb.b_skip_mc )
- {
- int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
- h->mb.mv_min[0], h->mb.mv_max[0] );
- int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
- h->mb.mv_min[1], h->mb.mv_max[1] );
-
- h->mc.mc_luma( h->mb.pic.p_fdec[0], FDEC_STRIDE,
- h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
- mvx, mvy, 16, 16, &h->sh.weight[0][0] );
-
- /* Special case for mv0, which is (of course) very common in P-skip mode. */
- if( mvx | mvy )
- h->mc.mc_chroma( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2], FDEC_STRIDE,
- h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
- mvx, mvy, 8, 8 );
- else
- h->mc.load_deinterleave_8x8x2_fdec( h->mb.pic.p_fdec[1], h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1] );
-
- if( h->sh.weight[0][1].weightfn )
- h->sh.weight[0][1].weightfn[8>>2]( h->mb.pic.p_fdec[1], FDEC_STRIDE,
- h->mb.pic.p_fdec[1], FDEC_STRIDE,
- &h->sh.weight[0][1], 8 );
- if( h->sh.weight[0][2].weightfn )
- h->sh.weight[0][2].weightfn[8>>2]( h->mb.pic.p_fdec[2], FDEC_STRIDE,
- h->mb.pic.p_fdec[2], FDEC_STRIDE,
- &h->sh.weight[0][2], 8 );
- }
-
- x264_macroblock_encode_skip( h );
-}
-
/*****************************************************************************
* Intra prediction for predictive lossless mode.
*****************************************************************************/
}
}
-void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int idx, int i_mode )
+void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int p, int idx, int i_mode )
{
- int stride = h->fenc->i_stride[0] << MB_INTERLACED;
- pixel *p_src = h->mb.pic.p_fenc_plane[0] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;
+ int stride = h->fenc->i_stride[p] << MB_INTERLACED;
+ pixel *p_src = h->mb.pic.p_fenc_plane[p] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;
if( i_mode == I_PRED_4x4_V )
h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_src-stride, stride, 4 );
h->predict_4x4[i_mode]( p_dst );
}
-void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int idx, int i_mode, pixel edge[33] )
+void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[33] )
{
- int stride = h->fenc->i_stride[0] << MB_INTERLACED;
- pixel *p_src = h->mb.pic.p_fenc_plane[0] + (idx&1)*8 + (idx>>1)*8*stride;
+ int stride = h->fenc->i_stride[p] << MB_INTERLACED;
+ pixel *p_src = h->mb.pic.p_fenc_plane[p] + (idx&1)*8 + (idx>>1)*8*stride;
if( i_mode == I_PRED_8x8_V )
h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src-stride, stride, 8 );
h->predict_8x8[i_mode]( p_dst, edge );
}
-void x264_predict_lossless_16x16( x264_t *h, int i_mode )
+void x264_predict_lossless_16x16( x264_t *h, int p, int i_mode )
{
- int stride = h->fenc->i_stride[0] << MB_INTERLACED;
+ int stride = h->fenc->i_stride[p] << MB_INTERLACED;
if( i_mode == I_PRED_16x16_V )
- h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-stride, stride, 16 );
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[p], FDEC_STRIDE, h->mb.pic.p_fenc_plane[p]-stride, stride, 16 );
else if( i_mode == I_PRED_16x16_H )
- h->mc.copy_16x16_unaligned( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-1, stride, 16 );
+ h->mc.copy_16x16_unaligned( h->mb.pic.p_fdec[p], FDEC_STRIDE, h->mb.pic.p_fenc_plane[p]-1, stride, 16 );
else
- h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
+ h->predict_16x16[i_mode]( h->mb.pic.p_fdec[p] );
}
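
For context on the copies from p_src - stride above: in H.264's lossless transform-bypass mode, vertical and horizontal intra prediction become sample-wise DPCM, each row (or column) being predicted from the reconstructed row above (or column to the left). Since lossless reconstruction equals the source, that reduces to copying the source block shifted by one row, which is what these functions do. A sketch of the vertical case under that reading (illustrative, not x264 code):

    /* src points at the block's top-left inside the source plane, so row -1
     * is the neighbor row; copying rows y-1 .. y+size-2 mirrors the
     * mc.copy( dst, ..., p_src - stride, stride, size ) calls above. */
    void predict_v_dpcm( unsigned char *dst, int dst_stride,
                         const unsigned char *src, int src_stride, int size )
    {
        for( int y = 0; y < size; y++ )
            for( int x = 0; x < size; x++ )
                dst[y*dst_stride + x] = src[(y-1)*src_stride + x];
    }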
/*****************************************************************************
* x264_macroblock_encode:
*****************************************************************************/
-void x264_macroblock_encode( x264_t *h )
+static ALWAYS_INLINE void x264_macroblock_encode_internal( x264_t *h, int plane_count, int chroma )
{
int i_qp = h->mb.i_qp;
int b_decimate = h->mb.b_dct_decimate;
int b_force_no_skip = 0;
int nz;
h->mb.i_cbp_luma = 0;
- h->mb.cache.non_zero_count[x264_scan8[24]] = 0;
+ for( int p = 0; p < plane_count; p++ )
+ h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = 0;
if( h->mb.i_type == I_PCM )
{
/* if PCM is chosen, we need to store reconstructed frame data */
- h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
- h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
- h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
+ for( int p = 0; p < plane_count; p++ )
+ h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[p], FDEC_STRIDE, h->mb.pic.p_fenc[p], FENC_STRIDE, 16 );
+ if( chroma )
+ {
+ h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
+ h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
+ }
return;
}
if( h->mb.i_type == P_SKIP )
{
- /* A bit special */
- x264_macroblock_encode_pskip( h );
+ /* don't do pskip motion compensation if it was already done in macroblock_analyse */
+ if( !h->mb.b_skip_mc )
+ {
+ int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
+ h->mb.mv_min[0], h->mb.mv_max[0] );
+ int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
+ h->mb.mv_min[1], h->mb.mv_max[1] );
+
+ for( int p = 0; p < plane_count; p++ )
+ h->mc.mc_luma( h->mb.pic.p_fdec[p], FDEC_STRIDE,
+ &h->mb.pic.p_fref[0][0][p*4], h->mb.pic.i_stride[p],
+ mvx, mvy, 16, 16, &h->sh.weight[0][p] );
+
+ if( chroma )
+ {
+ /* Special case for mv0, which is (of course) very common in P-skip mode. */
+ if( mvx | mvy )
+ h->mc.mc_chroma( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2], FDEC_STRIDE,
+ h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
+ mvx, mvy, 8, 8 );
+ else
+ h->mc.load_deinterleave_8x8x2_fdec( h->mb.pic.p_fdec[1], h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1] );
+
+ if( h->sh.weight[0][1].weightfn )
+ h->sh.weight[0][1].weightfn[8>>2]( h->mb.pic.p_fdec[1], FDEC_STRIDE,
+ h->mb.pic.p_fdec[1], FDEC_STRIDE,
+ &h->sh.weight[0][1], 8 );
+ if( h->sh.weight[0][2].weightfn )
+ h->sh.weight[0][2].weightfn[8>>2]( h->mb.pic.p_fdec[2], FDEC_STRIDE,
+ h->mb.pic.p_fdec[2], FDEC_STRIDE,
+ &h->sh.weight[0][2], 8 );
+ }
+ }
+
+ x264_macroblock_encode_skip( h );
return;
}
if( h->mb.i_type == B_SKIP )
if( h->mb.i_type == I_16x16 )
{
- const int i_mode = h->mb.i_intra16x16_pred_mode;
h->mb.b_transform_8x8 = 0;
- if( h->mb.b_lossless )
- x264_predict_lossless_16x16( h, i_mode );
- else
- h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
-
- /* encode the 16x16 macroblock */
- x264_mb_encode_i16x16( h, i_qp );
+ for( int p = 0; p < plane_count; p++ )
+ {
+ x264_mb_encode_i16x16( h, p, i_qp );
+ i_qp = h->mb.i_chroma_qp;
+ }
}
else if( h->mb.i_type == I_8x8 )
{
- ALIGNED_ARRAY_16( pixel, edge,[33] );
h->mb.b_transform_8x8 = 1;
/* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
if( h->mb.i_skip_intra )
if( h->mb.i_skip_intra == 2 )
h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
}
- for( int i = h->mb.i_skip_intra ? 3 : 0 ; i < 4; i++ )
+ for( int p = 0; p < plane_count; p++ )
{
- pixel *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
- int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
- h->predict_8x8_filter( p_dst, edge, h->mb.i_neighbour8[i], x264_pred_i4x4_neighbors[i_mode] );
-
- if( h->mb.b_lossless )
- x264_predict_lossless_8x8( h, p_dst, i, i_mode, edge );
- else
- h->predict_8x8[i_mode]( p_dst, edge );
-
- x264_mb_encode_i8x8( h, i, i_qp );
+ for( int i = (p == 0 && h->mb.i_skip_intra) ? 3 : 0 ; i < 4; i++ )
+ {
+ int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
+ x264_mb_encode_i8x8( h, p, i, i_qp, i_mode, NULL );
+ }
+ i_qp = h->mb.i_chroma_qp;
}
}
else if( h->mb.i_type == I_4x4 )
if( h->mb.i_skip_intra == 2 )
h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
}
- for( int i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ )
+ for( int p = 0; p < plane_count; p++ )
{
- pixel *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i]];
- int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
+ for( int i = (p == 0 && h->mb.i_skip_intra) ? 15 : 0 ; i < 16; i++ )
+ {
+ pixel *p_dst = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[i]];
+ int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
- if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
- /* emulate missing topright samples */
- MPIXEL_X4( &p_dst[4-FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst[3-FDEC_STRIDE] );
+ if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
+ /* emulate missing topright samples */
+ MPIXEL_X4( &p_dst[4-FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst[3-FDEC_STRIDE] );
- if( h->mb.b_lossless )
- x264_predict_lossless_4x4( h, p_dst, i, i_mode );
- else
- h->predict_4x4[i_mode]( p_dst );
- x264_mb_encode_i4x4( h, i, i_qp );
+ x264_mb_encode_i4x4( h, p, i, i_qp, i_mode );
+ }
+ i_qp = h->mb.i_chroma_qp;
}
}
else /* Inter MB */
if( h->mb.b_lossless )
{
if( h->mb.b_transform_8x8 )
- for( int i8x8 = 0; i8x8 < 4; i8x8++ )
- {
- int x = i8x8&1;
- int y = i8x8>>1;
- int s8 = X264_SCAN8_0 + 2*x + 16*y;
-
- nz = h->zigzagf.sub_8x8( h->dct.luma8x8[i8x8], h->mb.pic.p_fenc[0] + 8*x + 8*y*FENC_STRIDE,
- h->mb.pic.p_fdec[0] + 8*x + 8*y*FDEC_STRIDE );
- STORE_8x8_NNZ( s8, nz );
- h->mb.i_cbp_luma |= nz << i8x8;
- }
+ for( int p = 0; p < plane_count; p++ )
+ for( int i8x8 = 0; i8x8 < 4; i8x8++ )
+ {
+ int x = i8x8&1;
+ int y = i8x8>>1;
+ nz = h->zigzagf.sub_8x8( h->dct.luma8x8[p*4+i8x8], h->mb.pic.p_fenc[p] + 8*x + 8*y*FENC_STRIDE,
+ h->mb.pic.p_fdec[p] + 8*x + 8*y*FDEC_STRIDE );
+ STORE_8x8_NNZ( p, i8x8, nz );
+ h->mb.i_cbp_luma |= nz << i8x8;
+ }
else
- for( int i4x4 = 0; i4x4 < 16; i4x4++ )
- {
- nz = h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4],
- h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4x4],
- h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4x4] );
- h->mb.cache.non_zero_count[x264_scan8[i4x4]] = nz;
- h->mb.i_cbp_luma |= nz << (i4x4>>2);
- }
+ for( int p = 0; p < plane_count; p++ )
+ for( int i4x4 = 0; i4x4 < 16; i4x4++ )
+ {
+ nz = h->zigzagf.sub_4x4( h->dct.luma4x4[p*16+i4x4],
+ h->mb.pic.p_fenc[p]+block_idx_xy_fenc[i4x4],
+ h->mb.pic.p_fdec[p]+block_idx_xy_fdec[i4x4] );
+ h->mb.cache.non_zero_count[x264_scan8[p*16+i4x4]] = nz;
+ h->mb.i_cbp_luma |= nz << (i4x4>>2);
+ }
}
else if( h->mb.b_transform_8x8 )
{
ALIGNED_ARRAY_16( dctcoef, dct8x8,[4],[64] );
b_decimate &= !h->mb.b_trellis || !h->param.b_cabac; // 8x8 trellis is inherently optimal decimation for CABAC
- h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
- h->nr_count[1] += h->mb.b_noise_reduction * 4;
- for( int idx = 0; idx < 4; idx++ )
+ for( int p = 0; p < plane_count; p++ )
{
- nz = x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );
+ h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[p], h->mb.pic.p_fdec[p] );
+ h->nr_count[1+!!p*2] += h->mb.b_noise_reduction * 4;
- if( nz )
+ int plane_cbp = 0;
+ for( int idx = 0; idx < 4; idx++ )
{
- h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] );
- if( b_decimate )
+ nz = x264_quant_8x8( h, dct8x8[idx], i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 0, p, idx );
+
+ if( nz )
{
- int i_decimate_8x8 = h->quantf.decimate_score64( h->dct.luma8x8[idx] );
- i_decimate_mb += i_decimate_8x8;
- if( i_decimate_8x8 >= 4 )
- h->mb.i_cbp_luma |= 1<<idx;
+ h->zigzagf.scan_8x8( h->dct.luma8x8[p*4+idx], dct8x8[idx] );
+ if( b_decimate )
+ {
+ int i_decimate_8x8 = h->quantf.decimate_score64( h->dct.luma8x8[p*4+idx] );
+ i_decimate_mb += i_decimate_8x8;
+ if( i_decimate_8x8 >= 4 )
+ plane_cbp |= 1<<idx;
+ }
+ else
+ plane_cbp |= 1<<idx;
}
- else
- h->mb.i_cbp_luma |= 1<<idx;
}
- }
- if( i_decimate_mb < 6 && b_decimate )
- {
- h->mb.i_cbp_luma = 0;
- CLEAR_16x16_NNZ
- }
- else
- {
- for( int idx = 0; idx < 4; idx++ )
+ if( i_decimate_mb < 6 && b_decimate )
{
- int x = idx&1;
- int y = idx>>1;
- int s8 = X264_SCAN8_0 + 2*x + 16*y;
-
- if( h->mb.i_cbp_luma&(1<<idx) )
+ plane_cbp = 0;
+ CLEAR_16x16_NNZ( p );
+ }
+ else
+ {
+ for( int idx = 0; idx < 4; idx++ )
{
- h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
- h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE], dct8x8[idx] );
- STORE_8x8_NNZ( s8, 1 );
+ int x = idx&1;
+ int y = idx>>1;
+
+ if( plane_cbp&(1<<idx) )
+ {
+ h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[p?CQM_8PC:CQM_8PY], i_qp );
+ h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[p][8*x + 8*y*FDEC_STRIDE], dct8x8[idx] );
+ STORE_8x8_NNZ( p, idx, 1 );
+ }
+ else
+ STORE_8x8_NNZ( p, idx, 0 );
}
- else
- STORE_8x8_NNZ( s8, 0 );
}
+ h->mb.i_cbp_luma |= plane_cbp;
+ i_qp = h->mb.i_chroma_qp;
}
}
else
{
ALIGNED_ARRAY_16( dctcoef, dct4x4,[16],[16] );
- h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
- h->nr_count[0] += h->mb.b_noise_reduction * 16;
-
- for( int i8x8 = 0; i8x8 < 4; i8x8++ )
+ for( int p = 0; p < plane_count; p++ )
{
- int i_decimate_8x8 = 0;
- int cbp = 0;
+ h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[p], h->mb.pic.p_fdec[p] );
+ h->nr_count[0+!!p*2] += h->mb.b_noise_reduction * 16;
- /* encode one 4x4 block */
- for( int i4x4 = 0; i4x4 < 4; i4x4++ )
+ int plane_cbp = 0;
+ for( int i8x8 = 0; i8x8 < 4; i8x8++ )
{
- int idx = i8x8 * 4 + i4x4;
+ int i_decimate_8x8 = 0;
+ int cbp = 0;
- nz = x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );
- h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
+ /* encode one 4x4 block */
+ for( int i4x4 = 0; i4x4 < 4; i4x4++ )
+ {
+ int idx = i8x8 * 4 + i4x4;
+
+ nz = x264_quant_4x4( h, dct4x4[idx], i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 0, p, idx );
+ h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = nz;
+
+ if( nz )
+ {
+ h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+idx], dct4x4[idx] );
+ h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[p?CQM_4PC:CQM_4PY], i_qp );
+ if( b_decimate && i_decimate_8x8 < 6 )
+ i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[p*16+idx] );
+ cbp = 1;
+ }
+ }
- if( nz )
+ int x = i8x8&1;
+ int y = i8x8>>1;
+
+ /* decimate this 8x8 block */
+ i_decimate_mb += i_decimate_8x8;
+ if( b_decimate )
+ {
+ if( i_decimate_8x8 < 4 )
+ STORE_8x8_NNZ( p, i8x8, 0 );
+ else
+ plane_cbp |= 1<<i8x8;
+ }
+ else if( cbp )
{
- h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );
- h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp );
- if( b_decimate && i_decimate_8x8 < 6 )
- i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[idx] );
- cbp = 1;
+ h->dctf.add8x8_idct( &h->mb.pic.p_fdec[p][8*x + 8*y*FDEC_STRIDE], &dct4x4[i8x8*4] );
+ plane_cbp |= 1<<i8x8;
}
}
- int x = i8x8&1;
- int y = i8x8>>1;
-
- /* decimate this 8x8 block */
- i_decimate_mb += i_decimate_8x8;
if( b_decimate )
{
- if( i_decimate_8x8 < 4 )
+ if( i_decimate_mb < 6 )
{
- int s8 = X264_SCAN8_0 + 2*x + 16*y;
- STORE_8x8_NNZ( s8, 0 );
+ plane_cbp = 0;
+ CLEAR_16x16_NNZ( p );
}
else
- h->mb.i_cbp_luma |= 1<<i8x8;
- }
- else if( cbp )
- {
- h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE], &dct4x4[i8x8*4] );
- h->mb.i_cbp_luma |= 1<<i8x8;
- }
- }
-
- if( b_decimate )
- {
- if( i_decimate_mb < 6 )
- {
- h->mb.i_cbp_luma = 0;
- CLEAR_16x16_NNZ
- }
- else
- {
- for( int i8x8 = 0; i8x8 < 4; i8x8++ )
- if( h->mb.i_cbp_luma&(1<<i8x8) )
- h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
+ {
+ for( int i8x8 = 0; i8x8 < 4; i8x8++ )
+ if( plane_cbp&(1<<i8x8) )
+ h->dctf.add8x8_idct( &h->mb.pic.p_fdec[p][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
+ }
}
+ h->mb.i_cbp_luma |= plane_cbp;
+ i_qp = h->mb.i_chroma_qp;
}
}
}
/* encode chroma */
- if( IS_INTRA( h->mb.i_type ) )
+ if( chroma )
{
- const int i_mode = h->mb.i_chroma_pred_mode;
- if( h->mb.b_lossless )
- x264_predict_lossless_8x8_chroma( h, i_mode );
- else
+ if( IS_INTRA( h->mb.i_type ) )
{
- h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
- h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
+ const int i_mode = h->mb.i_chroma_pred_mode;
+ if( h->mb.b_lossless )
+ x264_predict_lossless_8x8_chroma( h, i_mode );
+ else
+ {
+ h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
+ h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
+ }
}
- }
- /* encode the 8x8 blocks */
- x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );
+ /* encode the 8x8 blocks */
+ x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );
+ }
+ else
+ h->mb.i_cbp_chroma = 0;
/* store cbp */
int cbp = h->mb.i_cbp_chroma << 4 | h->mb.i_cbp_luma;
if( h->param.b_cabac )
- cbp |= h->mb.cache.non_zero_count[x264_scan8[24]] << 8
- | h->mb.cache.non_zero_count[x264_scan8[25]] << 9
- | h->mb.cache.non_zero_count[x264_scan8[26]] << 10;
+ cbp |= h->mb.cache.non_zero_count[x264_scan8[LUMA_DC ]] << 8
+ | h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+0]] << 9
+ | h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+1]] << 10;
h->mb.cbp[h->mb.i_mb_xy] = cbp;
/* Check for P_SKIP
}
}
+void x264_macroblock_encode( x264_t *h )
+{
+ if( CHROMA444 )
+ x264_macroblock_encode_internal( h, 3, 0 );
+ else
+ x264_macroblock_encode_internal( h, 1, 1 );
+}
+
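The new entry point is a thin dispatcher: because x264_macroblock_encode_internal is ALWAYS_INLINE, each call with constant (plane_count, chroma) arguments compiles into its own specialization, with the untaken branches and fixed-trip loops folded away. A generic sketch of the pattern (MY_ALWAYS_INLINE stands in for x264's ALWAYS_INLINE; gcc/clang attribute syntax):

    #define MY_ALWAYS_INLINE inline __attribute__((always_inline))

    static MY_ALWAYS_INLINE int sum_planes_internal( const int *v, int plane_count )
    {
        int s = 0;
        for( int p = 0; p < plane_count; p++ ) /* trip count constant per call site */
            s += v[p];
        return s;
    }

    int sum_planes( const int *v, int is444 )
    {
        return is444 ? sum_planes_internal( v, 3 )  /* specialized 3-plane body */
                     : sum_planes_internal( v, 1 ); /* specialized 1-plane body */
    }
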
/*****************************************************************************
* x264_macroblock_probe_skip:
* Check if the current MB could be encoded as a [PB]_SKIP
*****************************************************************************/
-int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
+static ALWAYS_INLINE int x264_macroblock_probe_skip_internal( x264_t *h, int b_bidir, int plane_count, int chroma )
{
ALIGNED_ARRAY_16( dctcoef, dct4x4,[4],[16] );
ALIGNED_ARRAY_16( dctcoef, dct2x2,[4] );
int i_qp = h->mb.i_qp;
int thresh, ssd;
- if( !b_bidir )
+ for( int p = 0; p < plane_count; p++ )
{
- /* Get the MV */
- mvp[0] = x264_clip3( h->mb.cache.pskip_mv[0], h->mb.mv_min[0], h->mb.mv_max[0] );
- mvp[1] = x264_clip3( h->mb.cache.pskip_mv[1], h->mb.mv_min[1], h->mb.mv_max[1] );
-
- /* Motion compensation */
- h->mc.mc_luma( h->mb.pic.p_fdec[0], FDEC_STRIDE,
- h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
- mvp[0], mvp[1], 16, 16, &h->sh.weight[0][0] );
- }
-
- for( int i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
- {
- int fenc_offset = (i8x8&1) * 8 + (i8x8>>1) * FENC_STRIDE * 8;
- int fdec_offset = (i8x8&1) * 8 + (i8x8>>1) * FDEC_STRIDE * 8;
- /* get luma diff */
- h->dctf.sub8x8_dct( dct4x4, h->mb.pic.p_fenc[0] + fenc_offset,
- h->mb.pic.p_fdec[0] + fdec_offset );
- /* encode one 4x4 block */
- for( int i4x4 = 0; i4x4 < 4; i4x4++ )
+ int quant_cat = p ? CQM_4PC : CQM_4PY;
+ if( !b_bidir )
{
- if( h->mb.b_noise_reduction )
- h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[0], h->nr_offset[0], 16 );
- if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ) )
- continue;
- h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
- i_decimate_mb += h->quantf.decimate_score16( dctscan );
- if( i_decimate_mb >= 6 )
- return 0;
+ /* Get the MV */
+ mvp[0] = x264_clip3( h->mb.cache.pskip_mv[0], h->mb.mv_min[0], h->mb.mv_max[0] );
+ mvp[1] = x264_clip3( h->mb.cache.pskip_mv[1], h->mb.mv_min[1], h->mb.mv_max[1] );
+
+ /* Motion compensation */
+ h->mc.mc_luma( h->mb.pic.p_fdec[p], FDEC_STRIDE,
+ &h->mb.pic.p_fref[0][0][p*4], h->mb.pic.i_stride[p],
+ mvp[0], mvp[1], 16, 16, &h->sh.weight[0][p] );
}
- }
-
- /* encode chroma */
- i_qp = h->mb.i_chroma_qp;
- thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
- if( !b_bidir )
- {
- /* Special case for mv0, which is (of course) very common in P-skip mode. */
- if( M32( mvp ) )
- h->mc.mc_chroma( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2], FDEC_STRIDE,
- h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
- mvp[0], mvp[1], 8, 8 );
- else
- h->mc.load_deinterleave_8x8x2_fdec( h->mb.pic.p_fdec[1], h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1] );
+ for( int i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
+ {
+ int fenc_offset = (i8x8&1) * 8 + (i8x8>>1) * FENC_STRIDE * 8;
+ int fdec_offset = (i8x8&1) * 8 + (i8x8>>1) * FDEC_STRIDE * 8;
+ /* get luma diff */
+ h->dctf.sub8x8_dct( dct4x4, h->mb.pic.p_fenc[p] + fenc_offset,
+ h->mb.pic.p_fdec[p] + fdec_offset );
+ /* encode one 4x4 block */
+ for( int i4x4 = 0; i4x4 < 4; i4x4++ )
+ {
+ if( h->mb.b_noise_reduction )
+ h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[0+!!p*2], h->nr_offset[0+!!p*2], 16 );
+ if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[quant_cat][i_qp], h->quant4_bias[quant_cat][i_qp] ) )
+ continue;
+ h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
+ i_decimate_mb += h->quantf.decimate_score16( dctscan );
+ if( i_decimate_mb >= 6 )
+ return 0;
+ }
+ }
+ i_qp = h->mb.i_chroma_qp;
}
- for( int ch = 0; ch < 2; ch++ )
+ if( chroma )
{
- pixel *p_src = h->mb.pic.p_fenc[1+ch];
- pixel *p_dst = h->mb.pic.p_fdec[1+ch];
-
- if( !b_bidir && h->sh.weight[0][1+ch].weightfn )
- h->sh.weight[0][1+ch].weightfn[8>>2]( h->mb.pic.p_fdec[1+ch], FDEC_STRIDE,
- h->mb.pic.p_fdec[1+ch], FDEC_STRIDE,
- &h->sh.weight[0][1+ch], 8 );
+ /* encode chroma */
+ i_qp = h->mb.i_chroma_qp;
+ thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
- /* there is almost never a termination during chroma, but we can't avoid the check entirely */
- /* so instead we check SSD and skip the actual check if the score is low enough. */
- ssd = h->pixf.ssd[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
- if( ssd < thresh )
- continue;
+ if( !b_bidir )
+ {
+ /* Special case for mv0, which is (of course) very common in P-skip mode. */
+ if( M32( mvp ) )
+ h->mc.mc_chroma( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2], FDEC_STRIDE,
+ h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
+ mvp[0], mvp[1], 8, 8 );
+ else
+ h->mc.load_deinterleave_8x8x2_fdec( h->mb.pic.p_fdec[1], h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1] );
+ }
- /* The vast majority of chroma checks will terminate during the DC check or the higher
- * threshold check, so we can save time by doing a DC-only DCT. */
- if( h->mb.b_noise_reduction )
+ for( int ch = 0; ch < 2; ch++ )
{
- h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
- for( int i4x4 = 0; i4x4 < 4; i4x4++ )
+ pixel *p_src = h->mb.pic.p_fenc[1+ch];
+ pixel *p_dst = h->mb.pic.p_fdec[1+ch];
+
+ if( !b_bidir && h->sh.weight[0][1+ch].weightfn )
+ h->sh.weight[0][1+ch].weightfn[8>>2]( h->mb.pic.p_fdec[1+ch], FDEC_STRIDE,
+ h->mb.pic.p_fdec[1+ch], FDEC_STRIDE,
+ &h->sh.weight[0][1+ch], 8 );
+
+ /* there is almost never a termination during chroma, but we can't avoid the check entirely */
+ /* so instead we check SSD and skip the actual check if the score is low enough. */
+ ssd = h->pixf.ssd[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
+ if( ssd < thresh )
+ continue;
+
+ /* The vast majority of chroma checks will terminate during the DC check or the higher
+ * threshold check, so we can save time by doing a DC-only DCT. */
+ if( h->mb.b_noise_reduction )
{
- h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[2], h->nr_offset[2], 16 );
- dct2x2[i4x4] = dct4x4[i4x4][0];
+ h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
+ for( int i4x4 = 0; i4x4 < 4; i4x4++ )
+ {
+ h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[2], h->nr_offset[2], 16 );
+ dct2x2[i4x4] = dct4x4[i4x4][0];
+ }
}
- }
- else
- h->dctf.sub8x8_dct_dc( dct2x2, p_src, p_dst );
+ else
+ h->dctf.sub8x8_dct_dc( dct2x2, p_src, p_dst );
- if( h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][i_qp][0]>>1, h->quant4_bias[CQM_4PC][i_qp][0]<<1 ) )
- return 0;
+ if( h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][i_qp][0]>>1, h->quant4_bias[CQM_4PC][i_qp][0]<<1 ) )
+ return 0;
- /* If there wasn't a termination in DC, we can check against a much higher threshold. */
- if( ssd < thresh*4 )
- continue;
+ /* If there wasn't a termination in DC, we can check against a much higher threshold. */
+ if( ssd < thresh*4 )
+ continue;
- if( !h->mb.b_noise_reduction )
- h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
+ if( !h->mb.b_noise_reduction )
+ h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
- /* calculate dct coeffs */
- for( int i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
- {
- dct4x4[i4x4][0] = 0;
- if( h->mb.b_noise_reduction )
- h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[2], h->nr_offset[2], 16 );
- if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] ) )
- continue;
- h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
- i_decimate_mb += h->quantf.decimate_score15( dctscan );
- if( i_decimate_mb >= 7 )
- return 0;
+ /* calculate dct coeffs */
+ for( int i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
+ {
+ dct4x4[i4x4][0] = 0;
+ if( h->mb.b_noise_reduction )
+ h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[2], h->nr_offset[2], 16 );
+ if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] ) )
+ continue;
+ h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
+ i_decimate_mb += h->quantf.decimate_score15( dctscan );
+ if( i_decimate_mb >= 7 )
+ return 0;
+ }
}
}
return 1;
}
+int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
+{
+ if( CHROMA444 )
+ return x264_macroblock_probe_skip_internal( h, b_bidir, 3, 0 );
+ else
+ return x264_macroblock_probe_skip_internal( h, b_bidir, 1, 1 );
+}
+
/****************************************************************************
* DCT-domain noise reduction / adaptive deadzone
* from libavcodec
h->nr_offset = h->nr_offset_denoise;
h->nr_residual_sum = h->nr_residual_sum_buf[0];
h->nr_count = h->nr_count_buf[0];
- for( int cat = 0; cat < 3; cat++ )
+ for( int cat = 0; cat < 3 + CHROMA444; cat++ )
{
- int dct8x8 = cat == 1;
+ int dct8x8 = cat&1;
int size = dct8x8 ? 64 : 16;
const uint16_t *weight = dct8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
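
With 4:4:4 the noise-reduction loop covers four categories instead of three, ordered so that the low bit selects the transform size, which is what the cat == 1 to cat&1 change exploits. Judging from the nr_count[0+!!p*2] and nr_count[1+!!p*2] updates earlier in this patch, bit 1 separates plane 0 from the secondary planes; a quick enumeration of that assumed mapping:

    #include <stdio.h>

    int main(void)
    {
        for( int cat = 0; cat < 4; cat++ ) /* 3 + CHROMA444 categories */
            printf( "cat %d: %s %s\n", cat,
                    (cat>>1) ? "secondary plane (chroma)" : "plane 0",
                    (cat&1) ? "8x8" : "4x4" );
        return 0;
    }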
* RD only; 4 calls to this do not make up for one macroblock_encode.
* doesn't transform chroma dc.
*****************************************************************************/
-void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
+static ALWAYS_INLINE void x264_macroblock_encode_p8x8_internal( x264_t *h, int i8, int plane_count, int chroma )
{
+ int b_decimate = h->mb.b_dct_decimate;
int i_qp = h->mb.i_qp;
int x = i8&1;
int y = i8>>1;
- int s8 = X264_SCAN8_0 + 2*x + 16*y;
- pixel *p_fenc = h->mb.pic.p_fenc[0] + 8*x + 8*y*FENC_STRIDE;
- pixel *p_fdec = h->mb.pic.p_fdec[0] + 8*x + 8*y*FDEC_STRIDE;
- int b_decimate = h->mb.b_dct_decimate;
- int nnz8x8 = 0;
int nz;
+ h->mb.i_cbp_chroma = 0;
+ h->mb.i_cbp_luma &= ~(1 << i8);
+
if( !h->mb.b_skip_mc )
x264_mb_mc_8x8( h, i8 );
if( h->mb.b_lossless )
{
- if( h->mb.b_transform_8x8 )
+ for( int p = 0; p < plane_count; p++ )
{
- nnz8x8 = h->zigzagf.sub_8x8( h->dct.luma8x8[i8], p_fenc, p_fdec );
- STORE_8x8_NNZ( s8, nnz8x8 );
- }
- else
- {
- for( int i4 = i8*4; i4 < i8*4+4; i4++ )
+ pixel *p_fenc = h->mb.pic.p_fenc[p] + 8*x + 8*y*FENC_STRIDE;
+ pixel *p_fdec = h->mb.pic.p_fdec[p] + 8*x + 8*y*FDEC_STRIDE;
+ int nnz8x8 = 0;
+ if( h->mb.b_transform_8x8 )
{
- nz = h->zigzagf.sub_4x4( h->dct.luma4x4[i4],
- h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4],
- h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4] );
- h->mb.cache.non_zero_count[x264_scan8[i4]] = nz;
- nnz8x8 |= nz;
+ nnz8x8 = h->zigzagf.sub_8x8( h->dct.luma8x8[4*p+i8], p_fenc, p_fdec );
+ STORE_8x8_NNZ( p, i8, nnz8x8 );
}
+ else
+ {
+ for( int i4 = i8*4; i4 < i8*4+4; i4++ )
+ {
+ nz = h->zigzagf.sub_4x4( h->dct.luma4x4[16*p+i4],
+ h->mb.pic.p_fenc[p]+block_idx_xy_fenc[i4],
+ h->mb.pic.p_fdec[p]+block_idx_xy_fdec[i4] );
+ h->mb.cache.non_zero_count[x264_scan8[16*p+i4]] = nz;
+ nnz8x8 |= nz;
+ }
+ }
+ h->mb.i_cbp_luma |= nnz8x8 << i8;
}
- for( int ch = 0; ch < 2; ch++ )
+ if( chroma )
{
- dctcoef dc;
- p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + 4*y*FENC_STRIDE;
- p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + 4*y*FDEC_STRIDE;
- nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16+i8+ch*4], p_fenc, p_fdec, &dc );
- h->mb.cache.non_zero_count[x264_scan8[16+i8+ch*4]] = nz;
+ for( int ch = 0; ch < 2; ch++ )
+ {
+ dctcoef dc;
+ pixel *p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + 4*y*FENC_STRIDE;
+ pixel *p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + 4*y*FDEC_STRIDE;
+                nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16+i8+ch*16], p_fenc, p_fdec, &dc );
+                h->mb.cache.non_zero_count[x264_scan8[16+i8+ch*16]] = nz;
+ }
+ h->mb.i_cbp_chroma = 0x02;
}
}
else
{
if( h->mb.b_transform_8x8 )
{
- ALIGNED_ARRAY_16( dctcoef, dct8x8,[64] );
- h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
- nnz8x8 = x264_quant_8x8( h, dct8x8, i_qp, 0, i8 );
- if( nnz8x8 )
+ for( int p = 0; p < plane_count; p++ )
{
- h->zigzagf.scan_8x8( h->dct.luma8x8[i8], dct8x8 );
-
- if( b_decimate && !h->mb.b_trellis )
- nnz8x8 = 4 <= h->quantf.decimate_score64( h->dct.luma8x8[i8] );
-
+ int quant_cat = p ? CQM_8PC : CQM_8PY;
+ pixel *p_fenc = h->mb.pic.p_fenc[p] + 8*x + 8*y*FENC_STRIDE;
+ pixel *p_fdec = h->mb.pic.p_fdec[p] + 8*x + 8*y*FDEC_STRIDE;
+ ALIGNED_ARRAY_16( dctcoef, dct8x8,[64] );
+ h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
+ int nnz8x8 = x264_quant_8x8( h, dct8x8, i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 0, p, i8 );
if( nnz8x8 )
{
- h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8PY], i_qp );
- h->dctf.add8x8_idct8( p_fdec, dct8x8 );
- STORE_8x8_NNZ( s8, 1 );
+ h->zigzagf.scan_8x8( h->dct.luma8x8[4*p+i8], dct8x8 );
+
+ if( b_decimate && !h->mb.b_trellis )
+ nnz8x8 = 4 <= h->quantf.decimate_score64( h->dct.luma8x8[4*p+i8] );
+
+ if( nnz8x8 )
+ {
+ h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[quant_cat], i_qp );
+ h->dctf.add8x8_idct8( p_fdec, dct8x8 );
+ STORE_8x8_NNZ( p, i8, 1 );
+ }
+ else
+ STORE_8x8_NNZ( p, i8, 0 );
}
else
- STORE_8x8_NNZ( s8, 0 );
+ STORE_8x8_NNZ( p, i8, 0 );
+ h->mb.i_cbp_luma |= nnz8x8 << i8;
+ i_qp = h->mb.i_chroma_qp;
}
- else
- STORE_8x8_NNZ( s8, 0 );
}
else
{
- int i_decimate_8x8 = 0;
- ALIGNED_ARRAY_16( dctcoef, dct4x4,[4],[16] );
- h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
- for( int i4 = 0; i4 < 4; i4++ )
+ for( int p = 0; p < plane_count; p++ )
{
- nz = x264_quant_4x4( h, dct4x4[i4], i_qp, DCT_LUMA_4x4, 0, i8*4+i4 );
- h->mb.cache.non_zero_count[x264_scan8[i8*4+i4]] = nz;
- if( nz )
+ int quant_cat = p ? CQM_4PC : CQM_4PY;
+ pixel *p_fenc = h->mb.pic.p_fenc[p] + 8*x + 8*y*FENC_STRIDE;
+ pixel *p_fdec = h->mb.pic.p_fdec[p] + 8*x + 8*y*FDEC_STRIDE;
+ int i_decimate_8x8 = 0, nnz8x8 = 0;
+ ALIGNED_ARRAY_16( dctcoef, dct4x4,[4],[16] );
+ h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
+ for( int i4 = 0; i4 < 4; i4++ )
{
- h->zigzagf.scan_4x4( h->dct.luma4x4[i8*4+i4], dct4x4[i4] );
- h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[CQM_4PY], i_qp );
- if( b_decimate )
- i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[i8*4+i4] );
- nnz8x8 = 1;
+                    nz = x264_quant_4x4( h, dct4x4[i4], i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 0, p, i8*4+i4 );
+ h->mb.cache.non_zero_count[x264_scan8[p*16+i8*4+i4]] = nz;
+ if( nz )
+ {
+ h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+i8*4+i4], dct4x4[i4] );
+ h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[quant_cat], i_qp );
+ if( b_decimate )
+ i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[p*16+i8*4+i4] );
+ nnz8x8 = 1;
+ }
}
- }
- if( b_decimate && i_decimate_8x8 < 4 )
- nnz8x8 = 0;
+ if( b_decimate && i_decimate_8x8 < 4 )
+ nnz8x8 = 0;
- if( nnz8x8 )
- h->dctf.add8x8_idct( p_fdec, dct4x4 );
- else
- STORE_8x8_NNZ( s8, 0 );
- }
+ if( nnz8x8 )
+ h->dctf.add8x8_idct( p_fdec, dct4x4 );
+ else
+ STORE_8x8_NNZ( p, i8, 0 );
- i_qp = h->mb.i_chroma_qp;
+ h->mb.i_cbp_luma |= nnz8x8 << i8;
+ i_qp = h->mb.i_chroma_qp;
+ }
+ }
- for( int ch = 0; ch < 2; ch++ )
+ if( chroma )
{
- ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
- p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + 4*y*FENC_STRIDE;
- p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + 4*y*FDEC_STRIDE;
- h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
- if( h->mb.b_noise_reduction )
- h->quantf.denoise_dct( dct4x4, h->nr_residual_sum[2], h->nr_offset[2], 16 );
- dct4x4[0] = 0;
-
- if( h->mb.b_trellis )
- nz = x264_quant_4x4_trellis( h, dct4x4, CQM_4PC, i_qp, DCT_CHROMA_AC, 0, 1, 0 );
- else
- nz = h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
-
- h->mb.cache.non_zero_count[x264_scan8[16+i8+ch*4]] = nz;
- if( nz )
+ i_qp = h->mb.i_chroma_qp;
+ for( int ch = 0; ch < 2; ch++ )
{
- h->zigzagf.scan_4x4( h->dct.luma4x4[16+i8+ch*4], dct4x4 );
- h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
- h->dctf.add4x4_idct( p_fdec, dct4x4 );
+ ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
+ pixel *p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + 4*y*FENC_STRIDE;
+ pixel *p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + 4*y*FDEC_STRIDE;
+ h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
+ if( h->mb.b_noise_reduction )
+ h->quantf.denoise_dct( dct4x4, h->nr_residual_sum[2], h->nr_offset[2], 16 );
+ dct4x4[0] = 0;
+
+ if( h->mb.b_trellis )
+ nz = x264_quant_4x4_trellis( h, dct4x4, CQM_4PC, i_qp, DCT_CHROMA_AC, 0, 1, 0 );
+ else
+ nz = h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
+
+ h->mb.cache.non_zero_count[x264_scan8[16+i8+ch*16]] = nz;
+ if( nz )
+ {
+ h->zigzagf.scan_4x4( h->dct.luma4x4[16+i8+ch*16], dct4x4 );
+ h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
+ h->dctf.add4x4_idct( p_fdec, dct4x4 );
+ }
}
+ h->mb.i_cbp_chroma = 0x02;
}
}
- h->mb.i_cbp_luma &= ~(1 << i8);
- h->mb.i_cbp_luma |= nnz8x8 << i8;
- h->mb.i_cbp_chroma = 0x02;
+}
+
+void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
+{
+ if( CHROMA444 )
+ x264_macroblock_encode_p8x8_internal( h, i8, 3, 0 );
+ else
+ x264_macroblock_encode_p8x8_internal( h, i8, 1, 1 );
}
/*****************************************************************************
- * RD only, luma only
+ * RD only, luma only (for 4:2:0)
*****************************************************************************/
-void x264_macroblock_encode_p4x4( x264_t *h, int i4 )
+static ALWAYS_INLINE void x264_macroblock_encode_p4x4_internal( x264_t *h, int i4, int plane_count )
{
int i_qp = h->mb.i_qp;
- pixel *p_fenc = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[i4]];
- pixel *p_fdec = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i4]];
- int nz;
- /* Don't need motion compensation as this function is only used in qpel-RD, which caches pixel data. */
-
- if( h->mb.b_lossless )
+ for( int p = 0; p < plane_count; p++ )
{
- nz = h->zigzagf.sub_4x4( h->dct.luma4x4[i4], p_fenc, p_fdec );
- h->mb.cache.non_zero_count[x264_scan8[i4]] = nz;
- }
- else
- {
- ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
- h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
- nz = x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 0, i4 );
- h->mb.cache.non_zero_count[x264_scan8[i4]] = nz;
- if( nz )
+ int quant_cat = p ? CQM_4PC : CQM_4PY;
+ pixel *p_fenc = &h->mb.pic.p_fenc[p][block_idx_xy_fenc[i4]];
+ pixel *p_fdec = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[i4]];
+ int nz;
+
+ /* Don't need motion compensation as this function is only used in qpel-RD, which caches pixel data. */
+
+ if( h->mb.b_lossless )
{
- h->zigzagf.scan_4x4( h->dct.luma4x4[i4], dct4x4 );
- h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PY], i_qp );
- h->dctf.add4x4_idct( p_fdec, dct4x4 );
+ nz = h->zigzagf.sub_4x4( h->dct.luma4x4[p*16+i4], p_fenc, p_fdec );
+ h->mb.cache.non_zero_count[x264_scan8[p*16+i4]] = nz;
}
+ else
+ {
+ ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
+ h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
+ nz = x264_quant_4x4( h, dct4x4, i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 0, p, i4 );
+ h->mb.cache.non_zero_count[x264_scan8[p*16+i4]] = nz;
+ if( nz )
+ {
+ h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+i4], dct4x4 );
+ h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[quant_cat], i_qp );
+ h->dctf.add4x4_idct( p_fdec, dct4x4 );
+ }
+ }
+ i_qp = h->mb.i_chroma_qp;
}
}
+
+void x264_macroblock_encode_p4x4( x264_t *h, int i4 )
+{
+    if( CHROMA444 )
+        x264_macroblock_encode_p4x4_internal( h, i4, 3 );
+    else
+        x264_macroblock_encode_p4x4_internal( h, i4, 1 );
+}
x264_macroblock_probe_skip( h, 1 )
void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode );
-void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int idx, int i_mode );
-void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int idx, int i_mode, pixel edge[33] );
-void x264_predict_lossless_16x16( x264_t *h, int i_mode );
+void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int p, int idx, int i_mode );
+void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[33] );
+void x264_predict_lossless_16x16( x264_t *h, int p, int i_mode );
void x264_macroblock_encode ( x264_t *h );
void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
void x264_macroblock_encode_p4x4( x264_t *h, int i4 );
-void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp );
-void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp );
+void x264_mb_encode_i4x4( x264_t *h, int p, int idx, int i_qp, int i_mode );
+void x264_mb_encode_i8x8( x264_t *h, int p, int idx, int i_qp, int i_mode, pixel *edge );
void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp );
void x264_cabac_mb_skip( x264_t *h, int b_skip );
int x264_quant_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
- int i_qp, int ctx_block_cat, int b_intra, int b_chroma );
+ int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
- int i_qp, int b_intra, int idx );
+ int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
void x264_noise_reduction_update( x264_t *h );
if( b_refine_qpel || (dir^1) != odir ) \
{ \
int stride = 16; \
- pixel *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \
+ pixel *src = h->mc.get_ref( pix, &stride, &m->p_fref[0], m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \
int cost = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+ p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
if( b_chroma_me && cost < bcost ) \
{ \
- h->mc.mc_chroma( pix, pix+8, 16, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
- if( m->weight[1].weightfn ) \
- m->weight[1].weightfn[x264_pixel_size[i_pixel].w>>3]( pix, 16, pix, 16, \
- &m->weight[1], x264_pixel_size[i_pixel].h>>1 ); \
- cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], FENC_STRIDE, pix, 16 ); \
- if( cost < bcost ) \
+ if( CHROMA444 ) \
{ \
- if( m->weight[2].weightfn ) \
- m->weight[2].weightfn[x264_pixel_size[i_pixel].w>>3]( pix+8, 16, pix+8, 16, \
- &m->weight[2], x264_pixel_size[i_pixel].h>>1 ); \
- cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], FENC_STRIDE, pix+8, 16 ); \
+ stride = 16; \
+ src = h->mc.get_ref( pix, &stride, &m->p_fref[4], m->i_stride[1], mx, my, bw, bh, &m->weight[1] ); \
+ cost += h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[1], FENC_STRIDE, src, stride ); \
+ if( cost < bcost ) \
+ { \
+ stride = 16; \
+ src = h->mc.get_ref( pix, &stride, &m->p_fref[8], m->i_stride[2], mx, my, bw, bh, &m->weight[2] ); \
+ cost += h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[2], FENC_STRIDE, src, stride ); \
+ } \
+ } \
+ else \
+ { \
+ h->mc.mc_chroma( pix, pix+8, 16, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
+ if( m->weight[1].weightfn ) \
+ m->weight[1].weightfn[x264_pixel_size[i_pixel].w>>3]( pix, 16, pix, 16, \
+ &m->weight[1], x264_pixel_size[i_pixel].h>>1 ); \
+ cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], FENC_STRIDE, pix, 16 ); \
+ if( cost < bcost ) \
+ { \
+ if( m->weight[2].weightfn ) \
+ m->weight[2].weightfn[x264_pixel_size[i_pixel].w>>3]( pix+8, 16, pix+8, 16, \
+ &m->weight[2], x264_pixel_size[i_pixel].h>>1 ); \
+ cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], FENC_STRIDE, pix+8, 16 ); \
+ } \
} \
} \
COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, bdir, dir ); \
const uint16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
const uint16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
const int i_pixel = m->i_pixel;
- const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
+ const int b_chroma_me = h->mb.b_chroma_me && (i_pixel <= PIXEL_8x8 || CHROMA444);
const int mvy_offset = MB_INTERLACED & m->i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
ALIGNED_ARRAY_16( pixel, pix,[64*18] ); // really 17x17x2, but round up for alignment
int i = 4 + 3*dx + dy;\
int mvx = bm##list##x+dx;\
int mvy = bm##list##y+dy;\
- stride[list][i] = bw;\
- src[list][i] = h->mc.get_ref( pixy_buf[list][i], &stride[list][i], m->p_fref, m->i_stride[0], mvx, mvy, bw, bh, weight_none );\
+ stride[0][list][i] = bw;\
+ src[0][list][i] = h->mc.get_ref( pixy_buf[list][i], &stride[0][list][i], &m->p_fref[0],\
+ m->i_stride[0], mvx, mvy, bw, bh, weight_none );\
if( rd )\
- h->mc.mc_chroma( pixu_buf[list][i], pixv_buf[list][i], 8, m->p_fref[4], m->i_stride[1], mvx, mvy + mv##list##y_offset, bw>>1, bh>>1 );\
+ {\
+ if( CHROMA444 )\
+ {\
+ stride[1][list][i] = bw;\
+ src[1][list][i] = h->mc.get_ref( pixu_buf[list][i], &stride[1][list][i], &m->p_fref[4],\
+ m->i_stride[1], mvx, mvy, bw, bh, weight_none );\
+ stride[2][list][i] = bw;\
+ src[2][list][i] = h->mc.get_ref( pixv_buf[list][i], &stride[2][list][i], &m->p_fref[8],\
+ m->i_stride[2], mvx, mvy, bw, bh, weight_none );\
+ }\
+ else\
+ h->mc.mc_chroma( pixu_buf[list][i], pixv_buf[list][i], 8, m->p_fref[4], m->i_stride[1],\
+ mvx, mvy + mv##list##y_offset, bw>>1, bh>>1 );\
+ }\
}
#define SATD_THRESH 17/16
const int bw = x264_pixel_size[i_pixel].w;
const int bh = x264_pixel_size[i_pixel].h;
ALIGNED_ARRAY_16( pixel, pixy_buf,[2],[9][16*16] );
- ALIGNED_ARRAY_16( pixel, pixu_buf,[2],[9][8*8] );
- ALIGNED_ARRAY_16( pixel, pixv_buf,[2],[9][8*8] );
- pixel *src[2][9];
+ ALIGNED_ARRAY_16( pixel, pixu_buf,[2],[9][16*16] );
+ ALIGNED_ARRAY_16( pixel, pixv_buf,[2],[9][16*16] );
+ pixel *src[3][2][9];
+ int chromasize = CHROMA444 ? 8 : 4;
pixel *pix = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE];
- pixel *pixu = &h->mb.pic.p_fdec[1][4*x + 4*y*FDEC_STRIDE];
- pixel *pixv = &h->mb.pic.p_fdec[2][4*x + 4*y*FDEC_STRIDE];
+ pixel *pixu = &h->mb.pic.p_fdec[1][chromasize*x + chromasize*y*FDEC_STRIDE];
+ pixel *pixv = &h->mb.pic.p_fdec[2][chromasize*x + chromasize*y*FDEC_STRIDE];
int ref0 = h->mb.cache.ref[0][s8];
int ref1 = h->mb.cache.ref[1][s8];
const int mv0y_offset = MB_INTERLACED & ref0 ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
const int mv1y_offset = MB_INTERLACED & ref1 ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
- int stride[2][9];
+ int stride[3][2][9];
int bm0x = m0->mv[0];
int bm0y = m0->mv[1];
int bm1x = m1->mv[0];
int i0 = 4 + 3*dia4d[j][0] + dia4d[j][1];
int i1 = 4 + 3*dia4d[j][2] + dia4d[j][3];
visited[(m0x)&7][(m0y)&7][(m1x)&7] |= (1<<((m1y)&7));
- h->mc.avg[i_pixel]( pix, FDEC_STRIDE, src[0][i0], stride[0][i0], src[1][i1], stride[1][i1], i_weight );
+ h->mc.avg[i_pixel]( pix, FDEC_STRIDE, src[0][0][i0], stride[0][0][i0], src[0][1][i1], stride[0][1][i1], i_weight );
int cost = h->pixf.mbcmp[i_pixel]( m0->p_fenc[0], FENC_STRIDE, pix, FDEC_STRIDE )
+ p_cost_m0x[m0x] + p_cost_m0y[m0y] + p_cost_m1x[m1x] + p_cost_m1y[m1y];
if( rd )
bcost = X264_MIN( cost, bcost );
M32( cache0_mv ) = pack16to32_mask(m0x,m0y);
M32( cache1_mv ) = pack16to32_mask(m1x,m1y);
- h->mc.avg[i_pixel+3]( pixu, FDEC_STRIDE, pixu_buf[0][i0], 8, pixu_buf[1][i1], 8, i_weight );
- h->mc.avg[i_pixel+3]( pixv, FDEC_STRIDE, pixv_buf[0][i0], 8, pixv_buf[1][i1], 8, i_weight );
+ if( CHROMA444 )
+ {
+ h->mc.avg[i_pixel]( pixu, FDEC_STRIDE, src[1][0][i0], stride[1][0][i0], src[1][1][i1], stride[1][1][i1], i_weight );
+ h->mc.avg[i_pixel]( pixv, FDEC_STRIDE, src[2][0][i0], stride[2][0][i0], src[2][1][i1], stride[2][1][i1], i_weight );
+ }
+ else
+ {
+ h->mc.avg[i_pixel+3]( pixu, FDEC_STRIDE, pixu_buf[0][i0], 8, pixu_buf[1][i1], 8, i_weight );
+ h->mc.avg[i_pixel+3]( pixv, FDEC_STRIDE, pixv_buf[0][i0], 8, pixv_buf[1][i1], 8, i_weight );
+ }
uint64_t costrd = x264_rd_cost_part( h, i_lambda2, i8*4, m0->i_pixel );
COPY2_IF_LT( bcostrd, costrd, bestj, j );
}
{ \
uint64_t cost; \
M32( cache_mv ) = pack16to32_mask(mx,my); \
- if( m->i_pixel <= PIXEL_8x8 ) \
+ if( CHROMA444 ) \
+ { \
+ h->mc.mc_luma( pixu, FDEC_STRIDE, &m->p_fref[4], m->i_stride[1], mx, my, bw, bh, &m->weight[1] ); \
+ h->mc.mc_luma( pixv, FDEC_STRIDE, &m->p_fref[8], m->i_stride[2], mx, my, bw, bh, &m->weight[2] ); \
+ } \
+ else if( m->i_pixel <= PIXEL_8x8 ) \
{ \
h->mc.mc_chroma( pixu, pixv, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
if( m->weight[1].weightfn ) \
uint16_t amvd;
pixel *pix = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i4]];
- pixel *pixu = &h->mb.pic.p_fdec[1][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4];
- pixel *pixv = &h->mb.pic.p_fdec[2][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4];
+ pixel *pixu, *pixv;
+ if( CHROMA444 )
+ {
+ pixu = &h->mb.pic.p_fdec[1][block_idx_xy_fdec[i4]];
+ pixv = &h->mb.pic.p_fdec[2][block_idx_xy_fdec[i4]];
+ }
+ else
+ {
+ pixu = &h->mb.pic.p_fdec[1][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4];
+ pixv = &h->mb.pic.p_fdec[2][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4];
+ }
h->mb.b_skip_mc = 1;
int i_ref;
const x264_weight_t *weight;
- pixel *p_fref[6];
+ pixel *p_fref[12];
pixel *p_fref_w;
pixel *p_fenc[3];
uint16_t *integral;
- int i_stride[2];
+ int i_stride[3];
ALIGNED_4( int16_t mvp[2] );
return ssd - ((uint64_t)sum * sum >> shift);
}
-static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i, int field, int b_store )
+static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame, int i, int b_chroma, int b_field, int b_store )
{
- int w = i ? 8 : 16;
+ int w = b_chroma ? 8 : 16;
int stride = frame->i_stride[i];
- int offset = field
+ int offset = b_field
? 16 * mb_x + w * (mb_y&~1) * stride + (mb_y&1) * stride
: 16 * mb_x + w * mb_y * stride;
- stride <<= field;
- if( i )
+ stride <<= b_field;
+ if( b_chroma )
{
ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*8] );
h->mc.load_deinterleave_8x8x2_fenc( pix, frame->plane[1] + offset, stride );
+        return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, 1, b_store )
+             + ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, 2, b_store );
}
else
- return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[0] + offset, stride ), 8, frame, 0, b_store );
+ return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[i] + offset, stride ), 8, frame, i, b_store );
}
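
The expression ssd - ((uint64_t)sum * sum >> shift) that ac_energy_var applies to these results is the usual AC-energy identity: for an N-pixel block, N * variance = sum(x^2) - sum(x)^2 / N, and with N = 1<<shift (shift 8 for a 16x16 block, 6 for each deinterleaved 8x8 chroma half) the division becomes a shift. A standalone check of the arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t pix[256]; /* one 16x16 block, so shift = 8 */
        for( int i = 0; i < 256; i++ )
            pix[i] = 100 + i % 7; /* arbitrary test pattern */

        uint64_t sum = 0, ssd = 0;
        for( int i = 0; i < 256; i++ )
        {
            sum += pix[i];
            ssd += (uint64_t)pix[i] * pix[i];
        }
        /* DC energy removed: equals 256 * variance of the block */
        printf( "AC energy = %llu\n", (unsigned long long)(ssd - (sum * sum >> 8)) );
        return 0;
    }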
// Find the total AC energy of the block in all planes.
/* We don't know the super-MB mode we're going to pick yet, so
* simply try both and pick the lower of the two. */
uint32_t var_interlaced, var_progressive;
- var_interlaced = ac_energy_plane( h, mb_x, mb_y, frame, 0, 1, 1 );
- var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1 );
- var_progressive = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 0 );
- var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 0 );
+ var_interlaced = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 1, 1 );
+ var_progressive = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, 0, 0 );
+ if( CHROMA444 )
+ {
+ var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 1, 1 );
+ var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, 0, 0 );
+ var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 1, 1 );
+ var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 0, 0 );
+ }
+ else
+ {
+ var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1, 1 );
+ var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 0, 0 );
+ }
var = X264_MIN( var_interlaced, var_progressive );
}
else
{
- var = ac_energy_plane( h, mb_x, mb_y, frame, 0, PARAM_INTERLACED, 1 );
- var += ac_energy_plane( h, mb_x, mb_y, frame, 1, PARAM_INTERLACED, 1 );
+ var = ac_energy_plane( h, mb_x, mb_y, frame, 0, 0, PARAM_INTERLACED, 1 );
+ if( CHROMA444 )
+ {
+ var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, PARAM_INTERLACED, 1 );
+ var += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, PARAM_INTERLACED, 1 );
+ }
+ else
+ var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, PARAM_INTERLACED, 1 );
}
x264_emms();
return var;
{
uint64_t ssd = frame->i_pixel_ssd[i];
uint64_t sum = frame->i_pixel_sum[i];
- int width = h->mb.i_mb_width*16>>!!i;
- int height = h->mb.i_mb_height*16>>!!i;
+ int size = CHROMA444 || !i ? 16 : 8;
+ int width = h->mb.i_mb_width*size;
+ int height = h->mb.i_mb_height*size;
frame->i_pixel_ssd[i] = ssd - (sum * sum + width * height / 2) / (width * height);
}
}
if( h->param.b_bluray_compat )
mincr = 4;
- /* High 10 doesn't require minCR, so just set the maximum to a large value. */
- if( h->sps->i_profile_idc == PROFILE_HIGH10 )
+ /* High 10 / High 4:4:4 Predictive profiles don't require minCR, so just set the maximum to a large value. */
+ if( h->sps->i_profile_idc >= PROFILE_HIGH10 )
rc->frame_size_maximum = 1e9;
else
{
#include "cabac.c"
#define COPY_CABAC h->mc.memcpy_aligned( &cabac_tmp.f8_bits_encoded, &h->cabac.f8_bits_encoded, \
- sizeof(x264_cabac_t) - offsetof(x264_cabac_t,f8_bits_encoded) )
+ sizeof(x264_cabac_t) - offsetof(x264_cabac_t,f8_bits_encoded) - (CHROMA444 ? 0 : (1024+12)-460) )
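/* The extra subtraction is a size optimization, assuming the context table is
 * the trailing member of x264_cabac_t: streams that aren't 4:4:4 only ever
 * touch the first 460 CABAC contexts, so the per-RD-call state copy can skip
 * the trailing (1024+12)-460 = 576 bytes of never-used contexts and padding
 * rather than copying the whole enlarged array. */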
#define COPY_CABAC_PART( pos, size )\
memcpy( &cb->state[pos], &h->cabac.state[pos], size )
static inline int ssd_mb( x264_t *h )
{
- int chromassd = ssd_plane(h, PIXEL_8x8, 1, 0, 0) + ssd_plane(h, PIXEL_8x8, 2, 0, 0);
- chromassd = ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
- return ssd_plane(h, PIXEL_16x16, 0, 0, 0) + chromassd;
+ int chroma_size = CHROMA444 ? PIXEL_16x16 : PIXEL_8x8;
+ int chroma_ssd = ssd_plane(h, chroma_size, 1, 0, 0) + ssd_plane(h, chroma_size, 2, 0, 0);
+ chroma_ssd = ((uint64_t)chroma_ssd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
+ return ssd_plane(h, PIXEL_16x16, 0, 0, 0) + chroma_ssd;
}
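/* A sketch of the Q8 fixed-point weighting used above (weight_q8 is a
 * hypothetical helper, not part of x264): i_chroma_lambda2_offset scales how
 * heavily chroma SSD counts against luma SSD in the RD metric. */
static inline uint64_t weight_q8( uint64_t ssd, int offset_q8 )
{
    /* Rounded multiply in Q8: 256 is unity (ssd unchanged), 128 halves
     * chroma's contribution, 512 doubles it. */
    return (ssd * offset_q8 + 128) >> 8;
}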
static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
return i_ssd + i_bits;
}
-/* For small partitions (i.e. those using at most one DCT category's worth of CABAC states),
- * it's faster to copy the individual parts than to perform a whole CABAC_COPY. */
-static ALWAYS_INLINE void x264_copy_cabac_part( x264_t *h, x264_cabac_t *cb, int cat, int intra )
-{
- if( intra )
- COPY_CABAC_PART( 68, 2 ); //intra pred mode
- else
- COPY_CABAC_PART( 40, 16 ); //mvd, rounded up to 16 bytes
-
- /* 8x8dct writes CBP, while non-8x8dct writes CBF */
- if( cat != DCT_LUMA_8x8 )
- COPY_CABAC_PART( 85 + cat * 4, 4 );
- else
- COPY_CABAC_PART( 73, 4 );
-
- /* Really should be 15 bytes, but rounding up a byte saves some
- * instructions and is faster, and copying extra data doesn't hurt. */
- COPY_CABAC_PART( significant_coeff_flag_offset[MB_INTERLACED][cat], 16 );
- COPY_CABAC_PART( last_coeff_flag_offset[MB_INTERLACED][cat], 16 );
- COPY_CABAC_PART( coeff_abs_level_m1_offset[cat], 10 );
- cb->f8_bits_encoded = 0;
-}
-
/* partition RD functions use 8 bits more precision to avoid large rounding errors at low QPs */
static uint64_t x264_rd_cost_subpart( x264_t *h, int i_lambda2, int i4, int i_pixel )
x264_macroblock_encode_p4x4( h, i4+2 );
i_ssd = ssd_plane( h, i_pixel, 0, block_idx_x[i4]*4, block_idx_y[i4]*4 );
+ if( CHROMA444 )
+ {
+ int chromassd = ssd_plane( h, i_pixel, 1, block_idx_x[i4]*4, block_idx_y[i4]*4 )
+ + ssd_plane( h, i_pixel, 2, block_idx_x[i4]*4, block_idx_y[i4]*4 );
+ chromassd = ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
+ i_ssd += chromassd;
+ }
if( h->param.b_cabac )
{
x264_cabac_t cabac_tmp;
- x264_copy_cabac_part( h, &cabac_tmp, DCT_LUMA_4x4, 0 );
+ COPY_CABAC;
x264_subpartition_size_cabac( h, &cabac_tmp, i4, i_pixel );
i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
}
if( i_pixel == PIXEL_8x16 )
x264_macroblock_encode_p8x8( h, i8+2 );
- chromassd = ssd_plane( h, i_pixel+3, 1, (i8&1)*4, (i8>>1)*4 )
- + ssd_plane( h, i_pixel+3, 2, (i8&1)*4, (i8>>1)*4 );
+ i_ssd = ssd_plane( h, i_pixel, 0, (i8&1)*8, (i8>>1)*8 );
+ if( CHROMA444 )
+ {
+ chromassd = ssd_plane( h, i_pixel, 1, (i8&1)*8, (i8>>1)*8 )
+ + ssd_plane( h, i_pixel, 2, (i8&1)*8, (i8>>1)*8 );
+ }
+ else
+ {
+ chromassd = ssd_plane( h, i_pixel+3, 1, (i8&1)*4, (i8>>1)*4 )
+ + ssd_plane( h, i_pixel+3, 2, (i8&1)*4, (i8>>1)*4 );
+ }
chromassd = ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
- i_ssd = ssd_plane( h, i_pixel, 0, (i8&1)*8, (i8>>1)*8 ) + chromassd;
+ i_ssd += chromassd;
if( h->param.b_cabac )
{
return (i_ssd<<8) + i_bits;
}
-static uint64_t x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode )
+static uint64_t x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode, pixel edge[3][48] )
{
uint64_t i_ssd, i_bits;
+ int plane_count = CHROMA444 ? 3 : 1;
+ int i_qp = h->mb.i_qp;
h->mb.i_cbp_luma &= ~(1<<i8);
h->mb.b_transform_8x8 = 1;
- x264_mb_encode_i8x8( h, i8, h->mb.i_qp );
+ for( int p = 0; p < plane_count; p++ )
+ {
+ x264_mb_encode_i8x8( h, p, i8, i_qp, i_mode, edge[p] );
+ i_qp = h->mb.i_chroma_qp;
+ }
+
i_ssd = ssd_plane( h, PIXEL_8x8, 0, (i8&1)*8, (i8>>1)*8 );
+ if( CHROMA444 )
+ {
+ int chromassd = ssd_plane( h, PIXEL_8x8, 1, (i8&1)*8, (i8>>1)*8 )
+ + ssd_plane( h, PIXEL_8x8, 2, (i8&1)*8, (i8>>1)*8 );
+ chromassd = ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
+ i_ssd += chromassd;
+ }
if( h->param.b_cabac )
{
x264_cabac_t cabac_tmp;
- x264_copy_cabac_part( h, &cabac_tmp, DCT_LUMA_8x8, 1 );
+ COPY_CABAC;
x264_partition_i8x8_size_cabac( h, &cabac_tmp, i8, i_mode );
i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
}
static uint64_t x264_rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode )
{
uint64_t i_ssd, i_bits;
- x264_mb_encode_i4x4( h, i4, h->mb.i_qp );
+ int plane_count = CHROMA444 ? 3 : 1;
+ int i_qp = h->mb.i_qp;
+
+ for( int p = 0; p < plane_count; p++ )
+ {
+ x264_mb_encode_i4x4( h, p, i4, i_qp, i_mode );
+ i_qp = h->mb.i_chroma_qp;
+ }
i_ssd = ssd_plane( h, PIXEL_4x4, 0, block_idx_x[i4]*4, block_idx_y[i4]*4 );
+ if( CHROMA444 )
+ {
+ int chromassd = ssd_plane( h, PIXEL_4x4, 1, block_idx_x[i4]*4, block_idx_y[i4]*4 )
+ + ssd_plane( h, PIXEL_4x4, 2, block_idx_x[i4]*4, block_idx_y[i4]*4 );
+ chromassd = ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
+ i_ssd += chromassd;
+ }
if( h->param.b_cabac )
{
x264_cabac_t cabac_tmp;
- x264_copy_cabac_part( h, &cabac_tmp, DCT_LUMA_4x4, 1 );
+ COPY_CABAC;
x264_partition_i4x4_size_cabac( h, &cabac_tmp, i4, i_mode );
i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
}
const udctcoef *quant_mf, const int *unquant_mf,
const int *coef_weight, const uint8_t *zigzag,
int ctx_block_cat, int i_lambda2, int b_ac,
- int dc, int i_coefs, int idx )
+ int b_chroma, int dc, int i_coefs, int idx )
{
int abs_coefs[64], signs[64];
trellis_node_t nodes[2][8];
}
i_last_nnz = i;
+ idx &= i_coefs == 64 ? 3 : 15;
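/* (Presumably masking back to a per-plane block index: 4:4:4 callers pass
 * plane-offset indices, while the fenc_dct4/fenc_dct8 psy caches hold one
 * plane's worth — 16 4x4 or 4 8x8 blocks.) */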
for( ; i >= b_ac; i-- )
{
int d = i_coef - unquant_abs_level;
int64_t ssd;
/* Psy trellis: bias in favor of higher AC coefficients in the reconstructed frame. */
- if( h->mb.i_psy_trellis && i && !dc && ctx_block_cat != DCT_CHROMA_AC )
+ if( h->mb.i_psy_trellis && i && !dc && !b_chroma )
{
int orig_coef = (i_coefs == 64) ? h->mb.pic.fenc_dct8[idx][zigzag[i]] : h->mb.pic.fenc_dct4[idx][zigzag[i]];
int predicted_coef = orig_coef - i_coef * signs[i];
const udctcoef *quant_mf, const int *unquant_mf,
const int *coef_weight, const uint8_t *zigzag,
int ctx_block_cat, int i_lambda2, int b_ac,
- int dc, int i_coefs, int idx, int b_8x8 )
+ int b_chroma, int dc, int i_coefs, int idx, int b_8x8 )
{
ALIGNED_16( dctcoef quant_coefs[2][16] );
ALIGNED_16( dctcoef coefs[16] ) = {0};
int64_t score = 1ULL<<62;
int i, j;
const int f = 1<<15;
- int nC = ctx_block_cat == DCT_CHROMA_DC ? 4 : ct_index[x264_mb_predict_non_zero_code( h, ctx_block_cat == DCT_LUMA_DC ? 0 : idx )];
+ int nC = ctx_block_cat == DCT_CHROMA_DC ? 4 : ct_index[x264_mb_predict_non_zero_code( h, ctx_block_cat == DCT_LUMA_DC ? (idx - LUMA_DC)*16 : idx )];
/* Code for handling 8x8dct -> 4x4dct CAVLC munging. Input/output use a different
* step/start/end than internal processing. */
end = 60 + start;
step = 4;
}
+ idx &= 15;
i_lambda2 <<= LAMBDA_BITS;
delta_distortion[i] = (d0*d0 - d1*d1) * (dc?256:coef_weight[j]);
/* Psy trellis: bias in favor of higher AC coefficients in the reconstructed frame. */
- if( h->mb.i_psy_trellis && j && !dc && ctx_block_cat != DCT_CHROMA_AC )
+ if( h->mb.i_psy_trellis && j && !dc && !b_chroma )
{
int orig_coef = b_8x8 ? h->mb.pic.fenc_dct8[idx>>2][zigzag[j]] : h->mb.pic.fenc_dct4[idx][zigzag[j]];
int predicted_coef = orig_coef - coef;
static const uint8_t x264_zigzag_scan2[4] = {0,1,2,3};
int x264_quant_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
- int i_qp, int ctx_block_cat, int b_intra, int b_chroma )
+ int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx )
{
if( h->param.b_cabac )
return quant_trellis_cabac( h, dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
NULL, ctx_block_cat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[MB_INTERLACED],
- ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, ctx_block_cat==DCT_CHROMA_DC ? 4 : 16, 0 );
+ ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, b_chroma, 1, ctx_block_cat==DCT_CHROMA_DC ? 4 : 16, idx );
+
+ if( ctx_block_cat != DCT_CHROMA_DC )
+ ctx_block_cat = DCT_LUMA_DC;
return quant_trellis_cavlc( h, dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
NULL, ctx_block_cat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[MB_INTERLACED],
- ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, ctx_block_cat==DCT_CHROMA_DC ? 4 : 16, 0, 0 );
+ ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, b_chroma, 1, ctx_block_cat==DCT_CHROMA_DC ? 4 : 16, idx, 0 );
}
int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx )
{
- int b_ac = (ctx_block_cat == DCT_LUMA_AC || ctx_block_cat == DCT_CHROMA_AC);
+ static const uint8_t ctx_ac[14] = {0,1,0,0,1,0,0,1,0,0,0,1,0,0};
+ int b_ac = ctx_ac[ctx_block_cat];
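/* Assuming the ctx_block_cat values follow the spec's ctxBlockCat numbering
 * that the 4:4:4 support extends per-plane, the 1s in ctx_ac[] mark the AC
 * categories: 1 (DCT_LUMA_AC), 4 (DCT_CHROMA_AC), 7 (DCT_CHROMAU_AC) and
 * 11 (DCT_CHROMAV_AC). AC blocks start at coefficient 1 because coefficient
 * 0 belongs to the separate DC block. */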
if( h->param.b_cabac )
return quant_trellis_cabac( h, dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
x264_dct4_weight2_zigzag[MB_INTERLACED],
x264_zigzag_scan4[MB_INTERLACED],
- ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx );
+ ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, b_chroma, 0, 16, idx );
return quant_trellis_cavlc( h, dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
x264_dct4_weight2_zigzag[MB_INTERLACED],
x264_zigzag_scan4[MB_INTERLACED],
- ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx, 0 );
+ ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, b_chroma, 0, 16, idx, 0 );
}
int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
- int i_qp, int b_intra, int idx )
+ int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx )
{
if( h->param.b_cabac )
{
h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
x264_dct8_weight2_zigzag[MB_INTERLACED],
x264_zigzag_scan8[MB_INTERLACED],
- DCT_LUMA_8x8, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 64, idx );
+ ctx_block_cat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, b_chroma, 0, 64, idx );
}
/* 8x8 CAVLC is split into 4 4x4 blocks */
h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
x264_dct8_weight2_zigzag[MB_INTERLACED],
x264_zigzag_scan8[MB_INTERLACED],
- DCT_LUMA_4x4, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 16, idx*4+i, 1 );
+ DCT_LUMA_4x4, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, b_chroma, 0, 16, idx*4+i, 1 );
/* Set up nonzero count for future calls */
h->mb.cache.non_zero_count[x264_scan8[idx*4+i]] = nz;
nzaccum |= nz;
const uint8_t *list = pps->scaling_list[idx];
const uint8_t *def_list = (idx==CQM_4IC) ? pps->scaling_list[CQM_4IY]
: (idx==CQM_4PC) ? pps->scaling_list[CQM_4PY]
+ : (idx==CQM_8IC+4) ? pps->scaling_list[CQM_8IY+4]
+ : (idx==CQM_8PC+4) ? pps->scaling_list[CQM_8PY+4]
: x264_cqm_jvt[idx];
if( !memcmp( list, def_list, len ) )
bs_write1( s, 0 ); // scaling_list_present_flag
sps->i_id = i_id;
sps->i_mb_width = ( param->i_width + 15 ) / 16;
sps->i_mb_height= ( param->i_height + 15 ) / 16;
+ sps->i_chroma_format_idc = param->i_csp >= X264_CSP_I444 ? 3 : 1;
sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
- if( sps->b_qpprime_y_zero_transform_bypass )
+ if( sps->b_qpprime_y_zero_transform_bypass || sps->i_chroma_format_idc == 3 )
sps->i_profile_idc = PROFILE_HIGH444_PREDICTIVE;
else if( BIT_DEPTH > 8 )
sps->i_profile_idc = PROFILE_HIGH10;
/* High 10 Intra profile */
if( param->i_keyint_max == 1 && sps->i_profile_idc == PROFILE_HIGH10 )
sps->b_constraint_set3 = 1;
+ /* High 4:4:4 Intra profile */
+ if( param->i_keyint_max == 1 && sps->i_profile_idc == PROFILE_HIGH444_PREDICTIVE )
+ sps->b_constraint_set3 = 1;
sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
/* extra slot with pyramid so that we don't have to override the
if( sps->i_profile_idc >= PROFILE_HIGH )
{
- bs_write_ue( s, 1 ); // chroma_format_idc = 4:2:0
+ bs_write_ue( s, sps->i_chroma_format_idc );
+ if( sps->i_chroma_format_idc == 3 )
+ bs_write1( s, 0 ); // separate_colour_plane_flag
bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_luma_minus8
bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_chroma_minus8
bs_write1( s, sps->b_qpprime_y_zero_transform_bypass );
bs_write1( s, sps->b_crop );
if( sps->b_crop )
{
- bs_write_ue( s, sps->crop.i_left / 2 );
- bs_write_ue( s, sps->crop.i_right / 2 );
- bs_write_ue( s, sps->crop.i_top / 2 );
- bs_write_ue( s, sps->crop.i_bottom / 2 );
+ int cropshift = sps->i_chroma_format_idc != 3;
+ bs_write_ue( s, sps->crop.i_left >> cropshift );
+ bs_write_ue( s, sps->crop.i_right >> cropshift );
+ bs_write_ue( s, sps->crop.i_top >> cropshift );
+ bs_write_ue( s, sps->crop.i_bottom >> cropshift );
}
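/* Cropping offsets are signalled in chroma sample units (CropUnitX/Y equal
 * SubWidthC/SubHeightC), so 4:2:0 halves them while 4:4:4 writes them
 * unscaled: e.g. 1080p coded as 1920x1088 needs crop.i_bottom = 8 luma rows,
 * written as 8>>1 = 4 at 4:2:0 but 8>>0 = 8 at 4:4:4. */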
bs_write1( s, sps->b_vui );
pps->b_transform_8x8_mode = param->analyse.b_transform_8x8 ? 1 : 0;
pps->i_cqm_preset = param->i_cqm_preset;
+
switch( pps->i_cqm_preset )
{
case X264_CQM_FLAT:
- for( int i = 0; i < 6; i++ )
+ for( int i = 0; i < 8; i++ )
pps->scaling_list[i] = x264_cqm_flat16;
break;
case X264_CQM_JVT:
- for( int i = 0; i < 6; i++ )
+ for( int i = 0; i < 8; i++ )
pps->scaling_list[i] = x264_cqm_jvt[i];
break;
case X264_CQM_CUSTOM:
/* match the transposed DCT & zigzag */
transpose( param->cqm_4iy, 4 );
- transpose( param->cqm_4ic, 4 );
transpose( param->cqm_4py, 4 );
+ transpose( param->cqm_4ic, 4 );
transpose( param->cqm_4pc, 4 );
transpose( param->cqm_8iy, 8 );
transpose( param->cqm_8py, 8 );
+ transpose( param->cqm_8ic, 8 );
+ transpose( param->cqm_8pc, 8 );
pps->scaling_list[CQM_4IY] = param->cqm_4iy;
- pps->scaling_list[CQM_4IC] = param->cqm_4ic;
pps->scaling_list[CQM_4PY] = param->cqm_4py;
+ pps->scaling_list[CQM_4IC] = param->cqm_4ic;
pps->scaling_list[CQM_4PC] = param->cqm_4pc;
pps->scaling_list[CQM_8IY+4] = param->cqm_8iy;
pps->scaling_list[CQM_8PY+4] = param->cqm_8py;
- for( int i = 0; i < 6; i++ )
+ pps->scaling_list[CQM_8IC+4] = param->cqm_8ic;
+ pps->scaling_list[CQM_8PC+4] = param->cqm_8pc;
+ for( int i = 0; i < 8; i++ )
for( int j = 0; j < (i < 4 ? 16 : 64); j++ )
if( pps->scaling_list[i][j] == 0 )
pps->scaling_list[i] = x264_cqm_jvt[i];
}
}
-void x264_pps_write( bs_t *s, x264_pps_t *pps )
+void x264_pps_write( bs_t *s, x264_sps_t *sps, x264_pps_t *pps )
{
bs_realign( s );
bs_write_ue( s, pps->i_id );
bs_write1( s, 0 ); // Cr = Cb
if( pps->b_transform_8x8_mode )
{
- scaling_list_write( s, pps, CQM_8IY+4 );
- scaling_list_write( s, pps, CQM_8PY+4 );
+ if( sps->i_chroma_format_idc == 3 )
+ {
+ scaling_list_write( s, pps, CQM_8IY+4 );
+ scaling_list_write( s, pps, CQM_8IC+4 );
+ bs_write1( s, 0 ); // Cr = Cb
+ scaling_list_write( s, pps, CQM_8PY+4 );
+ scaling_list_write( s, pps, CQM_8PC+4 );
+ bs_write1( s, 0 ); // Cr = Cb
+ }
+ else
+ {
+ scaling_list_write( s, pps, CQM_8IY+4 );
+ scaling_list_write( s, pps, CQM_8PY+4 );
+ }
}
}
bs_write_se( s, pps->i_chroma_qp_index_offset );
int ret = 0;
int mbs = h->sps->i_mb_width * h->sps->i_mb_height;
int dpb = mbs * 384 * h->sps->vui.i_max_dec_frame_buffering;
- int cbp_factor = h->sps->i_profile_idc==PROFILE_HIGH10 ? 12 :
+ int cbp_factor = h->sps->i_profile_idc==PROFILE_HIGH444_PREDICTIVE ? 16 :
+ h->sps->i_profile_idc==PROFILE_HIGH10 ? 12 :
h->sps->i_profile_idc==PROFILE_HIGH ? 5 : 4;
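/* cbp_factor appears to be the spec's cpbBrVclFactor divided by 250:
 * 1000 (Main), 1250 (High), 3000 (High 10) and 4000 (High 4:4:4 Predictive)
 * yield 4, 5, 12 and 16, scaling the level's bitrate/CPB limits by profile. */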
const x264_level_t *l = x264_levels;
void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param );
void x264_sps_write( bs_t *s, x264_sps_t *sps );
void x264_pps_init( x264_pps_t *pps, int i_id, x264_param_t *param, x264_sps_t *sps );
-void x264_pps_write( bs_t *s, x264_pps_t *pps );
+void x264_pps_write( bs_t *s, x264_sps_t *sps, x264_pps_t *pps );
void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt );
int x264_sei_version_write( x264_t *h, bs_t *s );
int x264_validate_levels( x264_t *h, int verbose );
x264_emms();
}
+static NOINLINE pixel *x264_weight_cost_init_chroma444( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dst, int p )
+{
+ int ref0_distance = fenc->i_frame - ref->i_frame - 1;
+ int i_stride = fenc->i_stride[p];
+ int i_lines = fenc->i_lines[p];
+ int i_width = fenc->i_width[p];
+
+ if( fenc->lowres_mvs[0][ref0_distance][0][0] != 0x7FFF )
+ {
+ for( int y = 0, mb_xy = 0, pel_offset_y = 0; y < i_lines; y += 16, pel_offset_y = y*i_stride )
+ for( int x = 0, pel_offset_x = 0; x < i_width; x += 16, mb_xy++, pel_offset_x += 16 )
+ {
+ pixel *pix = dst + pel_offset_y + pel_offset_x;
+ pixel *src = ref->plane[p] + pel_offset_y + pel_offset_x;
+ int mvx = fenc->lowres_mvs[0][ref0_distance][mb_xy][0] / 2;
+ int mvy = fenc->lowres_mvs[0][ref0_distance][mb_xy][1] / 2;
+ /* We don't want to calculate hpels for fenc frames, so we round the motion
+ * vectors to fullpel here. It's not too bad, I guess? */
+ h->mc.copy_16x16_unaligned( pix, i_stride, src+mvx+mvy*i_stride, i_stride, 16 );
+ }
+ x264_emms();
+ return dst;
+ }
+ x264_emms();
+ return ref->plane[p];
+}
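/* If no lowres motion search has run against this reference (the 0x7FFF
 * sentinel), there is nothing to motion-compensate, so the raw reference
 * plane serves directly as the comparison buffer. */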
+
static int x264_weight_slice_header_cost( x264_t *h, x264_weight_t *w, int b_chroma )
{
/* Add cost of weights in the slice header. */
return cost;
}
+static NOINLINE unsigned int x264_weight_cost_chroma444( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w, int p )
+{
+ unsigned int cost = 0;
+ int i_stride = fenc->i_stride[p];
+ int i_lines = fenc->i_lines[p];
+ int i_width = fenc->i_width[p];
+ pixel *src = fenc->plane[p];
+ ALIGNED_ARRAY_16( pixel, buf, [16*16] );
+ int pixoff = 0;
+ if( w )
+ {
+ for( int y = 0; y < i_lines; y += 16, pixoff = y*i_stride )
+ for( int x = 0; x < i_width; x += 16, pixoff += 16 )
+ {
+ w->weightfn[16>>2]( buf, 16, &ref[pixoff], i_stride, w, 16 );
+ cost += h->pixf.mbcmp[PIXEL_16x16]( buf, 16, &src[pixoff], i_stride );
+ }
+ cost += x264_weight_slice_header_cost( h, w, 1 );
+ }
+ else
+ for( int y = 0; y < i_lines; y += 16, pixoff = y*i_stride )
+ for( int x = 0; x < i_width; x += 16, pixoff += 16 )
+ cost += h->pixf.mbcmp[PIXEL_16x16]( &ref[pixoff], i_stride, &src[pixoff], i_stride );
+ x264_emms();
+ return cost;
+}
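/* Since 4:4:4 chroma planes are full resolution, the cost loop above reuses
 * the 16x16 tiling of the luma path — weightfn[16>>2] is the 16-pixel-wide
 * kernel — instead of the 8x8 blocks used by the 4:2:0 version. */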
+
void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead )
{
int i_delta_index = fenc->i_frame - ref->i_frame - 1;
}
else
{
- pixel *dstu = h->mb.p_weight_buf[0];
- pixel *dstv = h->mb.p_weight_buf[0]+fenc->i_stride[1]*fenc->i_lines[1];
- /* Only initialize chroma data once. */
- if( plane == 1 )
- x264_weight_cost_init_chroma( h, fenc, ref, dstu, dstv );
- mcbuf = plane == 1 ? dstu : dstv;
- origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, NULL );
+ if( CHROMA444 )
+ {
+ mcbuf = x264_weight_cost_init_chroma444( h, fenc, ref, h->mb.p_weight_buf[0], plane );
+ origscore = minscore = x264_weight_cost_chroma444( h, fenc, mcbuf, NULL, plane );
+ }
+ else
+ {
+ pixel *dstu = h->mb.p_weight_buf[0];
+ pixel *dstv = h->mb.p_weight_buf[0]+fenc->i_stride[1]*fenc->i_lines[1];
+ /* Only initialize chroma data once. */
+ if( plane == 1 )
+ x264_weight_cost_init_chroma( h, fenc, ref, dstu, dstv );
+ mcbuf = plane == 1 ? dstu : dstv;
+ origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, NULL );
+ }
}
SET_WEIGHT( weights[plane], 1, minscale, mindenom, i_off );
unsigned int s;
if( plane )
- s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane] );
+ {
+ if( CHROMA444 )
+ s = x264_weight_cost_chroma444( h, fenc, mcbuf, &weights[plane], plane );
+ else
+ s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane] );
+ }
else
s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
return csp_mask == X264_CSP_I420 ||
csp_mask == X264_CSP_I422 ||
csp_mask == X264_CSP_I444 ||
+ csp_mask == X264_CSP_YV24 ||
csp_mask == X264_CSP_YV12 ||
csp_mask == X264_CSP_NV12;
}
case X264_CSP_YV12: /* specially handled via swapping chroma */
case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
+ case X264_CSP_YV24: /* specially handled via swapping chroma */
case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
case X264_CSP_RGB: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48 : PIX_FMT_RGB24;
/* the next 3 csps have no equivalent 16bit depth in swscale */
h->dst.pix_fmt = convert_csp_to_pix_fmt( h->dst_csp );
h->scale = h->dst;
- /* swap chroma planes if YV12 is involved, as libswscale works with I420 */
- h->pre_swap_chroma = (info->csp & X264_CSP_MASK) == X264_CSP_YV12;
- h->post_swap_chroma = (h->dst_csp & X264_CSP_MASK) == X264_CSP_YV12;
+ /* swap chroma planes if YV12/YV24 is involved, as libswscale works with I420/I444 */
+ int src_csp = info->csp & X264_CSP_MASK;
+ int dst_csp = h->dst_csp & X264_CSP_MASK;
+ h->pre_swap_chroma = src_csp == X264_CSP_YV12 || src_csp == X264_CSP_YV24;
+ h->post_swap_chroma = dst_csp == X264_CSP_YV12 || dst_csp == X264_CSP_YV24;
int src_pix_fmt = convert_csp_to_pix_fmt( info->csp );
return res;
}
+static float get_avs_version( avs_hnd_t *h )
+{
+ FAIL_IF_ERROR( !h->func.avs_function_exists( h->env, "VersionNumber" ), "VersionNumber does not exist\n" )
+ AVS_Value ver = h->func.avs_invoke( h->env, "VersionNumber", avs_new_value_array( NULL, 0 ), NULL );
+ FAIL_IF_ERROR( avs_is_error( ver ), "unable to determine avisynth version: %s\n", avs_as_error( ver ) )
+ FAIL_IF_ERROR( !avs_is_float( ver ), "VersionNumber did not return a float value\n" )
+ float ret = avs_as_float( ver );
+ h->func.avs_release_value( ver );
+ return ret;
+}
+
static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
{
FILE *fh = fopen( psz_filename, "r" );
const char *error = h->func.avs_get_error( h->env );
FAIL_IF_ERROR( error, "%s\n", error );
}
+ float avs_version = get_avs_version( h );
+ if( avs_version <= 0 )
+ return -1;
+ x264_cli_log( "avs", X264_LOG_DEBUG, "using avisynth version %.2f\n", avs_version );
AVS_Value arg = avs_new_value_string( psz_filename );
AVS_Value res;
char *filename_ext = get_filename_extension( psz_filename );
info->tff = avs_is_tff( vi );
}
#if !HAVE_SWSCALE
- /* if swscale is not available, convert CSPs to yv12 */
- if( !avs_is_yv12( vi ) )
+ /* if swscale is not available, convert the CSP if necessary */
+ if( (opt->output_csp == X264_CSP_I420 && !avs_is_yv12( vi )) || (opt->output_csp == X264_CSP_I444 && !avs_is_yv24( vi )) )
{
- x264_cli_log( "avs", X264_LOG_WARNING, "converting input clip to YV12\n" );
- FAIL_IF_ERROR( vi->width&1 || vi->height&1, "input clip width or height not divisible by 2 (%dx%d)\n", vi->width, vi->height )
+ FAIL_IF_ERROR( avs_version < 2.6f && opt->output_csp == X264_CSP_I444, "avisynth >= 2.6 is required for i444 output\n" )
+
+ const char *csp = opt->output_csp == X264_CSP_I420 ? "YV12" : "YV24";
+ x264_cli_log( "avs", X264_LOG_WARNING, "converting input clip to %s\n", csp );
+ FAIL_IF_ERROR( opt->output_csp == X264_CSP_I420 && (vi->width&1 || vi->height&1),
+ "input clip width or height not divisible by 2 (%dx%d)\n", vi->width, vi->height )
const char *arg_name[2] = { NULL, "interlaced" };
AVS_Value arg_arr[2] = { res, avs_new_value_bool( info->interlaced ) };
- AVS_Value res2 = h->func.avs_invoke( h->env, "ConvertToYV12", avs_new_value_array( arg_arr, 2 ), arg_name );
- FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to YV12\n" )
+ char conv_func[14] = { "ConvertTo" };
+ strcat( conv_func, csp );
+ AVS_Value res2 = h->func.avs_invoke( h->env, conv_func, avs_new_value_array( arg_arr, 2 ), arg_name );
+ FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to %s\n", csp )
res = update_clip( h, &vi, res2, res );
}
#endif
else
info->csp = X264_CSP_NONE;
#else
- info->csp = X264_CSP_I420;
+ if( avs_is_yv24( vi ) )
+ info->csp = X264_CSP_I444;
+ else if( avs_is_yv12( vi ) )
+ info->csp = X264_CSP_I420;
+ else
+ info->csp = X264_CSP_NONE;
#endif
info->vfr = 0;
[X264_CSP_I420] = { "i420", 3, { 1, .5, .5 }, { 1, .5, .5 }, 2, 2 },
[X264_CSP_I422] = { "i422", 3, { 1, .5, .5 }, { 1, 1, 1 }, 2, 1 },
[X264_CSP_I444] = { "i444", 3, { 1, 1, 1 }, { 1, 1, 1 }, 1, 1 },
+ [X264_CSP_YV24] = { "yv24", 3, { 1, 1, 1 }, { 1, 1, 1 }, 1, 1 },
[X264_CSP_YV12] = { "yv12", 3, { 1, .5, .5 }, { 1, .5, .5 }, 2, 2 },
[X264_CSP_NV12] = { "nv12", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_BGR] = { "bgr", 1, { 3 }, { 1 }, 1, 1 },
char *timebase;
int seek;
int progress;
+ int output_csp; /* convert to this csp, if applicable */
} cli_input_opt_t;
/* properties of the source given by the demuxer */
/* extended colorspace list that isn't supported by libx264 but by the cli */
#define X264_CSP_I422 X264_CSP_MAX /* yuv 4:2:2 planar */
-#define X264_CSP_I444 (X264_CSP_MAX+1) /* yuv 4:4:4 planar */
-#define X264_CSP_BGR (X264_CSP_MAX+2) /* packed bgr 24bits */
-#define X264_CSP_BGRA (X264_CSP_MAX+3) /* packed bgr 32bits */
-#define X264_CSP_RGB (X264_CSP_MAX+4) /* packed rgb 24bits */
-#define X264_CSP_CLI_MAX (X264_CSP_MAX+5) /* end of list */
+#define X264_CSP_BGR (X264_CSP_MAX+1) /* packed bgr 24bits */
+#define X264_CSP_BGRA (X264_CSP_MAX+2) /* packed bgr 32bits */
+#define X264_CSP_RGB (X264_CSP_MAX+3) /* packed rgb 24bits */
+#define X264_CSP_CLI_MAX (X264_CSP_MAX+4) /* end of list */
#define X264_CSP_OTHER 0x4000 /* non x264 colorspace */
typedef struct
memset( h, 0, sizeof(*h) );
x264_param_default( &h->param );
+ h->sps->i_chroma_format_idc = 1;
h->chroma_qp_table = i_chroma_qp_table + 12;
h->param.analyse.i_luma_deadzone[0] = 0;
h->param.analyse.i_luma_deadzone[1] = 0;
x264_t h_buf;
x264_t *h = &h_buf;
memset( h, 0, sizeof(*h) );
+ h->sps->i_chroma_format_idc = 1;
x264_param_default( &h->param );
h->chroma_qp_table = i_chroma_qp_table + 12;
h->param.analyse.b_transform_8x8 = 1;
}
#define DECL_CABAC(cpu) \
-static void run_cabac_decision_##cpu( uint8_t *dst )\
+static void run_cabac_decision_##cpu( x264_t *h, uint8_t *dst )\
{\
x264_cabac_t cb;\
- x264_cabac_context_init( &cb, SLICE_TYPE_P, 26, 0 );\
+ x264_cabac_context_init( h, &cb, SLICE_TYPE_P, 26, 0 );\
x264_cabac_encode_init( &cb, dst, dst+0xff0 );\
for( int i = 0; i < 0x1000; i++ )\
x264_cabac_encode_decision_##cpu( &cb, buf1[i]>>1, buf1[i]&1 );\
}\
-static void run_cabac_bypass_##cpu( uint8_t *dst )\
+static void run_cabac_bypass_##cpu( x264_t *h, uint8_t *dst )\
{\
x264_cabac_t cb;\
- x264_cabac_context_init( &cb, SLICE_TYPE_P, 26, 0 );\
+ x264_cabac_context_init( h, &cb, SLICE_TYPE_P, 26, 0 );\
x264_cabac_encode_init( &cb, dst, dst+0xff0 );\
for( int i = 0; i < 0x1000; i++ )\
x264_cabac_encode_bypass_##cpu( &cb, buf1[i]&1 );\
}\
-static void run_cabac_terminal_##cpu( uint8_t *dst )\
+static void run_cabac_terminal_##cpu( x264_t *h, uint8_t *dst )\
{\
x264_cabac_t cb;\
- x264_cabac_context_init( &cb, SLICE_TYPE_P, 26, 0 );\
+ x264_cabac_context_init( h, &cb, SLICE_TYPE_P, 26, 0 );\
x264_cabac_encode_init( &cb, dst, dst+0xff0 );\
for( int i = 0; i < 0x1000; i++ )\
x264_cabac_encode_terminal_##cpu( &cb );\
static int check_cabac( int cpu_ref, int cpu_new )
{
int ret = 0, ok, used_asm = 1;
+ x264_t h;
+ h.sps->i_chroma_format_idc = 3;
if( cpu_ref || run_cabac_decision_c == run_cabac_decision_asm )
return 0;
- x264_cabac_init();
+ x264_cabac_init( &h );
set_func_name( "cabac_encode_decision" );
memcpy( buf4, buf3, 0x1000 );
- call_c( run_cabac_decision_c, buf3 );
- call_a( run_cabac_decision_asm, buf4 );
+ call_c( run_cabac_decision_c, &h, buf3 );
+ call_a( run_cabac_decision_asm, &h, buf4 );
ok = !memcmp( buf3, buf4, 0x1000 );
report( "cabac decision:" );
set_func_name( "cabac_encode_bypass" );
memcpy( buf4, buf3, 0x1000 );
- call_c( run_cabac_bypass_c, buf3 );
- call_a( run_cabac_bypass_asm, buf4 );
+ call_c( run_cabac_bypass_c, &h, buf3 );
+ call_a( run_cabac_bypass_asm, &h, buf4 );
ok = !memcmp( buf3, buf4, 0x1000 );
report( "cabac bypass:" );
set_func_name( "cabac_encode_terminal" );
memcpy( buf4, buf3, 0x1000 );
- call_c( run_cabac_terminal_c, buf3 );
- call_a( run_cabac_terminal_asm, buf4 );
+ call_c( run_cabac_terminal_c, &h, buf3 );
+ call_a( run_cabac_terminal_asm, &h, buf4 );
ok = !memcmp( buf3, buf4, 0x1000 );
report( "cabac terminal:" );
static const char * const pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 };
static const char * const log_level_names[] = { "none", "error", "warning", "info", "debug", 0 };
+static const char * const output_csp_names[] = { "i420", "i444", 0 };
typedef struct
{
H1( " --input-fmt <string> Specify input file format (requires lavf support)\n" );
H1( " --input-csp <string> Specify input colorspace format for raw input\n" );
print_csp_names( longhelp );
+ H1( " --output-csp <string> Specify output colorspace [\"%s\"]\n"
+ " - %s\n", output_csp_names[0], stringify_names( buf, output_csp_names ) );
H1( " --input-depth <integer> Specify input bit depth for raw input\n" );
H1( " --input-res <intxint> Specify input resolution (width x height)\n" );
H1( " --index <string> Filename for input index file\n" );
OPT_INPUT_RES,
OPT_INPUT_CSP,
OPT_INPUT_DEPTH,
- OPT_DTS_COMPRESSION
+ OPT_DTS_COMPRESSION,
+ OPT_OUTPUT_CSP
} OptionsOPT;
static char short_options[] = "8A:B:b:f:hI:i:m:o:p:q:r:t:Vvw";
{ "input-csp", required_argument, NULL, OPT_INPUT_CSP },
{ "input-depth", required_argument, NULL, OPT_INPUT_DEPTH },
{ "dts-compress", no_argument, NULL, OPT_DTS_COMPRESSION },
+ { "output-csp", required_argument, NULL, OPT_OUTPUT_CSP },
{0, 0, 0, 0}
};
return 0;
}
-static int init_vid_filters( char *sequence, hnd_t *handle, video_info_t *info, x264_param_t *param )
+static int init_vid_filters( char *sequence, hnd_t *handle, video_info_t *info, x264_param_t *param, int output_csp )
{
x264_register_vid_filters();
param->i_height = info->height;
param->i_width = info->width;
}
- /* if the current csp is supported by libx264, have libx264 use this csp.
- * otherwise change the csp to I420 and have libx264 use this.
- * when more colorspaces are supported, this decision will need to be updated. */
+ /* force the output csp to what the user specified (or the default) */
+ param->i_csp = info->csp;
int csp = info->csp & X264_CSP_MASK;
- if( csp > X264_CSP_NONE && csp < X264_CSP_MAX )
- param->i_csp = info->csp;
- else
- param->i_csp = X264_CSP_I420 | ( info->csp & X264_CSP_HIGH_DEPTH );
+ if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp > X264_CSP_NV12) )
+ param->i_csp = X264_CSP_I420;
+ else if( output_csp == X264_CSP_I444 && (csp < X264_CSP_I444 || csp > X264_CSP_YV24) )
+ param->i_csp = X264_CSP_I444;
+ param->i_csp |= info->csp & X264_CSP_HIGH_DEPTH;
+
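/* (These range checks lean on the csp enum being grouped by subsampling:
 * I420/YV12/NV12 form the 4:2:0 family and I444/YV24 the 4:4:4 family, so a
 * source outside the requested family gets converted to the family's
 * canonical planar format.) */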
if( x264_init_vid_filter( "resize", handle, &filter, info, param, NULL ) )
return -1;
memset( &input_opt, 0, sizeof(cli_input_opt_t) );
memset( &output_opt, 0, sizeof(cli_output_opt_t) );
input_opt.bit_depth = 8;
+ int output_csp = defaults.i_csp;
opt->b_progress = 1;
/* Presets are applied before all other options. */
param->i_frame_total = X264_MAX( atoi( optarg ), 0 );
break;
case OPT_SEEK:
- opt->i_seek = input_opt.seek = X264_MAX( atoi( optarg ), 0 );
+ opt->i_seek = X264_MAX( atoi( optarg ), 0 );
break;
case 'o':
output_filename = optarg;
case OPT_DTS_COMPRESSION:
output_opt.use_dts_compress = 1;
break;
+ case OPT_OUTPUT_CSP:
+ FAIL_IF_ERROR( parse_enum_value( optarg, output_csp_names, &output_csp ), "Unknown output csp `%s'\n", optarg )
+ // correct the parsed value to the libx264 csp value
+ output_csp = !output_csp ? X264_CSP_I420 : X264_CSP_I444;
+ break;
default:
generic_option:
{
info.tff = param->b_tff;
info.vfr = param->b_vfr_input;
+ input_opt.seek = opt->i_seek;
input_opt.progress = opt->b_progress;
+ input_opt.output_csp = output_csp;
if( select_input( demuxer, demuxername, input_filename, &opt->hin, &info, &input_opt ) )
return -1;
info.tff = param->b_tff;
}
- if( init_vid_filters( vid_filters, &opt->hin, &info, param ) )
+ if( init_vid_filters( vid_filters, &opt->hin, &info, param, output_csp ) )
return -1;
/* set param flags from the post-filtered video */
#include "x264_config.h"
-#define X264_BUILD 115
+#define X264_BUILD 116
/* x264_t:
* opaque handler for encoder */
#define X264_CSP_I420 0x0001 /* yuv 4:2:0 planar */
#define X264_CSP_YV12 0x0002 /* yvu 4:2:0 planar */
#define X264_CSP_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */
-#define X264_CSP_MAX 0x0004 /* end of list */
+#define X264_CSP_I444 0x0004 /* yuv 4:4:4 planar */
+#define X264_CSP_YV24 0x0005 /* yvu 4:4:4 planar */
+#define X264_CSP_MAX 0x0006 /* end of list */
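/* e.g. an application would request 4:4:4 input like any other colorspace,
 * optionally OR-ing the high-depth flag (illustrative snippet):
 *     param.i_csp = X264_CSP_I444;                       // 8-bit 4:4:4
 *     param.i_csp = X264_CSP_I444 | X264_CSP_HIGH_DEPTH; // 16-bit samples
 */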
#define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */
#define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */
int i_cqm_preset;
char *psz_cqm_file; /* JM format */
uint8_t cqm_4iy[16]; /* used only if i_cqm_preset == X264_CQM_CUSTOM */
- uint8_t cqm_4ic[16];
uint8_t cqm_4py[16];
+ uint8_t cqm_4ic[16];
uint8_t cqm_4pc[16];
uint8_t cqm_8iy[64];
uint8_t cqm_8py[64];
+ uint8_t cqm_8ic[64];
+ uint8_t cqm_8pc[64];
/* Log */
void (*pf_log)( void *, int i_level, const char *psz, va_list );