#define RV40_LOWPASS(OPNAME, OP) \
static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                                     const int h, const int C1, const int C2, const int SHIFT){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    /* 6-tap horizontal filter: taps (1, -5, C1, C2, -5, 1), rounded, then >> SHIFT */\
    for(i = 0; i < h; i++)\
    {\
        OP(dst[0], (src[-2] + src[ 3] - 5*(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2], (src[ 0] + src[ 5] - 5*(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3], (src[ 1] + src[ 6] - 5*(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4], (src[ 2] + src[ 7] - 5*(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                           const int w, const int C1, const int C2, const int SHIFT){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    /* same 6-tap filter applied vertically, one column per iteration */\
    for(i = 0; i < w; i++)\
    {\
        const int srcB  = src[-2*srcStride];\
        const int srcA  = src[-1*srcStride];\
        const int src0  = src[0 *srcStride];\
        const int src1  = src[1 *srcStride];\
        const int src2  = src[2 *srcStride];\
        const int src3  = src[3 *srcStride];\
        const int src4  = src[4 *srcStride];\
        const int src5  = src[5 *srcStride];\
        const int src6  = src[6 *srcStride];\
        const int src7  = src[7 *srcStride];\
        const int src8  = src[8 *srcStride];\
        const int src9  = src[9 *srcStride];\
        const int src10 = src[10*srcStride];\
        OP(dst[0*dstStride], (srcB + src3 - 5*(srcA+src2) + src0*C1 + src1*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1*dstStride], (srcA + src4 - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2*dstStride], (src0 + src5 - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3*dstStride], (src1 + src6 - 5*(src2+src5) + src3*C1 + src4*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4*dstStride], (src2 + src7 - 5*(src3+src6) + src4*C1 + src5*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5*dstStride], (src3 + src8 - 5*(src4+src7) + src5*C1 + src6*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6*dstStride], (src4 + src9 - 5*(src5+src8) + src6*C1 + src7*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7*dstStride], (src5 + src10 - 5*(src6+src9) + src7*C1 + src8*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int w, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, w-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
}\
\
static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int h, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, h-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, h-8, C1, C2, SHIFT);\
}
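/*
 * Coefficient note (derived from the code above, not an original comment):
 * each output pixel is the 6-tap filter (1, -5, C1, C2, -5, 1) >> SHIFT, so
 * the RV40_MC variants below select
 *     1/4-pel: C1 = 52, C2 = 20, SHIFT = 6   -> (1, -5, 52, 20, -5, 1) / 64
 *     1/2-pel: C1 = 20, C2 = 20, SHIFT = 5   -> (1, -5, 20, 20, -5, 1) / 32
 *     3/4-pel: C1 = 20, C2 = 52, SHIFT = 6   -> (1, -5, 20, 52, -5, 1) / 64
 * Each weight set sums to 1 << SHIFT, so the filters are DC-preserving.
 */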
#define RV40_MC(OPNAME, SIZE) \
static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}
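/*
 * Naming sketch (inferred from the calls above, not an original comment):
 * _mcXY_c handles a motion vector whose fractional part is X/4 pel
 * horizontally and Y/4 pel vertically. One-dimensional cases (_mc10, _mc01,
 * ...) filter in place; two-dimensional cases first run the horizontal
 * filter into the `full` scratch buffer (SIZE+5 rows, covering the 6-tap
 * vertical support) and then filter vertically from `full_mid`.
 */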
#define op_avg(a, b) a = (((a)+cm[b]+1)>>1)
#define op_put(a, b) a = cm[b]
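/* Presumed instantiations (elided in this excerpt; the usual FFmpeg pattern):
 * expand the lowpass and MC macros once for "put" and once for "avg", then
 * drop the op helpers. The rv40_bias[4][4] rounding table used by
 * RV40_CHROMA_MC below is also defined in the elided region. */
RV40_LOWPASS(put_ , op_put)
RV40_LOWPASS(avg_ , op_avg)

#undef op_avg
#undef op_put

RV40_MC(put_, 8)
RV40_MC(put_, 16)
RV40_MC(avg_, 8)
RV40_MC(avg_, 16)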
#define RV40_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    /* 2D bilinear chroma MC with per-position bias; A+B+C+D = 64 */\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        /* one fractional component is zero: fall back to 1D interpolation */\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}\
\
static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + bias));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + bias));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + bias));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            OP(dst[4], (A*src[4] + E*src[step+4] + bias));\
            OP(dst[5], (A*src[5] + E*src[step+5] + bias));\
            OP(dst[6], (A*src[6] + E*src[step+6] + bias));\
            OP(dst[7], (A*src[7] + E*src[step+7] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}
#define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1)
#define op_put(a, b) a = ((b)>>6)
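/* Presumed instantiations (elided in this excerpt), mirroring the luma
 * macros above: */
RV40_CHROMA_MC(put_, op_put)
RV40_CHROMA_MC(avg_, op_avg)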
/* weighted average of two predictions; note that w2 scales src1 and w1 scales src2 */
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_rnd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}\
static void rv40_weight_func_nornd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (w2 * src1[i] + w1 * src2[i] + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}
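/* Presumed instantiations (elided in this excerpt): one 16x16 and one 8x8
 * variant of each weight function. */
RV40_WEIGHT_FUNC(16)
RV40_WEIGHT_FUNC(8)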
/** dither values for deblocking filter - left/top values */
static const uint8_t rv40_dither_l[16] = {
    0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
};

/** dither values for deblocking filter - right/bottom values */
static const uint8_t rv40_dither_r[16] = {
    0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
};
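/* The strong filter below indexes these as rv40_dither_l/r[dmode + i] with
 * i = 0..3, so dmode presumably selects which 4-entry dither pattern is used
 * for a given edge. */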
#define CLIP_SYMM(a, b) av_clip(a, -(b), b)

static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
                                                   const int step,
                                                   const ptrdiff_t stride,
                                                   const int filter_p1,
                                                   const int filter_q1,
                                                   const int alpha,
                                                   const int beta,
                                                   const int lim_p0q0,
                                                   const int lim_q1,
                                                   const int lim_p1)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i, t, u, diff;

    for (i = 0; i < 4; i++, src += stride) {
        int diff_p1p0 = src[-2*step] - src[-1*step];
        int diff_q1q0 = src[ 1*step] - src[ 0*step];
        int diff_p1p2 = src[-2*step] - src[-3*step];
        int diff_q1q2 = src[ 1*step] - src[ 2*step];

        t = src[0*step] - src[-1*step];
        if (!t)
            continue;

        u = (alpha * FFABS(t)) >> 7;
        if (u > 3 - (filter_p1 && filter_q1))
            continue;

        t <<= 2;
        if (filter_p1 && filter_q1)
            t += src[-2*step] - src[1*step];

        diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
        src[-1*step] = cm[src[-1*step] + diff];
        src[ 0*step] = cm[src[ 0*step] - diff];

        if (filter_p1 && FFABS(diff_p1p2) <= beta) {
            t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
            src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
        }

        if (filter_q1 && FFABS(diff_q1q2) <= beta) {
            t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
            src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
        }
    }
}
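/*
 * Reading aid (derived from the arithmetic above): for edge pixels
 * p1, p0 | q0, q1 the core correction is
 *     diff = clip_symm((4*(q0 - p0) + (p1 - q1) + 4) >> 3, lim_p0q0)
 * (the p1 - q1 term only when both outer taps are filtered); p0 is then
 * increased and q0 decreased by diff through the clipping table cm.
 */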
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}

static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}
static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
                                                     const int step,
                                                     const ptrdiff_t stride,
                                                     const int alpha,
                                                     const int lims,
                                                     const int dmode,
                                                     const int chroma)
{
    int i;

    for(i = 0; i < 4; i++, src += stride){
        int sflag, p0, q0, p1, q1;
        int t = src[0*step] - src[-1*step];

        if (!t)
            continue;

        sflag = (alpha * FFABS(t)) >> 7;
        if (sflag > 1)
            continue;

        p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
              26*src[ 0*step] + 25*src[ 1*step] +
              rv40_dither_l[dmode + i]) >> 7;

        q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
              26*src[ 1*step] + 25*src[ 2*step] +
              rv40_dither_r[dmode + i]) >> 7;

        if (sflag) {
            p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
            q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
        }

        p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
              25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
        q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
              25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;

        if (sflag) {
            p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
            q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
        }

        src[-2*step] = p1;
        src[-1*step] = p0;
        src[ 0*step] = q0;
        src[ 1*step] = q1;

        if(!chroma){
            src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
                            51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
            src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
                            51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
        }
    }
}
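/*
 * Reading aid (derived from the arithmetic above): the strong filter smooths
 * p1, p0, q0, q1 with 5-tap weights (25, 26, 26, 26, 25)/128 plus a dither
 * term from rv40_dither_l/r; when sflag is set, each result is clamped to
 * within +/-lims of the unfiltered sample. For luma (!chroma) the third
 * pixel on each side is additionally smoothed with weights
 * (25, 26, 51, 26)/128 and rounding constant 64.
 */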
static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
}

static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
}
static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
                                                      int step, ptrdiff_t stride,
                                                      int beta, int beta2,
                                                      int edge,
                                                      int *p1, int *q1)
{
    int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
    int strong0 = 0, strong1 = 0;
    uint8_t *ptr;
    int i;

    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
        sum_p1p0 += ptr[-2*step] - ptr[-1*step];
        sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
    }

    *p1 = FFABS(sum_p1p0) < (beta << 2);
    *q1 = FFABS(sum_q1q0) < (beta << 2);

    if (!edge)
        return 0;

    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
        sum_p1p2 += ptr[-2*step] - ptr[-3*step];
        sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
    }

    strong0 = *p1 && (FFABS(sum_p1p2) < beta2);
    strong1 = *q1 && (FFABS(sum_q1q2) < beta2);

    return strong0 && strong1;
}
static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
}

static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
}
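/*
 * Sketch of how these C routines are presumably wired into the DSP context
 * (the init function itself is outside this excerpt; member names assumed
 * from FFmpeg's RV34DSPContext, so treat this as illustrative):
 */
#if 0
av_cold void ff_rv40dsp_init(RV34DSPContext *c)
{
    c->rv40_weak_loop_filter[0]     = rv40_h_weak_loop_filter;
    c->rv40_weak_loop_filter[1]     = rv40_v_weak_loop_filter;
    c->rv40_strong_loop_filter[0]   = rv40_h_strong_loop_filter;
    c->rv40_strong_loop_filter[1]   = rv40_v_strong_loop_filter;
    c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength;
    c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength;
}
#endif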