00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076 #include "config.h"
00077 #include "avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082 #ifdef HAVE_MALLOC_H
00083 #include <malloc.h>
00084 #endif
00085
00086
00087
00088
00089
00090 #include "postprocess.h"
00091 #include "postprocess_internal.h"
00092
00093 #ifdef HAVE_ALTIVEC_H
00094 #include <altivec.h>
00095 #endif
00096
/* Size in bytes of the scratch buffer into which
   pp_get_mode_by_name_and_quality() copies the mode string before
   tokenizing it. */
00097 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array of unrecognized option strings; the
   parser stops collecting at OPTIONS_ARRAY_SIZE-1 entries so that a NULL
   terminator always fits. */
00098 #define OPTIONS_ARRAY_SIZE 10
/* Number of rows (and columns) the C block filters in this file iterate over. */
00099 #define BLOCK_SIZE 8
/* NOTE(review): not referenced in the visible part of this file; presumably
   used by the included template code - confirm. */
00100 #define TEMP_STRIDE 8
00101
00102
/* 64-bit constants, only declared on x86. The wNN values repeat a 16-bit
   word four times, the bNN values repeat a byte eight times.
   NOTE(review): none of them is referenced by the C code visible in this
   file; presumably they are operands for inline assembly in the included
   templates - confirm. */
00103 #if defined(ARCH_X86)
00104 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00105 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00106 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00107 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00108 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00109 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00110 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00111 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00112 #endif
00113
/* Threshold constant for the deringing filter (declared on every
   architecture). NOTE(review): its consumer is not visible here. */
00114 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00115
00116
/* Table of all filters the mode-string parser recognizes, terminated by an
   entry whose shortName is NULL.
   The parser matches a filter name against shortName or longName (exact,
   case-sensitive strcmp) and then reads chromDefault, minLumQuality,
   minChromQuality and mask from the matching entry.
   NOTE(review): struct PPFilter is declared elsewhere; the initializer order
   is presumably { shortName, longName, chromDefault, minLumQuality,
   minChromQuality, mask } - confirm against the struct declaration. */
00117 static struct PPFilter filters[]=
00118 {
00119 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
00120 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
00121
00122
00123 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
00124 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
00125 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
00126 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
00127 {"dr", "dering", 1, 5, 6, DERING},
00128 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
00129 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00130 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00131 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00132 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
00133 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
00134 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
00135 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
00136 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
/* sentinel: ends the table scan */
00137 {NULL, NULL,0,0,0,0}
00138 };
00139
/* Alias table used by the mode-string parser, stored as consecutive
   (alias, expansion) string pairs and terminated by a single NULL.
   When a filter name equals an alias (entry 2*i), the parser splices the
   expansion (entry 2*i+1) into its working buffer so that the expanded
   filters are parsed next. */
00140 static const char *replaceTable[]=
00141 {
00142 "default", "hb:a,vb:a,dr:a",
00143 "de", "hb:a,vb:a,dr:a",
00144 "fast", "h1:a,v1:a,dr:a",
00145 "fa", "h1:a,v1:a,dr:a",
00146 "ac", "ha:a:128:7,va:a,dr:a",
00147 NULL
00148 };
00149
00150
/* x86-only wrappers that each emit a single prefetch instruction for the
   address p. The four variants differ only in the instruction used
   (prefetchnta, prefetcht0, prefetcht1, prefetcht2). The asm statements have
   no outputs and take p in a general-purpose register. */
00151 #if defined(ARCH_X86)
/* Emit "prefetchnta (p)". */
00152 static inline void prefetchnta(void *p)
00153 {
00154 asm volatile( "prefetchnta (%0)\n\t"
00155 : : "r" (p)
00156 );
00157 }
00158
/* Emit "prefetcht0 (p)". */
00159 static inline void prefetcht0(void *p)
00160 {
00161 asm volatile( "prefetcht0 (%0)\n\t"
00162 : : "r" (p)
00163 );
00164 }
00165
/* Emit "prefetcht1 (p)". */
00166 static inline void prefetcht1(void *p)
00167 {
00168 asm volatile( "prefetcht1 (%0)\n\t"
00169 : : "r" (p)
00170 );
00171 }
00172
/* Emit "prefetcht2 (p)". */
00173 static inline void prefetcht2(void *p)
00174 {
00175 asm volatile( "prefetcht2 (%0)\n\t"
00176 : : "r" (p)
00177 );
00178 }
00179 #endif
00180
00181
00182
00186 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00187 {
00188 int numEq= 0;
00189 int y;
00190 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00191 const int dcThreshold= dcOffset*2 + 1;
00192
00193 for(y=0; y<BLOCK_SIZE; y++)
00194 {
00195 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00196 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00197 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00198 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00199 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00200 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00201 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00202 src+= stride;
00203 }
00204 return numEq > c->ppMode.flatnessThreshold;
00205 }
00206
00210 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
00211 int numEq= 0;
00212 int y;
00213 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00214 const int dcThreshold= dcOffset*2 + 1;
00215
00216 src+= stride*4;
00217 for(y=0; y<BLOCK_SIZE-1; y++)
00218 {
00219 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00220 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00221 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00222 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00223 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00224 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00225 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00226 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00227 src+= stride;
00228 }
00229 return numEq > c->ppMode.flatnessThreshold;
00230 }
00231
/**
 * Sparse check that the horizontal pixel range of an 8-row block is small.
 *
 * One pixel pair is sampled per row, cycling through the column pairs
 * (0,5), (2,7), (4,1), (6,3) over the 8 rows. The check fails as soon as a
 * sampled difference falls outside [-2*QP, 2*QP].
 *
 * @param src    first pixel of the first row
 * @param stride offset between the starts of consecutive rows
 * @param QP     quantizer that scales the allowed difference
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int colA[4] = { 0, 2, 4, 6 };
    static const int colB[4] = { 5, 7, 1, 3 };
    const uint8_t *row = src;
    int line;

    for (line = 0; line < 8; line++, row += stride) {
        const int k = line & 3;

        /* unsigned compare: anything below -2*QP wraps to a huge value */
        if ((unsigned)(row[colA[k]] - row[colB[k]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
00254
00255 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00256 {
00257 #if 1
00258 #if 1
00259 int x;
00260 src+= stride*4;
00261 for(x=0; x<BLOCK_SIZE; x+=4)
00262 {
00263 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
00264 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00265 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00266 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00267 }
00268 #else
00269 int x;
00270 src+= stride*3;
00271 for(x=0; x<BLOCK_SIZE; x++)
00272 {
00273 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
00274 }
00275 #endif
00276 return 1;
00277 #else
00278 int x;
00279 src+= stride*4;
00280 for(x=0; x<BLOCK_SIZE; x++)
00281 {
00282 int min=255;
00283 int max=0;
00284 int y;
00285 for(y=0; y<8; y++){
00286 int v= src[x + y*stride];
00287 if(v>max) max=v;
00288 if(v<min) min=v;
00289 }
00290 if(max-min > 2*QP) return 0;
00291 }
00292 return 1;
00293 #endif
00294 }
00295
00296 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
00297 if( isHorizDC_C(src, stride, c) ){
00298 if( isHorizMinMaxOk_C(src, stride, c->QP) )
00299 return 1;
00300 else
00301 return 0;
00302 }else{
00303 return 2;
00304 }
00305 }
00306
00307 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
00308 if( isVertDC_C(src, stride, c) ){
00309 if( isVertMinMaxOk_C(src, stride, c->QP) )
00310 return 1;
00311 else
00312 return 0;
00313 }else{
00314 return 2;
00315 }
00316 }
00317
00318 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00319 {
00320 int y;
00321 for(y=0; y<BLOCK_SIZE; y++)
00322 {
00323 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00324
00325 if(FFABS(middleEnergy) < 8*c->QP)
00326 {
00327 const int q=(dst[3] - dst[4])/2;
00328 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00329 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00330
00331 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00332 d= FFMAX(d, 0);
00333
00334 d= (5*d + 32) >> 6;
00335 d*= FFSIGN(-middleEnergy);
00336
00337 if(q>0)
00338 {
00339 d= d<0 ? 0 : d;
00340 d= d>q ? q : d;
00341 }
00342 else
00343 {
00344 d= d>0 ? 0 : d;
00345 d= d<q ? q : d;
00346 }
00347
00348 dst[3]-= d;
00349 dst[4]+= d;
00350 }
00351 dst+= stride;
00352 }
00353 }
00354
00359 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00360 {
00361 int y;
00362 for(y=0; y<BLOCK_SIZE; y++)
00363 {
00364 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00365 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00366
00367 int sums[10];
00368 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00369 sums[1] = sums[0] - first + dst[3];
00370 sums[2] = sums[1] - first + dst[4];
00371 sums[3] = sums[2] - first + dst[5];
00372 sums[4] = sums[3] - first + dst[6];
00373 sums[5] = sums[4] - dst[0] + dst[7];
00374 sums[6] = sums[5] - dst[1] + last;
00375 sums[7] = sums[6] - dst[2] + last;
00376 sums[8] = sums[7] - dst[3] + last;
00377 sums[9] = sums[8] - dst[4] + last;
00378
00379 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00380 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00381 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00382 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00383 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00384 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00385 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00386 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00387
00388 dst+= stride;
00389 }
00390 }
00391
00400 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00401 {
00402 int y;
00403 static uint64_t *lut= NULL;
00404 if(lut==NULL)
00405 {
00406 int i;
00407 lut = av_malloc(256*8);
00408 for(i=0; i<256; i++)
00409 {
00410 int v= i < 128 ? 2*i : 2*(i-256);
00411
00412
00413
00414
00415
00416
00417
00418
00419 uint64_t a= (v/16) & 0xFF;
00420 uint64_t b= (v*3/16) & 0xFF;
00421 uint64_t c= (v*5/16) & 0xFF;
00422 uint64_t d= (7*v/16) & 0xFF;
00423 uint64_t A= (0x100 - a)&0xFF;
00424 uint64_t B= (0x100 - b)&0xFF;
00425 uint64_t C= (0x100 - c)&0xFF;
00426 uint64_t D= (0x100 - c)&0xFF;
00427
00428 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00429 (D<<24) | (C<<16) | (B<<8) | (A);
00430
00431 }
00432 }
00433
00434 for(y=0; y<BLOCK_SIZE; y++)
00435 {
00436 int a= src[1] - src[2];
00437 int b= src[3] - src[4];
00438 int c= src[5] - src[6];
00439
00440 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00441
00442 if(d < QP)
00443 {
00444 int v = d * FFSIGN(-b);
00445
00446 src[1] +=v/8;
00447 src[2] +=v/4;
00448 src[3] +=3*v/8;
00449 src[4] -=3*v/8;
00450 src[5] -=v/4;
00451 src[6] -=v/8;
00452
00453 }
00454 src+=stride;
00455 }
00456 }
00457
00461 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00462 int y;
00463 const int QP= c->QP;
00464 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00465 const int dcThreshold= dcOffset*2 + 1;
00466
00467 src+= step*4;
00468 for(y=0; y<8; y++){
00469 int numEq= 0;
00470
00471 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00472 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00473 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00474 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00475 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00476 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00477 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00478 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00479 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00480 if(numEq > c->ppMode.flatnessThreshold){
00481 int min, max, x;
00482
00483 if(src[0] > src[step]){
00484 max= src[0];
00485 min= src[step];
00486 }else{
00487 max= src[step];
00488 min= src[0];
00489 }
00490 for(x=2; x<8; x+=2){
00491 if(src[x*step] > src[(x+1)*step]){
00492 if(src[x *step] > max) max= src[ x *step];
00493 if(src[(x+1)*step] < min) min= src[(x+1)*step];
00494 }else{
00495 if(src[(x+1)*step] > max) max= src[(x+1)*step];
00496 if(src[ x *step] < min) min= src[ x *step];
00497 }
00498 }
00499 if(max-min < 2*QP){
00500 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00501 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00502
00503 int sums[10];
00504 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00505 sums[1] = sums[0] - first + src[3*step];
00506 sums[2] = sums[1] - first + src[4*step];
00507 sums[3] = sums[2] - first + src[5*step];
00508 sums[4] = sums[3] - first + src[6*step];
00509 sums[5] = sums[4] - src[0*step] + src[7*step];
00510 sums[6] = sums[5] - src[1*step] + last;
00511 sums[7] = sums[6] - src[2*step] + last;
00512 sums[8] = sums[7] - src[3*step] + last;
00513 sums[9] = sums[8] - src[4*step] + last;
00514
00515 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00516 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00517 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00518 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00519 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00520 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00521 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00522 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00523 }
00524 }else{
00525 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00526
00527 if(FFABS(middleEnergy) < 8*QP)
00528 {
00529 const int q=(src[3*step] - src[4*step])/2;
00530 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00531 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00532
00533 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00534 d= FFMAX(d, 0);
00535
00536 d= (5*d + 32) >> 6;
00537 d*= FFSIGN(-middleEnergy);
00538
00539 if(q>0)
00540 {
00541 d= d<0 ? 0 : d;
00542 d= d>q ? q : d;
00543 }
00544 else
00545 {
00546 d= d>0 ? 0 : d;
00547 d= d<q ? q : d;
00548 }
00549
00550 src[3*step]-= d;
00551 src[4*step]+= d;
00552 }
00553 }
00554
00555 src += stride;
00556 }
00557
00558
00559
00560
00561
00562 }
00563
00564
00565
/* ---- Selection of the implementation variants to compile ----
   Each COMPILE_x macro requests one instantiation of the template code
   further below. With RUNTIME_CPUDETECT every variant available for the
   architecture is requested; without it only the variant matching the
   build-time HAVE_x macros is requested. */
/* Plain C variant: needed when there is no MMX or when detecting at run time. */
00566 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
00567 #define COMPILE_C
00568 #endif
00569
00570 #ifdef ARCH_POWERPC
00571 #ifdef HAVE_ALTIVEC
00572 #define COMPILE_ALTIVEC
00573 #endif //HAVE_ALTIVEC
00574 #endif //ARCH_POWERPC
00575
00576 #if defined(ARCH_X86)
00577
00578 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
00579 #define COMPILE_MMX
00580 #endif
00581
00582 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
00583 #define COMPILE_MMX2
00584 #endif
00585
00586 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
00587 #define COMPILE_3DNOW
00588 #endif
00589 #endif
00590
/* The build-time feature macros are cleared here; from this point on they are
   (re)defined per variant immediately before each template inclusion, so the
   template sees exactly the feature set of the variant being generated. */
00591 #undef HAVE_MMX
00592 #undef HAVE_MMX2
00593 #undef HAVE_3DNOW
00594 #undef HAVE_ALTIVEC
00595
/* ---- Template instantiations ----
   Every section sets RENAME() to append a variant suffix and then includes
   postprocess_template.c, so the template is compiled once per requested
   variant. The order matters: the HAVE_x state left behind by the last
   section is what later #ifdef HAVE_x tests in this file observe. */
/* C variant: suffix _C, no SIMD features. */
00596 #ifdef COMPILE_C
00597 #undef HAVE_MMX
00598 #undef HAVE_MMX2
00599 #undef HAVE_3DNOW
00600 #define RENAME(a) a ## _C
00601 #include "postprocess_template.c"
00602 #endif
00603
/* AltiVec variant: suffix _altivec; the AltiVec-specific template is included
   ahead of the generic one. */
00604 #ifdef ARCH_POWERPC
00605 #ifdef COMPILE_ALTIVEC
00606 #undef RENAME
00607 #define HAVE_ALTIVEC
00608 #define RENAME(a) a ## _altivec
00609 #include "postprocess_altivec_template.c"
00610 #include "postprocess_template.c"
00611 #endif
00612 #endif //ARCH_POWERPC
00613
00614
/* MMX variant: suffix _MMX, MMX only. */
00615 #ifdef COMPILE_MMX
00616 #undef RENAME
00617 #define HAVE_MMX
00618 #undef HAVE_MMX2
00619 #undef HAVE_3DNOW
00620 #define RENAME(a) a ## _MMX
00621 #include "postprocess_template.c"
00622 #endif
00623
00624
/* MMX2 variant: suffix _MMX2, MMX and MMX2. */
00625 #ifdef COMPILE_MMX2
00626 #undef RENAME
00627 #define HAVE_MMX
00628 #define HAVE_MMX2
00629 #undef HAVE_3DNOW
00630 #define RENAME(a) a ## _MMX2
00631 #include "postprocess_template.c"
00632 #endif
00633
00634
/* 3DNow! variant: suffix _3DNow, MMX and 3DNow!. */
00635 #ifdef COMPILE_3DNOW
00636 #undef RENAME
00637 #define HAVE_MMX
00638 #undef HAVE_MMX2
00639 #define HAVE_3DNOW
00640 #define RENAME(a) a ## _3DNow
00641 #include "postprocess_template.c"
00642 #endif
00643
00644
00645
/**
 * Dispatch the postprocessing of one plane to an implementation variant.
 *
 * The mode pointed to by vm is first copied into the context (c->ppMode).
 * With RUNTIME_CPUDETECT the variant is picked from the c->cpuCaps flags,
 * preferring MMX2, then 3DNow!, then MMX on x86 (AltiVec on PowerPC) and
 * falling back to the C variant. Without RUNTIME_CPUDETECT exactly one
 * variant is chosen at compile time from the HAVE_x macros.
 * NOTE(review): the postProcess_<suffix> functions are not defined in the
 * visible code; presumably they come from the template inclusions above via
 * RENAME() - confirm.
 *
 * All plane arguments are forwarded unchanged to the selected variant.
 */
00646 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00647 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
00648 {
00649 PPContext *c= (PPContext *)vc;
00650 PPMode *ppMode= (PPMode *)vm;
/* the variants read the mode from the context, not from vm */
00651 c->ppMode= *ppMode;
00652
00653
00654
00655
00656 #ifdef RUNTIME_CPUDETECT
00657 #if defined(ARCH_X86)
00658
00659 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00660 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00661 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00662 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00663 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00664 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00665 else
00666 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00667 #else
00668 #ifdef ARCH_POWERPC
00669 #ifdef HAVE_ALTIVEC
00670 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00671 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00672 else
00673 #endif
00674 #endif
00675 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00676 #endif
00677 #else //RUNTIME_CPUDETECT
/* compile-time selection: the HAVE_x macros tested here are the ones left
   defined by the last template section above */
00678 #ifdef HAVE_MMX2
00679 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00680 #elif defined (HAVE_3DNOW)
00681 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00682 #elif defined (HAVE_MMX)
00683 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00684 #elif defined (HAVE_ALTIVEC)
00685 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00686 #else
00687 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00688 #endif
00689 #endif
00690 }
00691
00692
00693
00694
00695
00696
/**
 * Human-readable help text describing the filter names, their options and
 * the syntax of a postprocessing mode string.
 *
 * The filter names listed here must match the names accepted by the parser
 * (exact, case-sensitive comparison against the filter table): the long
 * names of the "ha", "va" and "fq" filters are "ahdeblock", "avdeblock" and
 * "forcequant".
 *
 * Depending on the library version the symbol is exported either as a
 * pointer to the text or as a character array.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00744
00745 pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
00746 {
00747 char temp[GET_MODE_BUFFER_SIZE];
00748 char *p= temp;
00749 static const char filterDelimiters[] = ",/";
00750 static const char optionDelimiters[] = ":";
00751 struct PPMode *ppMode;
00752 char *filterToken;
00753
00754 ppMode= av_malloc(sizeof(PPMode));
00755
00756 ppMode->lumMode= 0;
00757 ppMode->chromMode= 0;
00758 ppMode->maxTmpNoise[0]= 700;
00759 ppMode->maxTmpNoise[1]= 1500;
00760 ppMode->maxTmpNoise[2]= 3000;
00761 ppMode->maxAllowedY= 234;
00762 ppMode->minAllowedY= 16;
00763 ppMode->baseDcDiff= 256/8;
00764 ppMode->flatnessThreshold= 56-16-1;
00765 ppMode->maxClippedThreshold= 0.01;
00766 ppMode->error=0;
00767
00768 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
00769
00770 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00771
00772 for(;;){
00773 char *filterName;
00774 int q= 1000000;
00775 int chrom=-1;
00776 int luma=-1;
00777 char *option;
00778 char *options[OPTIONS_ARRAY_SIZE];
00779 int i;
00780 int filterNameOk=0;
00781 int numOfUnknownOptions=0;
00782 int enable=1;
00783
00784 filterToken= strtok(p, filterDelimiters);
00785 if(filterToken == NULL) break;
00786 p+= strlen(filterToken) + 1;
00787 filterName= strtok(filterToken, optionDelimiters);
00788 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00789
00790 if(*filterName == '-')
00791 {
00792 enable=0;
00793 filterName++;
00794 }
00795
00796 for(;;){
00797 option= strtok(NULL, optionDelimiters);
00798 if(option == NULL) break;
00799
00800 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00801 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00802 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00803 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00804 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00805 else
00806 {
00807 options[numOfUnknownOptions] = option;
00808 numOfUnknownOptions++;
00809 }
00810 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00811 }
00812 options[numOfUnknownOptions] = NULL;
00813
00814
00815 for(i=0; replaceTable[2*i]!=NULL; i++)
00816 {
00817 if(!strcmp(replaceTable[2*i], filterName))
00818 {
00819 int newlen= strlen(replaceTable[2*i + 1]);
00820 int plen;
00821 int spaceLeft;
00822
00823 if(p==NULL) p= temp, *p=0;
00824 else p--, *p=',';
00825
00826 plen= strlen(p);
00827 spaceLeft= p - temp + plen;
00828 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
00829 {
00830 ppMode->error++;
00831 break;
00832 }
00833 memmove(p + newlen, p, plen+1);
00834 memcpy(p, replaceTable[2*i + 1], newlen);
00835 filterNameOk=1;
00836 }
00837 }
00838
00839 for(i=0; filters[i].shortName!=NULL; i++)
00840 {
00841 if( !strcmp(filters[i].longName, filterName)
00842 || !strcmp(filters[i].shortName, filterName))
00843 {
00844 ppMode->lumMode &= ~filters[i].mask;
00845 ppMode->chromMode &= ~filters[i].mask;
00846
00847 filterNameOk=1;
00848 if(!enable) break;
00849
00850 if(q >= filters[i].minLumQuality && luma)
00851 ppMode->lumMode|= filters[i].mask;
00852 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00853 if(q >= filters[i].minChromQuality)
00854 ppMode->chromMode|= filters[i].mask;
00855
00856 if(filters[i].mask == LEVEL_FIX)
00857 {
00858 int o;
00859 ppMode->minAllowedY= 16;
00860 ppMode->maxAllowedY= 234;
00861 for(o=0; options[o]!=NULL; o++)
00862 {
00863 if( !strcmp(options[o],"fullyrange")
00864 ||!strcmp(options[o],"f"))
00865 {
00866 ppMode->minAllowedY= 0;
00867 ppMode->maxAllowedY= 255;
00868 numOfUnknownOptions--;
00869 }
00870 }
00871 }
00872 else if(filters[i].mask == TEMP_NOISE_FILTER)
00873 {
00874 int o;
00875 int numOfNoises=0;
00876
00877 for(o=0; options[o]!=NULL; o++)
00878 {
00879 char *tail;
00880 ppMode->maxTmpNoise[numOfNoises]=
00881 strtol(options[o], &tail, 0);
00882 if(tail!=options[o])
00883 {
00884 numOfNoises++;
00885 numOfUnknownOptions--;
00886 if(numOfNoises >= 3) break;
00887 }
00888 }
00889 }
00890 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
00891 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
00892 {
00893 int o;
00894
00895 for(o=0; options[o]!=NULL && o<2; o++)
00896 {
00897 char *tail;
00898 int val= strtol(options[o], &tail, 0);
00899 if(tail==options[o]) break;
00900
00901 numOfUnknownOptions--;
00902 if(o==0) ppMode->baseDcDiff= val;
00903 else ppMode->flatnessThreshold= val;
00904 }
00905 }
00906 else if(filters[i].mask == FORCE_QUANT)
00907 {
00908 int o;
00909 ppMode->forcedQuant= 15;
00910
00911 for(o=0; options[o]!=NULL && o<1; o++)
00912 {
00913 char *tail;
00914 int val= strtol(options[o], &tail, 0);
00915 if(tail==options[o]) break;
00916
00917 numOfUnknownOptions--;
00918 ppMode->forcedQuant= val;
00919 }
00920 }
00921 }
00922 }
00923 if(!filterNameOk) ppMode->error++;
00924 ppMode->error += numOfUnknownOptions;
00925 }
00926
00927 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00928 if(ppMode->error)
00929 {
00930 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00931 av_free(ppMode);
00932 return NULL;
00933 }
00934 return ppMode;
00935 }
00936
00937 void pp_free_mode(pp_mode_t *mode){
00938 av_free(mode);
00939 }
00940
/**
 * Replace the buffer *p by a freshly allocated, zero-filled buffer.
 *
 * The old buffer is released first; its contents are not preserved.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
00945
00946 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00947 int mbWidth = (width+15)>>4;
00948 int mbHeight= (height+15)>>4;
00949 int i;
00950
00951 c->stride= stride;
00952 c->qpStride= qpStride;
00953
00954 reallocAlign((void **)&c->tempDst, 8, stride*24);
00955 reallocAlign((void **)&c->tempSrc, 8, stride*24);
00956 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00957 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00958 for(i=0; i<256; i++)
00959 c->yHistogram[i]= width*height/64*15/256;
00960
00961 for(i=0; i<3; i++)
00962 {
00963
00964 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
00965 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);
00966 }
00967
00968 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00969 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00970 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00971 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00972 }
00973
/**
 * Name callback for the logging class of postprocessing contexts.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char contextName[] = "postproc";

    (void)ptr;
    return contextName;
}
00977
/* Logging class attached to every PPContext (stored in its av_class field by
   pp_get_context()). Initialized with the class name "Postproc", the
   context_to_name callback and NULL.
   NOTE(review): AVClass is declared elsewhere; the meaning of the third
   initializer is not visible here. */
00978 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00979
00980 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
00981 PPContext *c= av_malloc(sizeof(PPContext));
00982 int stride= (width+15)&(~15);
00983 int qpStride= (width+15)/16 + 2;
00984
00985 memset(c, 0, sizeof(PPContext));
00986 c->av_class = &av_codec_context_class;
00987 c->cpuCaps= cpuCaps;
00988 if(cpuCaps&PP_FORMAT){
00989 c->hChromaSubSample= cpuCaps&0x3;
00990 c->vChromaSubSample= (cpuCaps>>4)&0x3;
00991 }else{
00992 c->hChromaSubSample= 1;
00993 c->vChromaSubSample= 1;
00994 }
00995
00996 reallocBuffers(c, width, height, stride, qpStride);
00997
00998 c->frameNum=-1;
00999
01000 return c;
01001 }
01002
01003 void pp_free_context(void *vc){
01004 PPContext *c = (PPContext*)vc;
01005 int i;
01006
01007 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
01008 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
01009
01010 av_free(c->tempBlocks);
01011 av_free(c->yHistogram);
01012 av_free(c->tempDst);
01013 av_free(c->tempSrc);
01014 av_free(c->deintTemp);
01015 av_free(c->stdQPTable);
01016 av_free(c->nonBQPTable);
01017 av_free(c->forcedQPTable);
01018
01019 memset(c, 0, sizeof(PPContext));
01020
01021 av_free(c);
01022 }
01023
01024 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
01025 uint8_t * dst[3], const int dstStride[3],
01026 int width, int height,
01027 const QP_STORE_T *QP_store, int QPStride,
01028 pp_mode_t *vm, void *vc, int pict_type)
01029 {
01030 int mbWidth = (width+15)>>4;
01031 int mbHeight= (height+15)>>4;
01032 PPMode *mode = (PPMode*)vm;
01033 PPContext *c = (PPContext*)vc;
01034 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
01035 int absQPStride = FFABS(QPStride);
01036
01037
01038 if(c->stride < minStride || c->qpStride < absQPStride)
01039 reallocBuffers(c, width, height,
01040 FFMAX(minStride, c->stride),
01041 FFMAX(c->qpStride, absQPStride));
01042
01043 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
01044 {
01045 int i;
01046 QP_store= c->forcedQPTable;
01047 absQPStride = QPStride = 0;
01048 if(mode->lumMode & FORCE_QUANT)
01049 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01050 else
01051 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01052 }
01053
01054 if(pict_type & PP_PICT_TYPE_QP2){
01055 int i;
01056 const int count= mbHeight * absQPStride;
01057 for(i=0; i<(count>>2); i++){
01058 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01059 }
01060 for(i<<=2; i<count; i++){
01061 c->stdQPTable[i] = QP_store[i]>>1;
01062 }
01063 QP_store= c->stdQPTable;
01064 QPStride= absQPStride;
01065 }
01066
01067 if(0){
01068 int x,y;
01069 for(y=0; y<mbHeight; y++){
01070 for(x=0; x<mbWidth; x++){
01071 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01072 }
01073 av_log(c, AV_LOG_INFO, "\n");
01074 }
01075 av_log(c, AV_LOG_INFO, "\n");
01076 }
01077
01078 if((pict_type&7)!=3)
01079 {
01080 if (QPStride >= 0) {
01081 int i;
01082 const int count= mbHeight * QPStride;
01083 for(i=0; i<(count>>2); i++){
01084 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01085 }
01086 for(i<<=2; i<count; i++){
01087 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01088 }
01089 } else {
01090 int i,j;
01091 for(i=0; i<mbHeight; i++) {
01092 for(j=0; j<absQPStride; j++) {
01093 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01094 }
01095 }
01096 }
01097 }
01098
01099 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01100 mode->lumMode, mode->chromMode);
01101
01102 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01103 width, height, QP_store, QPStride, 0, mode, c);
01104
01105 width = (width )>>c->hChromaSubSample;
01106 height = (height)>>c->vChromaSubSample;
01107
01108 if(mode->chromMode)
01109 {
01110 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01111 width, height, QP_store, QPStride, 1, mode, c);
01112 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01113 width, height, QP_store, QPStride, 2, mode, c);
01114 }
01115 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
01116 {
01117 linecpy(dst[1], src[1], height, srcStride[1]);
01118 linecpy(dst[2], src[2], height, srcStride[2]);
01119 }
01120 else
01121 {
01122 int y;
01123 for(y=0; y<height; y++)
01124 {
01125 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01126 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01127 }
01128 }
01129 }
01130