00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083 #include <stdio.h>
00084 #include <stdlib.h>
00085 #include <string.h>
00086 #include <inttypes.h>
00087 #include <assert.h>
00088 #include "config.h"
00089 #ifdef HAVE_MALLOC_H
00090 #include <malloc.h>
00091 #endif
00092 #include "rgb2rgb.h"
00093 #include "swscale.h"
00094 #include "swscale_internal.h"
00095
/* Make sure neither of these optional build switches is defined in this file. */
00096 #undef PROFILE_THE_BEAST
00097 #undef INC_SCALING
00098
/* Short aliases for unsigned / signed 8-bit data. */
00099 typedef unsigned char ubyte;
00100 typedef signed char sbyte;
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
/*
 * vec_perm control vectors used by vec_merge3 to weave three byte planes
 * into packed 3-byte pixels.  In a vec_perm control vector the indices
 * 0x00-0x0f select a byte of the first operand and 0x10-0x1f a byte of the
 * second operand.
 */
00139 static
00140 const vector unsigned char
00141 perm_rgb_0 = (const vector unsigned char)AVV(0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05,
00142 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a),
00143 perm_rgb_1 = (const vector unsigned char)AVV(0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17,
00144 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f),
00145 perm_rgb_2 = (const vector unsigned char)AVV(0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
00146 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05),
00147 perm_rgb_3 = (const vector unsigned char)AVV(0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a,
00148 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f);
00149
/*
 * Interleave three 16-byte planes into 48 bytes of packed triplets.
 *
 * Beware of the parameter naming: the first three parameters are called
 * x2, x1, x0 (in that order) and every output triplet is x0[i], x1[i], x2[i].
 * y0, y1 and y2 receive the three consecutive 16-byte result vectors
 * (pixels 0-4 plus one byte of pixel 5, the rest of pixel 5 through two bytes
 * of pixel 10, and the remainder up to pixel 15).
 *
 * Relies on the GNU typeof extension and on perm_rgb_0..3; the arguments are
 * expanded more than once.
 */
00150 #define vec_merge3(x2,x1,x0,y0,y1,y2) \
00151 do { \
00152 typeof(x0) o0,o2,o3; \
00153 o0 = vec_mergeh (x0,x1); \
00154 y0 = vec_perm (o0, x2, perm_rgb_0); \
00155 o2 = vec_perm (o0, x2, perm_rgb_1); \
00156 o3 = vec_mergel (x0,x1); \
00157 y1 = vec_perm (o3,o2,perm_rgb_2); \
00158 y2 = vec_perm (o3,o2,perm_rgb_3); \
00159 } while(0)
00160
/*
 * Store 16 packed 24-bit pixels (48 bytes) built from three byte planes.
 *
 * The planes are handed to vec_merge3 as (x0,x1,x2), so each pixel is written
 * to memory as x2[i], x1[i], x0[i]; called with (R,G,B) this yields B,G,R.
 *
 * ptr must be a modifiable "vector unsigned char *" lvalue; it is advanced
 * by three vectors.  vec_st is used for the stores.
 *
 * The expansion deliberately has no trailing semicolon so the macro behaves
 * like a single statement (safe inside if/else); the caller supplies the ';'.
 */
#define vec_mstbgr24(x0,x1,x2,ptr) \
do { \
    typeof(x0) _0,_1,_2; \
    vec_merge3 (x0,x1,x2,_0,_1,_2); \
    vec_st (_0, 0, ptr++); \
    vec_st (_1, 0, ptr++); \
    vec_st (_2, 0, ptr++); \
} while (0)
00169
/*
 * Store 16 packed 24-bit pixels (48 bytes) built from three byte planes.
 *
 * The planes are handed to vec_merge3 as (x2,x1,x0), so each pixel is written
 * to memory as x0[i], x1[i], x2[i]; called with (R,G,B) this yields R,G,B.
 *
 * ptr must be a modifiable "vector unsigned char *" lvalue; it is advanced
 * by three vectors.  vec_st is used for the stores.
 *
 * The expansion deliberately has no trailing semicolon so the macro behaves
 * like a single statement (safe inside if/else); the caller supplies the ';'.
 */
#define vec_mstrgb24(x0,x1,x2,ptr) \
do { \
    typeof(x0) _0,_1,_2; \
    vec_merge3 (x2,x1,x0,_0,_1,_2); \
    vec_st (_0, 0, ptr++); \
    vec_st (_1, 0, ptr++); \
    vec_st (_2, 0, ptr++); \
} while (0)
00178
00179
00180
00181
00182
/*
 * Store 16 packed 32-bit pixels (64 bytes) built from four byte planes.
 *
 * T is the vector type of the planes.  Each pixel is written to memory as
 * x0[i], x1[i], x2[i], x3[i]: bytes are first merged pairwise (x0 with x1,
 * x2 with x3) and the resulting 16-bit pairs are merged again.
 *
 * ptr must be a modifiable pointer lvalue; four vectors are stored at byte
 * offsets 0, 16, 32 and 48 and ptr is then advanced by 4 elements.
 *
 * The expansion deliberately has no trailing semicolon so the macro behaves
 * like a single statement (safe inside if/else); the caller supplies the ';'.
 */
#define vec_mstrgb32(T,x0,x1,x2,x3,ptr) \
do { \
    T _0,_1,_2,_3; \
    /* pixels 0..7 */ \
    _0 = vec_mergeh (x0,x1); \
    _1 = vec_mergeh (x2,x3); \
    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
    vec_st (_2, 0*16, (T *)ptr); \
    vec_st (_3, 1*16, (T *)ptr); \
    /* pixels 8..15 */ \
    _0 = vec_mergel (x0,x1); \
    _1 = vec_mergel (x2,x3); \
    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
    vec_st (_2, 2*16, (T *)ptr); \
    vec_st (_3, 3*16, (T *)ptr); \
    ptr += 4; \
} while (0)
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
/*
 * vec_unh(x): widen bytes 0..7 of x to eight 16-bit lanes.
 * Each lane is built from byte 0x10 (first byte of the all-zero second
 * operand) followed by one byte of x, i.e. the byte is zero-extended.
 * The result is cast to vector signed short.
 */
00218 #define vec_unh(x) \
00219 (vector signed short) \
00220 vec_perm(x,(typeof(x))AVV(0),\
00221 (vector unsigned char)AVV(0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\
00222 0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07))
/*
 * vec_unl(x): same as vec_unh but for bytes 8..15 of x.
 */
00223 #define vec_unl(x) \
00224 (vector signed short) \
00225 vec_perm(x,(typeof(x))AVV(0),\
00226 (vector unsigned char)AVV(0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\
00227 0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F))
00228
/* Clamp every signed 16-bit lane of x to the range [16, 235]. */
00229 #define vec_clip_s16(x) \
00230 vec_max (vec_min (x, (vector signed short)AVV(235,235,235,235,235,235,235,235)),\
00231 (vector signed short)AVV( 16, 16, 16, 16, 16, 16, 16, 16))
00232
/*
 * Pack two vectors of signed 16-bit lanes into one vector of 16 unsigned
 * bytes: negative lanes are first raised to 0 with vec_max, the lanes are
 * then reinterpreted as unsigned and narrowed with vec_packs.
 */
00233 #define vec_packclp(x,y) \
00234 (vector unsigned char)vec_packs \
00235 ((vector unsigned short)vec_max (x,(vector signed short) AVV(0)), \
00236 (vector unsigned short)vec_max (y,(vector signed short) AVV(0)))
00237
00238
00239
00240
/*
 * Convert eight Y/U/V samples (16-bit lanes) to R, G and B lanes using the
 * coefficient vectors stored in the context (CY, OY, CRV, CBU, CGU, CGV,
 * CSHIFT).
 *
 * c        conversion context holding the coefficient vectors
 * Y, U, V  input samples; U and V are expected un-biased (128 is subtracted
 *          here)
 * R, G, B  output lanes, written through the pointers; they are not clamped
 *          to 8 bits here
 */
00241 static inline void cvtyuvtoRGB (SwsContext *c,
00242 vector signed short Y, vector signed short U, vector signed short V,
00243 vector signed short *R, vector signed short *G, vector signed short *B)
00244 {
00245 vector signed short vx,ux,uvx;
00246
/* luma: multiply by CY and add the offset OY */
00247 Y = vec_mradds (Y, c->CY, c->OY);
/* remove the chroma bias of 128 */
00248 U = vec_sub (U,(vector signed short)
00249 vec_splat((vector signed short)AVV(128),0));
00250 V = vec_sub (V,(vector signed short)
00251 vec_splat((vector signed short)AVV(128),0));
00252
00253
/* B = (U << CSHIFT) * CBU + Y */
00254 ux = vec_sl (U, c->CSHIFT);
00255 *B = vec_mradds (ux, c->CBU, Y);
00256
00257
/* R = (V << CSHIFT) * CRV + Y */
00258 vx = vec_sl (V, c->CSHIFT);
00259 *R = vec_mradds (vx, c->CRV, Y);
00260
00261
/* G = U * CGU + V * CGV + Y (no pre-shift for the green terms) */
00262 uvx = vec_mradds (U, c->CGU, Y);
00263 *G = vec_mradds (V, c->CGV, uvx);
00264 }
00265
00266
00267
00268
00269
00270
00271
00272
00273
/*
 * DEFCSP420_CVT(name, out_pixels)
 *
 * Generates "static int altivec_<name>(...)", a converter from planar YUV
 * with 2x2 subsampled chroma to a packed RGB layout.
 *
 * Generated function:
 *   c           context; srcW is the width, CY/OY/CRV/CBU/CGU/CGV/CSHIFT are
 *               the conversion coefficients
 *   in          in[0] = luma, in[1] = U, in[2] = V, all pointing at the first
 *               row of the slice
 *   instrides   byte strides of the three input planes
 *   srcSliceY   first output row of the slice
 *   srcSliceH   number of rows in the slice
 *   oplanes     oplanes[0] is the packed output image
 *   outstrides  outstrides[0] is the output byte stride
 *   returns     srcSliceH
 *
 * Two rows are produced per outer iteration (they share one chroma row) and
 * 16 pixels per inner iteration; only w/16 column groups and h/2 row pairs
 * are processed.  Inputs are loaded with the vec_lvsl/vec_perm idiom so they
 * need not be 16-byte aligned (one vector past the current position is read).
 *
 * out_pixels(R,G,B,ptr) must store 16 pixels and advance ptr.
 *
 * The output row pointers are recomputed from the row index for every row
 * pair, so an output stride larger than the packed row size is honoured
 * (previously the pointers were advanced by one stride on top of the bytes
 * already written, which is only right for tightly packed rows).
 */
#define DEFCSP420_CVT(name,out_pixels) \
static int altivec_##name (SwsContext *c, \
                           unsigned char **in, int *instrides, \
                           int srcSliceY, int srcSliceH, \
                           unsigned char **oplanes, int *outstrides) \
{ \
    int w = c->srcW; \
    int h = srcSliceH; \
    int i,j; \
    int instrides_scl[3]; \
    vector unsigned char y0,y1; \
    \
    vector signed char u,v; \
    \
    vector signed short Y0,Y1,Y2,Y3; \
    vector signed short U,V; \
    vector signed short vx,ux,uvx; \
    vector signed short vx0,ux0,uvx0; \
    vector signed short vx1,ux1,uvx1; \
    vector signed short R0,G0,B0; \
    vector signed short R1,G1,B1; \
    vector unsigned char R,G,B; \
    \
    vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \
    vector unsigned char align_perm; \
    \
    /* local copies of the conversion coefficients */ \
    vector signed short \
        lCY  = c->CY, \
        lOY  = c->OY, \
        lCRV = c->CRV, \
        lCBU = c->CBU, \
        lCGU = c->CGU, \
        lCGV = c->CGV; \
    \
    vector unsigned short lCSHIFT = c->CSHIFT; \
    \
    ubyte *y1i = in[0];               /* even luma row */ \
    ubyte *y2i = in[0]+instrides[0];  /* odd luma row  */ \
    ubyte *ui  = in[1]; \
    ubyte *vi  = in[2]; \
    \
    vector unsigned char *oute;       /* even output row */ \
    vector unsigned char *outo;       /* odd output row  */ \
    \
    /* distance from the end of the consumed data to the next row (pair) */ \
    instrides_scl[0] = instrides[0]*2-w; \
    instrides_scl[1] = instrides[1]-w/2; \
    instrides_scl[2] = instrides[2]-w/2; \
    \
    for (i=0;i<h/2;i++) { \
        oute = (vector unsigned char *) \
               (oplanes[0]+(srcSliceY+2*i)*outstrides[0]); \
        outo = (vector unsigned char *) \
               (oplanes[0]+(srcSliceY+2*i+1)*outstrides[0]); \
        \
        /* data-stream hints for the two rows about to be written */ \
        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0); \
        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1); \
        \
        for (j=0;j<w/16;j++) { \
            \
            y1ivP = (vector unsigned char *)y1i; \
            y2ivP = (vector unsigned char *)y2i; \
            uivP  = (vector unsigned char *)ui; \
            vivP  = (vector unsigned char *)vi; \
            \
            /* unaligned loads of 16 luma bytes per row and of the chroma */ \
            align_perm = vec_lvsl (0, y1i); \
            y0 = (vector unsigned char) \
                 vec_perm (y1ivP[0], y1ivP[1], align_perm); \
            \
            align_perm = vec_lvsl (0, y2i); \
            y1 = (vector unsigned char) \
                 vec_perm (y2ivP[0], y2ivP[1], align_perm); \
            \
            align_perm = vec_lvsl (0, ui); \
            u = (vector signed char) \
                vec_perm (uivP[0], uivP[1], align_perm); \
            \
            align_perm = vec_lvsl (0, vi); \
            v = (vector signed char) \
                vec_perm (vivP[0], vivP[1], align_perm); \
            \
            /* remove the chroma bias of 128 */ \
            u = (vector signed char) \
                vec_sub (u,(vector signed char) \
                         vec_splat((vector signed char)AVV(128),0)); \
            v = (vector signed char) \
                vec_sub (v,(vector signed char) \
                         vec_splat((vector signed char)AVV(128),0)); \
            \
            /* only the first 8 chroma samples are needed for 16 pixels */ \
            U = vec_unpackh (u); \
            V = vec_unpackh (v); \
            \
            Y0 = vec_unh (y0); \
            Y1 = vec_unl (y0); \
            Y2 = vec_unh (y1); \
            Y3 = vec_unl (y1); \
            \
            Y0 = vec_mradds (Y0, lCY, lOY); \
            Y1 = vec_mradds (Y1, lCY, lOY); \
            Y2 = vec_mradds (Y2, lCY, lOY); \
            Y3 = vec_mradds (Y3, lCY, lOY); \
            \
            /* blue term; each chroma lane is duplicated for 2 pixels */ \
            ux = vec_sl (U, lCSHIFT); \
            ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0)); \
            ux0 = vec_mergeh (ux,ux); \
            ux1 = vec_mergel (ux,ux); \
            \
            /* red term */ \
            vx = vec_sl (V, lCSHIFT); \
            vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0)); \
            vx0 = vec_mergeh (vx,vx); \
            vx1 = vec_mergel (vx,vx); \
            \
            /* green term */ \
            uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0)); \
            uvx = vec_mradds (V, lCGV, uvx); \
            uvx0 = vec_mergeh (uvx,uvx); \
            uvx1 = vec_mergel (uvx,uvx); \
            \
            /* even row */ \
            R0 = vec_add (Y0,vx0); \
            G0 = vec_add (Y0,uvx0); \
            B0 = vec_add (Y0,ux0); \
            R1 = vec_add (Y1,vx1); \
            G1 = vec_add (Y1,uvx1); \
            B1 = vec_add (Y1,ux1); \
            \
            R = vec_packclp (R0,R1); \
            G = vec_packclp (G0,G1); \
            B = vec_packclp (B0,B1); \
            \
            out_pixels(R,G,B,oute); \
            \
            /* odd row, same chroma terms */ \
            R0 = vec_add (Y2,vx0); \
            G0 = vec_add (Y2,uvx0); \
            B0 = vec_add (Y2,ux0); \
            R1 = vec_add (Y3,vx1); \
            G1 = vec_add (Y3,uvx1); \
            B1 = vec_add (Y3,ux1); \
            R = vec_packclp (R0,R1); \
            G = vec_packclp (G0,G1); \
            B = vec_packclp (B0,B1); \
            \
            out_pixels(R,G,B,outo); \
            \
            y1i += 16; \
            y2i += 16; \
            ui  += 8; \
            vi  += 8; \
            \
        } \
        \
        ui  += instrides_scl[1]; \
        vi  += instrides_scl[2]; \
        y1i += instrides_scl[0]; \
        y2i += instrides_scl[0]; \
    } \
    return srcSliceH; \
}
00437
00438
/*
 * Pixel store helpers: out_<layout>(a,b,c,ptr) stores 16 pixels at ptr and
 * advances ptr.  The callers in this file pass (R,G,B), in which case the
 * macro name spells the byte order written to memory.  For the 32-bit
 * layouts the fourth channel is an all-zero vector.
 */
00439 #define out_abgr(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))AVV(0)),c,b,a,ptr)
00440 #define out_bgra(a,b,c,ptr) vec_mstrgb32(typeof(a),c,b,a,((typeof (a))AVV(0)),ptr)
00441 #define out_rgba(a,b,c,ptr) vec_mstrgb32(typeof(a),a,b,c,((typeof (a))AVV(0)),ptr)
00442 #define out_argb(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))AVV(0)),a,b,c,ptr)
00443 #define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
00444 #define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)
00445
00446 DEFCSP420_CVT (yuv2_abgr, out_abgr)
00447 #if 1
00448 DEFCSP420_CVT (yuv2_bgra, out_bgra)
00449 #else
00450 static int altivec_yuv2_bgra32 (SwsContext *c,
00451 unsigned char **in, int *instrides,
00452 int srcSliceY, int srcSliceH,
00453 unsigned char **oplanes, int *outstrides)
00454 {
00455 int w = c->srcW;
00456 int h = srcSliceH;
00457 int i,j;
00458 int instrides_scl[3];
00459 vector unsigned char y0,y1;
00460
00461 vector signed char u,v;
00462
00463 vector signed short Y0,Y1,Y2,Y3;
00464 vector signed short U,V;
00465 vector signed short vx,ux,uvx;
00466 vector signed short vx0,ux0,uvx0;
00467 vector signed short vx1,ux1,uvx1;
00468 vector signed short R0,G0,B0;
00469 vector signed short R1,G1,B1;
00470 vector unsigned char R,G,B;
00471
00472 vector unsigned char *uivP, *vivP;
00473 vector unsigned char align_perm;
00474
00475 vector signed short
00476 lCY = c->CY,
00477 lOY = c->OY,
00478 lCRV = c->CRV,
00479 lCBU = c->CBU,
00480 lCGU = c->CGU,
00481 lCGV = c->CGV;
00482
00483 vector unsigned short lCSHIFT = c->CSHIFT;
00484
00485 ubyte *y1i = in[0];
00486 ubyte *y2i = in[0]+w;
00487 ubyte *ui = in[1];
00488 ubyte *vi = in[2];
00489
00490 vector unsigned char *oute
00491 = (vector unsigned char *)
00492 (oplanes[0]+srcSliceY*outstrides[0]);
00493 vector unsigned char *outo
00494 = (vector unsigned char *)
00495 (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);
00496
00497
00498 instrides_scl[0] = instrides[0];
00499 instrides_scl[1] = instrides[1]-w/2;
00500 instrides_scl[2] = instrides[2]-w/2;
00501
00502
00503 for (i=0;i<h/2;i++) {
00504 vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);
00505 vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);
00506
00507 for (j=0;j<w/16;j++) {
00508
00509 y0 = vec_ldl (0,y1i);
00510 y1 = vec_ldl (0,y2i);
00511 uivP = (vector unsigned char *)ui;
00512 vivP = (vector unsigned char *)vi;
00513
00514 align_perm = vec_lvsl (0, ui);
00515 u = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm);
00516
00517 align_perm = vec_lvsl (0, vi);
00518 v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm);
00519 u = (vector signed char)
00520 vec_sub (u,(vector signed char)
00521 vec_splat((vector signed char)AVV(128),0));
00522
00523 v = (vector signed char)
00524 vec_sub (v, (vector signed char)
00525 vec_splat((vector signed char)AVV(128),0));
00526
00527 U = vec_unpackh (u);
00528 V = vec_unpackh (v);
00529
00530
00531 Y0 = vec_unh (y0);
00532 Y1 = vec_unl (y0);
00533 Y2 = vec_unh (y1);
00534 Y3 = vec_unl (y1);
00535
00536 Y0 = vec_mradds (Y0, lCY, lOY);
00537 Y1 = vec_mradds (Y1, lCY, lOY);
00538 Y2 = vec_mradds (Y2, lCY, lOY);
00539 Y3 = vec_mradds (Y3, lCY, lOY);
00540
00541
00542 ux = vec_sl (U, lCSHIFT);
00543 ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0));
00544 ux0 = vec_mergeh (ux,ux);
00545 ux1 = vec_mergel (ux,ux);
00546
00547
00548 vx = vec_sl (V, lCSHIFT);
00549 vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0));
00550 vx0 = vec_mergeh (vx,vx);
00551 vx1 = vec_mergel (vx,vx);
00552
00553 uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0));
00554 uvx = vec_mradds (V, lCGV, uvx);
00555 uvx0 = vec_mergeh (uvx,uvx);
00556 uvx1 = vec_mergel (uvx,uvx);
00557 R0 = vec_add (Y0,vx0);
00558 G0 = vec_add (Y0,uvx0);
00559 B0 = vec_add (Y0,ux0);
00560 R1 = vec_add (Y1,vx1);
00561 G1 = vec_add (Y1,uvx1);
00562 B1 = vec_add (Y1,ux1);
00563 R = vec_packclp (R0,R1);
00564 G = vec_packclp (G0,G1);
00565 B = vec_packclp (B0,B1);
00566
00567 out_argb(R,G,B,oute);
00568 R0 = vec_add (Y2,vx0);
00569 G0 = vec_add (Y2,uvx0);
00570 B0 = vec_add (Y2,ux0);
00571 R1 = vec_add (Y3,vx1);
00572 G1 = vec_add (Y3,uvx1);
00573 B1 = vec_add (Y3,ux1);
00574 R = vec_packclp (R0,R1);
00575 G = vec_packclp (G0,G1);
00576 B = vec_packclp (B0,B1);
00577
00578 out_argb(R,G,B,outo);
00579 y1i += 16;
00580 y2i += 16;
00581 ui += 8;
00582 vi += 8;
00583
00584 }
00585
00586 outo += (outstrides[0])>>4;
00587 oute += (outstrides[0])>>4;
00588
00589 ui += instrides_scl[1];
00590 vi += instrides_scl[2];
00591 y1i += instrides_scl[0];
00592 y2i += instrides_scl[0];
00593 }
00594 return srcSliceH;
00595 }
00596
00597 #endif
00598
00599
00600 DEFCSP420_CVT (yuv2_rgba, out_rgba)
00601 DEFCSP420_CVT (yuv2_argb, out_argb)
00602 DEFCSP420_CVT (yuv2_rgb24, out_rgb24)
00603 DEFCSP420_CVT (yuv2_bgr24, out_bgr24)
00604
00605
00606
00607
/*
 * vec_perm control vectors that split 16 bytes of packed 4:2:2 data (byte
 * order U Y V Y, as consumed by altivec_uyvy_rgb32) into 16-bit lanes.
 * Every lane is byte 0x10 (first byte of the second vec_perm operand, which
 * the callers pass as zero) followed by one source byte:
 *   demux_u  bytes 0,4,8,12, each used for two consecutive lanes
 *   demux_v  bytes 2,6,10,14, each used for two consecutive lanes
 *   demux_y  bytes 1,3,5,...,15
 */
00608 static
00609 const vector unsigned char
00610 demux_u = (const vector unsigned char)AVV(0x10,0x00,0x10,0x00,
00611 0x10,0x04,0x10,0x04,
00612 0x10,0x08,0x10,0x08,
00613 0x10,0x0c,0x10,0x0c),
00614 demux_v = (const vector unsigned char)AVV(0x10,0x02,0x10,0x02,
00615 0x10,0x06,0x10,0x06,
00616 0x10,0x0A,0x10,0x0A,
00617 0x10,0x0E,0x10,0x0E),
00618 demux_y = (const vector unsigned char)AVV(0x10,0x01,0x10,0x03,
00619 0x10,0x05,0x10,0x07,
00620 0x10,0x09,0x10,0x0B,
00621 0x10,0x0D,0x10,0x0F);
00622
00623
00624
00625
00626 static int altivec_uyvy_rgb32 (SwsContext *c,
00627 unsigned char **in, int *instrides,
00628 int srcSliceY, int srcSliceH,
00629 unsigned char **oplanes, int *outstrides)
00630 {
00631 int w = c->srcW;
00632 int h = srcSliceH;
00633 int i,j;
00634 vector unsigned char uyvy;
00635 vector signed short Y,U,V;
00636 vector signed short R0,G0,B0,R1,G1,B1;
00637 vector unsigned char R,G,B;
00638 vector unsigned char *out;
00639 ubyte *img;
00640
00641 img = in[0];
00642 out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
00643
00644 for (i=0;i<h;i++) {
00645 for (j=0;j<w/16;j++) {
00646 uyvy = vec_ld (0, img);
00647 U = (vector signed short)
00648 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u);
00649
00650 V = (vector signed short)
00651 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v);
00652
00653 Y = (vector signed short)
00654 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y);
00655
00656 cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0);
00657
00658 uyvy = vec_ld (16, img);
00659 U = (vector signed short)
00660 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u);
00661
00662 V = (vector signed short)
00663 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v);
00664
00665 Y = (vector signed short)
00666 vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y);
00667
00668 cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1);
00669
00670 R = vec_packclp (R0,R1);
00671 G = vec_packclp (G0,G1);
00672 B = vec_packclp (B0,B1);
00673
00674
00675 out_rgba (R,G,B,out);
00676
00677 img += 32;
00678 }
00679 }
00680 return srcSliceH;
00681 }
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691 SwsFunc yuv2rgb_init_altivec (SwsContext *c)
00692 {
00693 if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
00694 return NULL;
00695
00696
00697
00698
00699
00700
00701
00702
00703 if ((c->srcW & 0xf) != 0) return NULL;
00704
00705 switch (c->srcFormat) {
00706 case PIX_FMT_YUV410P:
00707 case PIX_FMT_YUV420P:
00708
00709 case PIX_FMT_GRAY8:
00710 case PIX_FMT_NV12:
00711 case PIX_FMT_NV21:
00712 if ((c->srcH & 0x1) != 0)
00713 return NULL;
00714
00715 switch(c->dstFormat){
00716 case PIX_FMT_RGB24:
00717 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
00718 return altivec_yuv2_rgb24;
00719 case PIX_FMT_BGR24:
00720 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
00721 return altivec_yuv2_bgr24;
00722 case PIX_FMT_ARGB:
00723 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
00724 return altivec_yuv2_argb;
00725 case PIX_FMT_ABGR:
00726 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
00727 return altivec_yuv2_abgr;
00728 case PIX_FMT_RGBA:
00729 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
00730 return altivec_yuv2_rgba;
00731 case PIX_FMT_BGRA:
00732 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
00733 return altivec_yuv2_bgra;
00734 default: return NULL;
00735 }
00736 break;
00737
00738 case PIX_FMT_UYVY422:
00739 switch(c->dstFormat){
00740 case PIX_FMT_BGR32:
00741 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
00742 return altivec_uyvy_rgb32;
00743 default: return NULL;
00744 }
00745 break;
00746
00747 }
00748 return NULL;
00749 }
00750
/*
 * Convert a 16.16 fixed-point value to a rounded 16-bit integer.
 *
 * f        value scaled by 65536
 * returns  (f + 0.5) rounded down to an integer, saturated to
 *          [-0x7FFF, 0x7FFF] and returned as its 16-bit two's-complement bit
 *          pattern; anything below -0x7FFF yields 0x8000
 *
 * The rounding is done on the split value (integer part plus carry out of
 * the fraction) in 64 bits, so large inputs saturate instead of being
 * truncated to int first, and f + 0x8000 cannot overflow.
 */
static uint16_t roundToInt16(int64_t f){
    int64_t r = (f >> 16) + (((f & 0xFFFF) + (1 << 15)) >> 16);

    if (r < -0x7FFF)
        return 0x8000;
    if (r > 0x7FFF)
        return 0x7FFF;
    return (uint16_t)r;
}
00757
00758 void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation)
00759 {
00760 union {
00761 signed short tmp[8] __attribute__ ((aligned(16)));
00762 vector signed short vec;
00763 } buf;
00764
00765 buf.tmp[0] = ((0xffffLL) * contrast>>8)>>9;
00766 buf.tmp[1] = -256*brightness;
00767 buf.tmp[2] = (inv_table[0]>>3) *(contrast>>16)*(saturation>>16);
00768 buf.tmp[3] = (inv_table[1]>>3) *(contrast>>16)*(saturation>>16);
00769 buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16));
00770 buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16));
00771
00772
00773 c->CSHIFT = (vector unsigned short)vec_splat_u16(2);
00774 c->CY = vec_splat ((vector signed short)buf.vec, 0);
00775 c->OY = vec_splat ((vector signed short)buf.vec, 1);
00776 c->CRV = vec_splat ((vector signed short)buf.vec, 2);
00777 c->CBU = vec_splat ((vector signed short)buf.vec, 3);
00778 c->CGU = vec_splat ((vector signed short)buf.vec, 4);
00779 c->CGV = vec_splat ((vector signed short)buf.vec, 5);
00780 #if 0
00781 {
00782 int i;
00783 char *v[6]={"cy","oy","crv","cbu","cgu","cgv"};
00784 for (i=0; i<6; i++)
00785 printf("%s %d ", v[i],buf.tmp[i] );
00786 printf("\n");
00787 }
00788 #endif
00789 return;
00790 }
00791
00792
00793 void
00794 altivec_yuv2packedX (SwsContext *c,
00795 int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00796 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00797 uint8_t *dest, int dstW, int dstY)
00798 {
00799 int i,j;
00800 vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
00801 vector signed short R0,G0,B0,R1,G1,B1;
00802
00803 vector unsigned char R,G,B;
00804 vector unsigned char *out,*nout;
00805
00806 vector signed short RND = vec_splat_s16(1<<3);
00807 vector unsigned short SCL = vec_splat_u16(4);
00808 unsigned long scratch[16] __attribute__ ((aligned (16)));
00809
00810 vector signed short *YCoeffs, *CCoeffs;
00811
00812 YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
00813 CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
00814
00815 out = (vector unsigned char *)dest;
00816
00817 for (i=0; i<dstW; i+=16){
00818 Y0 = RND;
00819 Y1 = RND;
00820
00821 for (j=0; j<lumFilterSize; j++) {
00822 X0 = vec_ld (0, &lumSrc[j][i]);
00823 X1 = vec_ld (16, &lumSrc[j][i]);
00824 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00825 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00826 }
00827
00828 U = RND;
00829 V = RND;
00830
00831 for (j=0; j<chrFilterSize; j++) {
00832 X = vec_ld (0, &chrSrc[j][i/2]);
00833 U = vec_mradds (X, CCoeffs[j], U);
00834 X = vec_ld (0, &chrSrc[j][i/2+2048]);
00835 V = vec_mradds (X, CCoeffs[j], V);
00836 }
00837
00838
00839 Y0 = vec_sra (Y0, SCL);
00840 Y1 = vec_sra (Y1, SCL);
00841 U = vec_sra (U, SCL);
00842 V = vec_sra (V, SCL);
00843
00844 Y0 = vec_clip_s16 (Y0);
00845 Y1 = vec_clip_s16 (Y1);
00846 U = vec_clip_s16 (U);
00847 V = vec_clip_s16 (V);
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858 U0 = vec_mergeh (U,U);
00859 V0 = vec_mergeh (V,V);
00860
00861 U1 = vec_mergel (U,U);
00862 V1 = vec_mergel (V,V);
00863
00864 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00865 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00866
00867 R = vec_packclp (R0,R1);
00868 G = vec_packclp (G0,G1);
00869 B = vec_packclp (B0,B1);
00870
00871 switch(c->dstFormat) {
00872 case PIX_FMT_ABGR: out_abgr (R,G,B,out); break;
00873 case PIX_FMT_BGRA: out_bgra (R,G,B,out); break;
00874 case PIX_FMT_RGBA: out_rgba (R,G,B,out); break;
00875 case PIX_FMT_ARGB: out_argb (R,G,B,out); break;
00876 case PIX_FMT_RGB24: out_rgb24 (R,G,B,out); break;
00877 case PIX_FMT_BGR24: out_bgr24 (R,G,B,out); break;
00878 default:
00879 {
00880
00881
00882 static int printed_error_message;
00883 if (!printed_error_message) {
00884 av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00885 sws_format_name(c->dstFormat));
00886 printed_error_message=1;
00887 }
00888 return;
00889 }
00890 }
00891 }
00892
00893 if (i < dstW) {
00894 i -= 16;
00895
00896 Y0 = RND;
00897 Y1 = RND;
00898
00899 for (j=0; j<lumFilterSize; j++) {
00900 X0 = vec_ld (0, &lumSrc[j][i]);
00901 X1 = vec_ld (16, &lumSrc[j][i]);
00902 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00903 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00904 }
00905
00906 U = RND;
00907 V = RND;
00908
00909 for (j=0; j<chrFilterSize; j++) {
00910 X = vec_ld (0, &chrSrc[j][i/2]);
00911 U = vec_mradds (X, CCoeffs[j], U);
00912 X = vec_ld (0, &chrSrc[j][i/2+2048]);
00913 V = vec_mradds (X, CCoeffs[j], V);
00914 }
00915
00916
00917 Y0 = vec_sra (Y0, SCL);
00918 Y1 = vec_sra (Y1, SCL);
00919 U = vec_sra (U, SCL);
00920 V = vec_sra (V, SCL);
00921
00922 Y0 = vec_clip_s16 (Y0);
00923 Y1 = vec_clip_s16 (Y1);
00924 U = vec_clip_s16 (U);
00925 V = vec_clip_s16 (V);
00926
00927
00928
00929
00930
00931
00932
00933
00934
00935
00936 U0 = vec_mergeh (U,U);
00937 V0 = vec_mergeh (V,V);
00938
00939 U1 = vec_mergel (U,U);
00940 V1 = vec_mergel (V,V);
00941
00942 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00943 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00944
00945 R = vec_packclp (R0,R1);
00946 G = vec_packclp (G0,G1);
00947 B = vec_packclp (B0,B1);
00948
00949 nout = (vector unsigned char *)scratch;
00950 switch(c->dstFormat) {
00951 case PIX_FMT_ABGR: out_abgr (R,G,B,nout); break;
00952 case PIX_FMT_BGRA: out_bgra (R,G,B,nout); break;
00953 case PIX_FMT_RGBA: out_rgba (R,G,B,nout); break;
00954 case PIX_FMT_ARGB: out_argb (R,G,B,nout); break;
00955 case PIX_FMT_RGB24: out_rgb24 (R,G,B,nout); break;
00956 case PIX_FMT_BGR24: out_bgr24 (R,G,B,nout); break;
00957 default:
00958
00959 av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00960 sws_format_name(c->dstFormat));
00961 return;
00962 }
00963
00964 memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
00965 }
00966
00967 }