00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "dsputil.h"
00023 #ifdef HAVE_IPP
00024 #include "ipp.h"
00025 #endif
00026
00027 extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
00028
00029 extern void j_rev_dct_ARM(DCTELEM *data);
00030 extern void simple_idct_ARM(DCTELEM *data);
00031
00032 extern void simple_idct_armv5te(DCTELEM *data);
00033 extern void simple_idct_put_armv5te(uint8_t *dest, int line_size,
00034 DCTELEM *data);
00035 extern void simple_idct_add_armv5te(uint8_t *dest, int line_size,
00036 DCTELEM *data);
00037
00038 extern void ff_simple_idct_armv6(DCTELEM *data);
00039 extern void ff_simple_idct_put_armv6(uint8_t *dest, int line_size,
00040 DCTELEM *data);
00041 extern void ff_simple_idct_add_armv6(uint8_t *dest, int line_size,
00042 DCTELEM *data);
00043
00044
00045 static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
00046 static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
00047
00048 void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
00049 void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
00050 void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
00051 void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
00052
00053 void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
00054 void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
00055 void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
00056
00057 void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
00058
00059 CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8)
00060 CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8)
00061 CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8)
00062 CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
00063 CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
00064 CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
00065
/**
 * Add an 8x8 block of signed 16-bit values to an 8x8 block of bytes, in
 * place, saturating each result to 0..255.
 *
 * The loop runs 8 times.  Each pass consumes 8 consecutive 16-bit values
 * from block (16 bytes) and updates 8 bytes at dest, then advances dest by
 * line_size bytes.
 *
 * The destination is read and written as two 32-bit words per row, and the
 * first value of each group of four is paired with bits 0-7 of the word.
 * NOTE(review): this presumably requires a little-endian target and a
 * word-aligned dest / line_size -- confirm against the supported platforms.
 *
 * Saturation is detected by testing only bit 8 of each sum, so a sum outside
 * -256..511 is not caught.  NOTE(review): presumably callers guarantee the
 * value range -- confirm.
 *
 * @param block     64 signed 16-bit values, read sequentially
 * @param dest      top-left byte of the 8x8 area that is updated
 * @param line_size distance in bytes between successive rows of dest
 */
00066 static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size)
00067 {
00068 asm volatile (
00069 "mov r10, #8 \n\t" /* r10 = rows left to process */
00070
00071 "1: \n\t" /* top of the per-row loop */
00072
00073
00074 "ldr r4, [%1] \n\t" /* r4 = dest bytes 0..3 as one word */
00075
00076 "ldrsh r5, [%0] \n\t" /* r5 = value 0, sign-extended */
00077 "ldrsh r7, [%0, #2] \n\t" /* r7 = value 1, sign-extended */
00078 "and r6, r4, #0xFF \n\t" /* r6 = pixel in bits 0-7 */
00079 "and r8, r4, #0xFF00 \n\t" /* r8 = pixel in bits 8-15, not yet shifted down */
00080 "add r6, r5, r6 \n\t" /* r6 = value 0 + pixel 0 */
00081 "add r8, r7, r8, lsr #8 \n\t" /* r8 = value 1 + pixel 1 */
00082 "mvn r5, r5 \n\t" /* ~value: top byte is 0x00 if value < 0, 0xFF otherwise */
00083 "mvn r7, r7 \n\t"
00084 "tst r6, #0x100 \n\t" /* bit 8 set => sum is outside 0..255 */
00085 "movne r6, r5, lsr #24 \n\t" /* ...then replace it with 0 or 255 (top byte of ~value) */
00086 "tst r8, #0x100 \n\t"
00087 "movne r8, r7, lsr #24 \n\t"
00088 "mov r9, r6 \n\t" /* r9 accumulates the packed output word */
00089 "ldrsh r5, [%0, #4] \n\t" /* r5 = value 2 (load interleaved with the packing) */
00090 "orr r9, r9, r8, lsl #8 \n\t"
00091
00092
00093 "ldrsh r7, [%0, #6] \n\t" /* r7 = value 3 */
00094 "and r6, r4, #0xFF0000 \n\t" /* pixel in bits 16-23 */
00095 "and r8, r4, #0xFF000000 \n\t" /* pixel in bits 24-31 */
00096 "add r6, r5, r6, lsr #16 \n\t"
00097 "add r8, r7, r8, lsr #24 \n\t"
00098 "mvn r5, r5 \n\t"
00099 "mvn r7, r7 \n\t"
00100 "tst r6, #0x100 \n\t"
00101 "movne r6, r5, lsr #24 \n\t"
00102 "tst r8, #0x100 \n\t"
00103 "movne r8, r7, lsr #24 \n\t"
00104 "orr r9, r9, r6, lsl #16 \n\t"
00105 "ldr r4, [%1, #4] \n\t" /* r4 = dest bytes 4..7, fetched before the first word is stored */
00106 "orr r9, r9, r8, lsl #24 \n\t"
00107
00108 "ldrsh r5, [%0, #8] \n\t" /* r5 = value 4 */
00109 "str r9, [%1] \n\t" /* write back result bytes 0..3 */
00110
00111
00112
00113
00114
00115 "ldrsh r7, [%0, #10] \n\t" /* r7 = value 5 */
00116 "and r6, r4, #0xFF \n\t" /* second word: same sequence as for bytes 0..3 */
00117 "and r8, r4, #0xFF00 \n\t"
00118 "add r6, r5, r6 \n\t"
00119 "add r8, r7, r8, lsr #8 \n\t"
00120 "mvn r5, r5 \n\t"
00121 "mvn r7, r7 \n\t"
00122 "tst r6, #0x100 \n\t"
00123 "movne r6, r5, lsr #24 \n\t"
00124 "tst r8, #0x100 \n\t"
00125 "movne r8, r7, lsr #24 \n\t"
00126 "mov r9, r6 \n\t"
00127 "ldrsh r5, [%0, #12] \n\t" /* r5 = value 6 */
00128 "orr r9, r9, r8, lsl #8 \n\t"
00129
00130
00131 "ldrsh r7, [%0, #14] \n\t" /* r7 = value 7 */
00132 "and r6, r4, #0xFF0000 \n\t"
00133 "and r8, r4, #0xFF000000 \n\t"
00134 "add r6, r5, r6, lsr #16 \n\t"
00135 "add r8, r7, r8, lsr #24 \n\t"
00136 "mvn r5, r5 \n\t"
00137 "mvn r7, r7 \n\t"
00138 "tst r6, #0x100 \n\t"
00139 "movne r6, r5, lsr #24 \n\t"
00140 "tst r8, #0x100 \n\t"
00141 "movne r8, r7, lsr #24 \n\t"
00142 "orr r9, r9, r6, lsl #16 \n\t"
00143 "add %0, %0, #16 \n\t" /* advance block by 8 values (16 bytes) */
00144 "orr r9, r9, r8, lsl #24 \n\t"
00145 "subs r10, r10, #1 \n\t" /* one row done; sets the flags tested by bne below */
00146
00147 "str r9, [%1, #4] \n\t" /* write back result bytes 4..7 (str leaves the flags alone) */
00148
00149
00150
00151 "add %1, %1, %2 \n\t" /* advance dest by line_size */
00152 "bne 1b \n\t" /* loop until r10 reaches zero */
00153 : "+r"(block), /* %0: read and advanced by the asm */
00154 "+r"(dest) /* %1: read and advanced by the asm */
00155 : "r"(line_size) /* %2 */
00156 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" ); /* scratch registers are hard-coded, hence listed as clobbers */
00157 }
00158
00159
00160
00161 static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
00162 {
00163 j_rev_dct_ARM (block);
00164 ff_put_pixels_clamped(block, dest, line_size);
00165 }
00166 static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
00167 {
00168 j_rev_dct_ARM (block);
00169 ff_add_pixels_clamped(block, dest, line_size);
00170 }
00171 static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
00172 {
00173 simple_idct_ARM (block);
00174 ff_put_pixels_clamped(block, dest, line_size);
00175 }
00176 static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
00177 {
00178 simple_idct_ARM (block);
00179 ff_add_pixels_clamped(block, dest, line_size);
00180 }
00181
00182 #ifdef HAVE_IPP
00183 static void simple_idct_ipp(DCTELEM *block)
00184 {
00185 ippiDCT8x8Inv_Video_16s_C1I(block);
00186 }
00187 static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block)
00188 {
00189 ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size);
00190 }
00191
00192 void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size);
00193
00194 static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
00195 {
00196 ippiDCT8x8Inv_Video_16s_C1I(block);
00197 #ifdef HAVE_IWMMXT
00198 add_pixels_clamped_iwmmxt(block, dest, line_size);
00199 #else
00200 add_pixels_clamped_ARM(block, dest, line_size);
00201 #endif
00202 }
00203 #endif
00204
00205 void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
00206 {
00207 int idct_algo= avctx->idct_algo;
00208
00209 ff_put_pixels_clamped = c->put_pixels_clamped;
00210 ff_add_pixels_clamped = c->add_pixels_clamped;
00211
00212 if (avctx->lowres == 0) {
00213 if(idct_algo == FF_IDCT_AUTO){
00214 #if defined(HAVE_IPP)
00215 idct_algo = FF_IDCT_IPP;
00216 #elif defined(HAVE_ARMV6)
00217 idct_algo = FF_IDCT_SIMPLEARMV6;
00218 #elif defined(HAVE_ARMV5TE)
00219 idct_algo = FF_IDCT_SIMPLEARMV5TE;
00220 #else
00221 idct_algo = FF_IDCT_ARM;
00222 #endif
00223 }
00224
00225 if(idct_algo==FF_IDCT_ARM){
00226 c->idct_put= j_rev_dct_ARM_put;
00227 c->idct_add= j_rev_dct_ARM_add;
00228 c->idct = j_rev_dct_ARM;
00229 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
00230 } else if (idct_algo==FF_IDCT_SIMPLEARM){
00231 c->idct_put= simple_idct_ARM_put;
00232 c->idct_add= simple_idct_ARM_add;
00233 c->idct = simple_idct_ARM;
00234 c->idct_permutation_type= FF_NO_IDCT_PERM;
00235 #ifdef HAVE_ARMV6
00236 } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
00237 c->idct_put= ff_simple_idct_put_armv6;
00238 c->idct_add= ff_simple_idct_add_armv6;
00239 c->idct = ff_simple_idct_armv6;
00240 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
00241 #endif
00242 #ifdef HAVE_ARMV5TE
00243 } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
00244 c->idct_put= simple_idct_put_armv5te;
00245 c->idct_add= simple_idct_add_armv5te;
00246 c->idct = simple_idct_armv5te;
00247 c->idct_permutation_type = FF_NO_IDCT_PERM;
00248 #endif
00249 #ifdef HAVE_IPP
00250 } else if (idct_algo==FF_IDCT_IPP){
00251 c->idct_put= simple_idct_ipp_put;
00252 c->idct_add= simple_idct_ipp_add;
00253 c->idct = simple_idct_ipp;
00254 c->idct_permutation_type= FF_NO_IDCT_PERM;
00255 #endif
00256 }
00257 }
00258
00259 c->put_pixels_tab[0][0] = put_pixels16_arm;
00260 c->put_pixels_tab[0][1] = put_pixels16_x2_arm;
00261 c->put_pixels_tab[0][2] = put_pixels16_y2_arm;
00262 c->put_pixels_tab[0][3] = put_pixels16_xy2_arm;
00263 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm;
00264 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm;
00265 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm;
00266 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm;
00267 c->put_pixels_tab[1][0] = put_pixels8_arm;
00268 c->put_pixels_tab[1][1] = put_pixels8_x2_arm;
00269 c->put_pixels_tab[1][2] = put_pixels8_y2_arm;
00270 c->put_pixels_tab[1][3] = put_pixels8_xy2_arm;
00271 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;
00272 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm;
00273 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm;
00274 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;
00275
00276 #ifdef HAVE_IWMMXT
00277 dsputil_init_iwmmxt(c, avctx);
00278 #endif
00279 }