00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "dsputil.h"
/*
 * Transform coefficients.  IDCT_Ck is numerically sqrt(2)*cos(k*pi/16)
 * (IDCT_C4 is exactly 1).  The macros only exist while the two tables
 * below are being defined.
 */
#define IDCT_C1 1.38703984532214752434
#define IDCT_C2 1.30656296487637657577
#define IDCT_C3 1.17587560241935884520
#define IDCT_C4 1.00000000000000000000
#define IDCT_C5 0.78569495838710234903
#define IDCT_C6 0.54119610014619712324
#define IDCT_C7 0.27589937928294311353

/*
 * 4x4 matrix applied to the even-indexed inputs (0,2,4,6).
 * Each line of four values holds the weights one input contributes to
 * the four outputs (see ftrv_() for the exact indexing).
 */
static const float even_table[] __attribute__ ((aligned(8))) = {
         IDCT_C4,  IDCT_C4,  IDCT_C4,  IDCT_C4,
         IDCT_C2,  IDCT_C6, -IDCT_C6, -IDCT_C2,
         IDCT_C4, -IDCT_C4, -IDCT_C4,  IDCT_C4,
         IDCT_C6, -IDCT_C2,  IDCT_C2, -IDCT_C6
};

/* 4x4 matrix applied to the odd-indexed inputs (1,3,5,7); same layout. */
static const float odd_table[] __attribute__ ((aligned(8))) = {
         IDCT_C1,  IDCT_C3,  IDCT_C5,  IDCT_C7,
         IDCT_C3, -IDCT_C7, -IDCT_C1, -IDCT_C5,
         IDCT_C5, -IDCT_C1,  IDCT_C7,  IDCT_C3,
         IDCT_C7, -IDCT_C5,  IDCT_C3, -IDCT_C1
};

#undef IDCT_C1
#undef IDCT_C2
#undef IDCT_C3
#undef IDCT_C4
#undef IDCT_C5
#undef IDCT_C6
#undef IDCT_C7
00053
00054 #if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
00055
/*
 * Load the 16 consecutive floats at `table` into the SH4 registers
 * xd0..xd14, i.e. the matrix that "ftrv xmtrx,fv0" multiplies by.
 * The surrounding fschg pair switches fmov to 64-bit pair transfers for
 * the eight loads and back again (SH-4 FPSCR.SZ toggle); the table must
 * therefore be 8-byte aligned.
 *
 * "fmov @%0+" post-increments the address register, so the pointer is
 * copied into a local and handed to the asm as a read-write ("+r")
 * operand.  Declaring it input-only, as before, lets the compiler assume
 * the register still holds the original address afterwards.
 */
#define load_matrix(table) \
        do { \
                const float *load_matrix_src_ = (table); \
                __asm__ volatile( \
                " fschg\n" \
                " fmov @%0+,xd0\n" \
                " fmov @%0+,xd2\n" \
                " fmov @%0+,xd4\n" \
                " fmov @%0+,xd6\n" \
                " fmov @%0+,xd8\n" \
                " fmov @%0+,xd10\n" \
                " fmov @%0+,xd12\n" \
                " fmov @%0+,xd14\n" \
                " fschg\n" \
                : "+r"(load_matrix_src_) \
                ); \
        } while (0)
00072
/*
 * Multiply the vector held in fr0..fr3 by the matrix previously loaded
 * with load_matrix(), leaving the result in fr0..fr3.  All four
 * registers are tied input/output operands of the single ftrv
 * instruction.
 *
 * The expansion deliberately carries no trailing ';' so that `ftrv();`
 * is exactly one statement, matching the portable ftrv() macro.
 */
#define ftrv() \
        __asm__ volatile("ftrv xmtrx,fv0" \
        : "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
        :  "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) )

/*
 * Declare the four working floats and pin each one to the hardware
 * register of the same name, so they form the fv0 operand of ftrv().
 * Use as `DEFREG;` at the top of a function body.
 */
#define DEFREG \
        register float fr0 __asm__("fr0"); \
        register float fr1 __asm__("fr1"); \
        register float fr2 __asm__("fr2"); \
        register float fr3 __asm__("fr3")
00083
00084 #else
00085
00086
00087
/*
 * Portable stand-in for the SH4 ftrv instruction: replace the 4-vector
 * fv by the product of the 4x4 matrix xf with fv.
 *
 * xf - 16 floats; the weight of input c in output r is xf[4*c + r]
 * fv - 4 floats, read as input and overwritten with the result
 */
static void ftrv_(const float xf[],float fv[])
{
        /* Snapshot the inputs first: fv is overwritten in place. */
        const float v0 = fv[0];
        const float v1 = fv[1];
        const float v2 = fv[2];
        const float v3 = fv[3];
        int row;

        for (row = 0; row < 4; row++)
                fv[row] = xf[row]*v0 + xf[row + 4]*v1 + xf[row + 8]*v2 + xf[row + 12]*v3;
}
00100
/*
 * Portable stand-in for the SH4 matrix load: copy the 16 floats of
 * `table` into the working matrix `xf`.
 */
static void load_matrix_(float xf[],const float table[])
{
        const float *src = table;
        const float *const end = table + 16;
        float *dst = xf;

        while (src != end)
                *dst++ = *src++;
}
00106
/*
 * Portable replacements for the SH4 register/asm macros.
 *
 * DEFREG declares the scratch vector fv[4] and matrix xf[16] as ordinary
 * locals; fr0..fr3 alias the vector elements; load_matrix()/ftrv()
 * forward to the C helpers using those locals.
 */
#define DEFREG \
        float fv[4], xf[16]

#define fr0 (fv[0])
#define fr1 (fv[1])
#define fr2 (fv[2])
#define fr3 (fv[3])

#define load_matrix(table) load_matrix_(xf, (table))
#define ftrv()             ftrv_(xf, fv)
00117
00118 #endif
00119
/*
 * DESCALE(x,n): scale x down by 2^n.
 * The active variant multiplies by the float reciprocal 1/2^n and yields
 * a float; the disabled variant converts to int and does a rounding
 * right shift.  Both expansions are fully parenthesised so the macro can
 * be used safely inside larger expressions.
 */
#if 1
#define DESCALE(x,n)    ((x)*(1.0f/(1<<(n))))
#else
#define DESCALE(x,n)    (((int)(x)+(1<<((n)-1)))>>(n))
#endif
00125
00126
00127
00128
00129 #if 1
00130
00131
00132 void idct_sh4(DCTELEM *block)
00133 {
00134 DEFREG;
00135
00136 int i;
00137 float tblock[8*8],*fblock;
00138 int ofs1,ofs2,ofs3;
00139
00140 #if defined(__SH4__)
00141 #error "FIXME!! change to single float"
00142 #endif
00143
00144
00145
00146
00147 load_matrix(even_table);
00148
00149 fblock = tblock+4;
00150 i = 8;
00151 do {
00152 fr0 = block[0];
00153 fr1 = block[2];
00154 fr2 = block[4];
00155 fr3 = block[6];
00156 block+=8;
00157 ftrv();
00158 *--fblock = fr3;
00159 *--fblock = fr2;
00160 *--fblock = fr1;
00161 *--fblock = fr0;
00162 fblock+=8+4;
00163 } while(--i);
00164 block-=8*8;
00165 fblock-=8*8+4;
00166
00167 load_matrix(odd_table);
00168
00169 i = 8;
00170
00171
00172
00173
00174
00175 do {
00176 float t0,t1,t2,t3;
00177 fr0 = block[1];
00178 fr1 = block[3];
00179 fr2 = block[5];
00180 fr3 = block[7];
00181 block+=8;
00182 ftrv();
00183 t0 = *fblock++;
00184 t1 = *fblock++;
00185 t2 = *fblock++;
00186 t3 = *fblock++;
00187 fblock+=4;
00188 *--fblock = t0 - fr0;
00189 *--fblock = t1 - fr1;
00190 *--fblock = t2 - fr2;
00191 *--fblock = t3 - fr3;
00192 *--fblock = t3 + fr3;
00193 *--fblock = t2 + fr2;
00194 *--fblock = t1 + fr1;
00195 *--fblock = t0 + fr0;
00196 fblock+=8;
00197 } while(--i);
00198 block-=8*8;
00199 fblock-=8*8;
00200
00201
00202
00203
00204 load_matrix(even_table);
00205
00206 ofs1 = sizeof(float)*2*8;
00207 ofs2 = sizeof(float)*4*8;
00208 ofs3 = sizeof(float)*6*8;
00209
00210 i = 8;
00211
00212 #define OA(fblock,ofs) *(float*)((char*)fblock + ofs)
00213
00214 do {
00215 fr0 = OA(fblock, 0);
00216 fr1 = OA(fblock,ofs1);
00217 fr2 = OA(fblock,ofs2);
00218 fr3 = OA(fblock,ofs3);
00219 ftrv();
00220 OA(fblock,0 ) = fr0;
00221 OA(fblock,ofs1) = fr1;
00222 OA(fblock,ofs2) = fr2;
00223 OA(fblock,ofs3) = fr3;
00224 fblock++;
00225 } while(--i);
00226 fblock-=8;
00227
00228 load_matrix(odd_table);
00229
00230 i=8;
00231 do {
00232 float t0,t1,t2,t3;
00233 t0 = OA(fblock, 0);
00234 t1 = OA(fblock,ofs1);
00235 t2 = OA(fblock,ofs2);
00236 t3 = OA(fblock,ofs3);
00237 fblock+=8;
00238 fr0 = OA(fblock, 0);
00239 fr1 = OA(fblock,ofs1);
00240 fr2 = OA(fblock,ofs2);
00241 fr3 = OA(fblock,ofs3);
00242 fblock+=-8+1;
00243 ftrv();
00244 block[8*0] = DESCALE(t0 + fr0,3);
00245 block[8*7] = DESCALE(t0 - fr0,3);
00246 block[8*1] = DESCALE(t1 + fr1,3);
00247 block[8*6] = DESCALE(t1 - fr1,3);
00248 block[8*2] = DESCALE(t2 + fr2,3);
00249 block[8*5] = DESCALE(t2 - fr2,3);
00250 block[8*3] = DESCALE(t3 + fr3,3);
00251 block[8*4] = DESCALE(t3 - fr3,3);
00252 block++;
00253 } while(--i);
00254
00255 #if defined(__SH4__)
00256 #error "FIXME!! change to double"
00257 #endif
00258 }
00259 #else
00260 void idct_sh4(DCTELEM *block)
00261 {
00262 DEFREG;
00263
00264 int i;
00265 float tblock[8*8],*fblock;
00266
00267
00268
00269
00270 load_matrix(even_table);
00271
00272 fblock = tblock;
00273 i = 8;
00274 do {
00275 fr0 = block[0];
00276 fr1 = block[2];
00277 fr2 = block[4];
00278 fr3 = block[6];
00279 block+=8;
00280 ftrv();
00281 fblock[0] = fr0;
00282 fblock[2] = fr1;
00283 fblock[4] = fr2;
00284 fblock[6] = fr3;
00285 fblock+=8;
00286 } while(--i);
00287 block-=8*8;
00288 fblock-=8*8;
00289
00290 load_matrix(odd_table);
00291
00292 i = 8;
00293
00294 do {
00295 float t0,t1,t2,t3;
00296 fr0 = block[1];
00297 fr1 = block[3];
00298 fr2 = block[5];
00299 fr3 = block[7];
00300 block+=8;
00301 ftrv();
00302 t0 = fblock[0];
00303 t1 = fblock[2];
00304 t2 = fblock[4];
00305 t3 = fblock[6];
00306 fblock[0] = t0 + fr0;
00307 fblock[7] = t0 - fr0;
00308 fblock[1] = t1 + fr1;
00309 fblock[6] = t1 - fr1;
00310 fblock[2] = t2 + fr2;
00311 fblock[5] = t2 - fr2;
00312 fblock[3] = t3 + fr3;
00313 fblock[4] = t3 - fr3;
00314 fblock+=8;
00315 } while(--i);
00316 block-=8*8;
00317 fblock-=8*8;
00318
00319
00320
00321
00322 load_matrix(even_table);
00323
00324 i = 8;
00325
00326 do {
00327 fr0 = fblock[8*0];
00328 fr1 = fblock[8*2];
00329 fr2 = fblock[8*4];
00330 fr3 = fblock[8*6];
00331 ftrv();
00332 fblock[8*0] = fr0;
00333 fblock[8*2] = fr1;
00334 fblock[8*4] = fr2;
00335 fblock[8*6] = fr3;
00336 fblock++;
00337 } while(--i);
00338 fblock-=8;
00339
00340 load_matrix(odd_table);
00341
00342 i=8;
00343 do {
00344 float t0,t1,t2,t3;
00345 fr0 = fblock[8*1];
00346 fr1 = fblock[8*3];
00347 fr2 = fblock[8*5];
00348 fr3 = fblock[8*7];
00349 ftrv();
00350 t0 = fblock[8*0];
00351 t1 = fblock[8*2];
00352 t2 = fblock[8*4];
00353 t3 = fblock[8*6];
00354 fblock++;
00355 block[8*0] = DESCALE(t0 + fr0,3);
00356 block[8*7] = DESCALE(t0 - fr0,3);
00357 block[8*1] = DESCALE(t1 + fr1,3);
00358 block[8*6] = DESCALE(t1 - fr1,3);
00359 block[8*2] = DESCALE(t2 + fr2,3);
00360 block[8*5] = DESCALE(t2 - fr2,3);
00361 block[8*3] = DESCALE(t3 + fr3,3);
00362 block[8*4] = DESCALE(t3 - fr3,3);
00363 block++;
00364 } while(--i);
00365 }
00366 #endif