00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
/**
 * put pixels8_x2: for every row, combine the 8 bytes at pixels with the
 * 8 bytes at pixels + 1 through PAVGBP and store the result to block.
 *
 * Asm operands: %0 = h (row counter), %1 = pixels (source, advanced in place),
 * %2 = block (destination, advanced in place), %3 = line_size widened to long.
 * The same stride is applied to source and destination.
 *
 * Each pass of the loop handles 4 rows and subtracts 4 from h; the loop only
 * exits when the counter is exactly zero, so h has to be a positive multiple
 * of 4. Every row reads 9 source bytes (offsets 0..8).
 *
 * NOTE(review): DEF, MOVQ_BFE, PAVGBP, REG_a and ASMALIGN are defined outside
 * this chunk. PAVGBP(a, b, ra, c, d, rc) presumably produces ra = avg(a, b)
 * and rc = avg(c, d) per byte, using the constant MOVQ_BFE placed in mm6 --
 * confirm against the macro definitions.
 */
00032 static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00033 {
00034 MOVQ_BFE(mm6);
00035 __asm __volatile(
00036 "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2 * line_size */
00037 ASMALIGN(3)
00038 "1: \n\t"
00039 "movq (%1), %%mm0 \n\t" /* row 0, bytes 0..7 */
00040 "movq 1(%1), %%mm1 \n\t" /* row 0, bytes 1..8 */
00041 "movq (%1, %3), %%mm2 \n\t" /* row 1, bytes 0..7 */
00042 "movq 1(%1, %3), %%mm3 \n\t" /* row 1, bytes 1..8 */
00043 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00044 "movq %%mm4, (%2) \n\t"
00045 "movq %%mm5, (%2, %3) \n\t"
00046 "add %%"REG_a", %1 \n\t" /* step source and destination by two rows */
00047 "add %%"REG_a", %2 \n\t"
00048 "movq (%1), %%mm0 \n\t" /* rows 2 and 3: same pattern again */
00049 "movq 1(%1), %%mm1 \n\t"
00050 "movq (%1, %3), %%mm2 \n\t"
00051 "movq 1(%1, %3), %%mm3 \n\t"
00052 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00053 "movq %%mm4, (%2) \n\t"
00054 "movq %%mm5, (%2, %3) \n\t"
00055 "add %%"REG_a", %1 \n\t"
00056 "add %%"REG_a", %2 \n\t"
00057 "subl $4, %0 \n\t" /* four rows consumed per pass */
00058 "jnz 1b \n\t"
00059 :"+g"(h), "+S"(pixels), "+D"(block)
00060 :"r"((long)line_size)
00061 :REG_a, "memory");
00062 }
00063
/**
 * put pixels8_l2: combine 8-byte rows from two sources (src1 and src2)
 * through PAVGB/PAVGBP and store them to dst.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 * %4 = src1Stride (long), %5 = dstStride (long).
 * src1 advances by src1Stride per row and dst by dstStride per row; src2 is
 * read at a fixed pitch of 8 bytes per row (offsets 0/8/16/24, then += 32).
 *
 * If h is odd, one row is processed up front and h is decremented. The main
 * loop then handles 4 rows per pass and exits only when h reaches exactly 0.
 *
 * With PIC defined, h is a memory operand ("+m"); otherwise it is pinned to
 * the b register ("+b").
 *
 * NOTE(review): after the odd-row path, execution falls straight into the
 * 4-row loop with no test for a zero remainder, so the code as written
 * expects h - (h & 1) to be a positive multiple of 4 -- verify against
 * callers.
 * NOTE(review): PAVGB(a, b, r, e) / PAVGBP(...) and MOVQ_BFE are defined
 * outside this chunk; presumably r = per-byte average of a and b with mm6 as
 * the helper constant -- confirm.
 */
00064 static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00065 {
00066 MOVQ_BFE(mm6);
00067 __asm __volatile(
00068 "testl $1, %0 \n\t" /* odd row count? */
00069 " jz 1f \n\t"
00070 "movq (%1), %%mm0 \n\t" /* single leading row */
00071 "movq (%2), %%mm1 \n\t"
00072 "add %4, %1 \n\t"
00073 "add $8, %2 \n\t"
00074 PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
00075 "movq %%mm4, (%3) \n\t"
00076 "add %5, %3 \n\t"
00077 "decl %0 \n\t"
00078 ASMALIGN(3)
00079 "1: \n\t"
00080 "movq (%1), %%mm0 \n\t" /* rows 0 and 1 of this pass */
00081 "movq (%2), %%mm1 \n\t"
00082 "add %4, %1 \n\t"
00083 "movq (%1), %%mm2 \n\t"
00084 "movq 8(%2), %%mm3 \n\t"
00085 "add %4, %1 \n\t"
00086 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00087 "movq %%mm4, (%3) \n\t"
00088 "add %5, %3 \n\t"
00089 "movq %%mm5, (%3) \n\t"
00090 "add %5, %3 \n\t"
00091 "movq (%1), %%mm0 \n\t" /* rows 2 and 3 of this pass */
00092 "movq 16(%2), %%mm1 \n\t"
00093 "add %4, %1 \n\t"
00094 "movq (%1), %%mm2 \n\t"
00095 "movq 24(%2), %%mm3 \n\t"
00096 "add %4, %1 \n\t"
00097 "add $32, %2 \n\t" /* four 8-byte src2 rows consumed */
00098 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00099 "movq %%mm4, (%3) \n\t"
00100 "add %5, %3 \n\t"
00101 "movq %%mm5, (%3) \n\t"
00102 "add %5, %3 \n\t"
00103 "subl $4, %0 \n\t"
00104 "jnz 1b \n\t"
00105 #ifdef PIC
00106 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00107 #else
00108 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00109 #endif
00110 :"S"((long)src1Stride), "D"((long)dstStride)
00111 :"memory");
00112 }
00113
/**
 * put pixels16_x2: 16-byte-wide counterpart of the 8-byte x2 routine. For
 * every row, the bytes at offsets 0..15 are combined with the bytes at
 * offsets 1..16 through PAVGBP (as two 8-byte halves) and stored to block.
 *
 * Asm operands: %0 = h, %1 = pixels (advanced), %2 = block (advanced),
 * %3 = line_size widened to long, used as stride for both buffers.
 *
 * Each loop pass handles 4 rows and subtracts 4 from h; the loop exits only
 * when the counter is exactly zero, so h has to be a positive multiple of 4.
 * Every row reads 17 source bytes (offsets 0..16).
 *
 * NOTE(review): PAVGBP and MOVQ_BFE are defined outside this chunk;
 * PAVGBP(a, b, ra, c, d, rc) presumably yields ra = avg(a, b) and
 * rc = avg(c, d) per byte -- confirm.
 */
00114 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00115 {
00116 MOVQ_BFE(mm6);
00117 __asm __volatile(
00118 "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2 * line_size */
00119 ASMALIGN(3)
00120 "1: \n\t"
00121 "movq (%1), %%mm0 \n\t" /* rows 0 and 1, left 8 bytes */
00122 "movq 1(%1), %%mm1 \n\t"
00123 "movq (%1, %3), %%mm2 \n\t"
00124 "movq 1(%1, %3), %%mm3 \n\t"
00125 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00126 "movq %%mm4, (%2) \n\t"
00127 "movq %%mm5, (%2, %3) \n\t"
00128 "movq 8(%1), %%mm0 \n\t" /* rows 0 and 1, right 8 bytes */
00129 "movq 9(%1), %%mm1 \n\t"
00130 "movq 8(%1, %3), %%mm2 \n\t"
00131 "movq 9(%1, %3), %%mm3 \n\t"
00132 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00133 "movq %%mm4, 8(%2) \n\t"
00134 "movq %%mm5, 8(%2, %3) \n\t"
00135 "add %%"REG_a", %1 \n\t" /* step both pointers by two rows */
00136 "add %%"REG_a", %2 \n\t"
00137 "movq (%1), %%mm0 \n\t" /* rows 2 and 3, left 8 bytes */
00138 "movq 1(%1), %%mm1 \n\t"
00139 "movq (%1, %3), %%mm2 \n\t"
00140 "movq 1(%1, %3), %%mm3 \n\t"
00141 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00142 "movq %%mm4, (%2) \n\t"
00143 "movq %%mm5, (%2, %3) \n\t"
00144 "movq 8(%1), %%mm0 \n\t" /* rows 2 and 3, right 8 bytes */
00145 "movq 9(%1), %%mm1 \n\t"
00146 "movq 8(%1, %3), %%mm2 \n\t"
00147 "movq 9(%1, %3), %%mm3 \n\t"
00148 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00149 "movq %%mm4, 8(%2) \n\t"
00150 "movq %%mm5, 8(%2, %3) \n\t"
00151 "add %%"REG_a", %1 \n\t"
00152 "add %%"REG_a", %2 \n\t"
00153 "subl $4, %0 \n\t" /* four rows consumed per pass */
00154 "jnz 1b \n\t"
00155 :"+g"(h), "+S"(pixels), "+D"(block)
00156 :"r"((long)line_size)
00157 :REG_a, "memory");
00158 }
00159
/**
 * put pixels16_l2: combine 16-byte rows from two sources (src1 and src2)
 * through PAVGBP and store them to dst.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 * %4 = src1Stride (long), %5 = dstStride (long).
 * src1 advances by src1Stride per row and dst by dstStride per row; src2 is
 * read at a fixed pitch of 16 bytes per row.
 *
 * If h is odd, one row is processed up front and h is decremented. The main
 * loop then handles 2 rows per pass and exits only when h reaches exactly 0.
 *
 * With PIC defined, h is a memory operand ("+m"); otherwise it is pinned to
 * the b register ("+b").
 *
 * NOTE(review): after the odd-row path, execution falls straight into the
 * 2-row loop with no test for a zero remainder, so h == 1 would enter the
 * loop with a zero counter -- verify that callers never pass it.
 * NOTE(review): PAVGBP and MOVQ_BFE are defined outside this chunk;
 * PAVGBP(a, b, ra, c, d, rc) presumably yields ra = avg(a, b) and
 * rc = avg(c, d) per byte -- confirm.
 */
00160 static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00161 {
00162 MOVQ_BFE(mm6);
00163 __asm __volatile(
00164 "testl $1, %0 \n\t" /* odd row count? */
00165 " jz 1f \n\t"
00166 "movq (%1), %%mm0 \n\t" /* single leading row, both 8-byte halves */
00167 "movq (%2), %%mm1 \n\t"
00168 "movq 8(%1), %%mm2 \n\t"
00169 "movq 8(%2), %%mm3 \n\t"
00170 "add %4, %1 \n\t"
00171 "add $16, %2 \n\t"
00172 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00173 "movq %%mm4, (%3) \n\t"
00174 "movq %%mm5, 8(%3) \n\t"
00175 "add %5, %3 \n\t"
00176 "decl %0 \n\t"
00177 ASMALIGN(3)
00178 "1: \n\t"
00179 "movq (%1), %%mm0 \n\t" /* first row of the pass */
00180 "movq (%2), %%mm1 \n\t"
00181 "movq 8(%1), %%mm2 \n\t"
00182 "movq 8(%2), %%mm3 \n\t"
00183 "add %4, %1 \n\t"
00184 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00185 "movq %%mm4, (%3) \n\t"
00186 "movq %%mm5, 8(%3) \n\t"
00187 "add %5, %3 \n\t"
00188 "movq (%1), %%mm0 \n\t" /* second row of the pass; src2 at +16/+24 */
00189 "movq 16(%2), %%mm1 \n\t"
00190 "movq 8(%1), %%mm2 \n\t"
00191 "movq 24(%2), %%mm3 \n\t"
00192 "add %4, %1 \n\t"
00193 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00194 "movq %%mm4, (%3) \n\t"
00195 "movq %%mm5, 8(%3) \n\t"
00196 "add %5, %3 \n\t"
00197 "add $32, %2 \n\t" /* two 16-byte src2 rows consumed */
00198 "subl $2, %0 \n\t"
00199 "jnz 1b \n\t"
00200 #ifdef PIC
00201 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00202 #else
00203 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00204 #endif
00205 :"S"((long)src1Stride), "D"((long)dstStride)
00206 :"memory");
00207 }
00208
/**
 * put pixels8_y2: for every output row, combine the 8 source bytes of that
 * row with the 8 bytes of the row below it through PAVGBP and store the
 * result to block.
 *
 * Asm operands: %0 = h, %1 = pixels (advanced), %2 = block (advanced),
 * %3 = line_size widened to long, used as stride for both buffers.
 *
 * The first source row is loaded before the loop; each pass then loads four
 * more rows and emits four output rows, carrying the last loaded row over to
 * the next pass (alternately in mm0 and mm2). In total h + 1 source rows are
 * read. h is reduced by 4 per pass and the loop exits only at exactly zero,
 * so h has to be a positive multiple of 4.
 *
 * NOTE(review): PAVGBP and MOVQ_BFE are defined outside this chunk;
 * PAVGBP(a, b, ra, c, d, rc) presumably yields ra = avg(a, b) and
 * rc = avg(c, d) per byte -- confirm.
 */
00209 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00210 {
00211 MOVQ_BFE(mm6);
00212 __asm __volatile(
00213 "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2 * line_size */
00214 "movq (%1), %%mm0 \n\t" /* prime with row 0 */
00215 ASMALIGN(3)
00216 "1: \n\t"
00217 "movq (%1, %3), %%mm1 \n\t" /* row 1 */
00218 "movq (%1, %%"REG_a"),%%mm2 \n\t" /* row 2 */
00219 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
00220 "movq %%mm4, (%2) \n\t"
00221 "movq %%mm5, (%2, %3) \n\t"
00222 "add %%"REG_a", %1 \n\t"
00223 "add %%"REG_a", %2 \n\t"
00224 "movq (%1, %3), %%mm1 \n\t" /* row 3 */
00225 "movq (%1, %%"REG_a"),%%mm0 \n\t" /* row 4, kept in mm0 for the next pass */
00226 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
00227 "movq %%mm4, (%2) \n\t"
00228 "movq %%mm5, (%2, %3) \n\t"
00229 "add %%"REG_a", %1 \n\t"
00230 "add %%"REG_a", %2 \n\t"
00231 "subl $4, %0 \n\t" /* four rows produced per pass */
00232 "jnz 1b \n\t"
00233 :"+g"(h), "+S"(pixels), "+D"(block)
00234 :"r"((long)line_size)
00235 :REG_a, "memory");
00236 }
00237
/**
 * put pixels8_xy2: for every output byte, sum a 2x2 neighbourhood of source
 * bytes (this row and the next, this column and the next) as 16-bit words,
 * add the per-word constant held in mm6, shift right by 2 and store the
 * repacked 8 bytes to block.
 *
 * Asm operands: %0 = h, %1 = pixels (advanced by one row before the loop),
 * %2 = block (input only, never modified), %3 = line_size widened to long.
 * REG_a starts at 0 and grows by line_size per row; it is the byte offset
 * used for both the source and the destination.
 *
 * The horizontal pair sums of the first row are computed before the loop and
 * kept in mm4/mm5. Each pass emits two rows, with mm0/mm1 and mm4/mm5
 * swapping roles as "previous row" and "current row" sums. h is reduced by 2
 * per pass and the loop exits only at exactly zero, so h has to be a positive
 * even number. In total h + 1 source rows of 9 bytes are read.
 *
 * NOTE(review): MOVQ_ZERO and SET_RND are defined outside this chunk.
 * Presumably mm7 is all zeros (it is the unpack partner that widens bytes to
 * words) and mm6 carries the rounding term chosen by SET_RND -- confirm.
 */
00238 static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00239 {
00240 MOVQ_ZERO(mm7);
00241 SET_RND(mm6);
00242 __asm __volatile(
00243 "movq (%1), %%mm0 \n\t" /* row 0, bytes 0..7 */
00244 "movq 1(%1), %%mm4 \n\t" /* row 0, bytes 1..8 */
00245 "movq %%mm0, %%mm1 \n\t"
00246 "movq %%mm4, %%mm5 \n\t"
00247 "punpcklbw %%mm7, %%mm0 \n\t" /* widen low/high 4 bytes to words */
00248 "punpcklbw %%mm7, %%mm4 \n\t"
00249 "punpckhbw %%mm7, %%mm1 \n\t"
00250 "punpckhbw %%mm7, %%mm5 \n\t"
00251 "paddusw %%mm0, %%mm4 \n\t" /* mm4/mm5 = horizontal pair sums of row 0 */
00252 "paddusw %%mm1, %%mm5 \n\t"
00253 "xor %%"REG_a", %%"REG_a" \n\t" /* running byte offset = 0 */
00254 "add %3, %1 \n\t" /* source base now points at row 1 */
00255 ASMALIGN(3)
00256 "1: \n\t"
00257 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00258 "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
00259 "movq %%mm0, %%mm1 \n\t"
00260 "movq %%mm2, %%mm3 \n\t"
00261 "punpcklbw %%mm7, %%mm0 \n\t"
00262 "punpcklbw %%mm7, %%mm2 \n\t"
00263 "punpckhbw %%mm7, %%mm1 \n\t"
00264 "punpckhbw %%mm7, %%mm3 \n\t"
00265 "paddusw %%mm2, %%mm0 \n\t" /* mm0/mm1 = pair sums of the lower row */
00266 "paddusw %%mm3, %%mm1 \n\t"
00267 "paddusw %%mm6, %%mm4 \n\t" /* add the mm6 constant to the upper-row sums */
00268 "paddusw %%mm6, %%mm5 \n\t"
00269 "paddusw %%mm0, %%mm4 \n\t"
00270 "paddusw %%mm1, %%mm5 \n\t"
00271 "psrlw $2, %%mm4 \n\t" /* divide the 4-sample sum by 4 */
00272 "psrlw $2, %%mm5 \n\t"
00273 "packuswb %%mm5, %%mm4 \n\t" /* back to 8 bytes */
00274 "movq %%mm4, (%2, %%"REG_a") \n\t"
00275 "add %3, %%"REG_a" \n\t"
00276
00277 "movq (%1, %%"REG_a"), %%mm2 \n\t" /* second row of the pass: registers swap roles */
00278 "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
00279 "movq %%mm2, %%mm3 \n\t"
00280 "movq %%mm4, %%mm5 \n\t"
00281 "punpcklbw %%mm7, %%mm2 \n\t"
00282 "punpcklbw %%mm7, %%mm4 \n\t"
00283 "punpckhbw %%mm7, %%mm3 \n\t"
00284 "punpckhbw %%mm7, %%mm5 \n\t"
00285 "paddusw %%mm2, %%mm4 \n\t" /* mm4/mm5 = pair sums, reused by the next pass */
00286 "paddusw %%mm3, %%mm5 \n\t"
00287 "paddusw %%mm6, %%mm0 \n\t"
00288 "paddusw %%mm6, %%mm1 \n\t"
00289 "paddusw %%mm4, %%mm0 \n\t"
00290 "paddusw %%mm5, %%mm1 \n\t"
00291 "psrlw $2, %%mm0 \n\t"
00292 "psrlw $2, %%mm1 \n\t"
00293 "packuswb %%mm1, %%mm0 \n\t"
00294 "movq %%mm0, (%2, %%"REG_a") \n\t"
00295 "add %3, %%"REG_a" \n\t"
00296
00297 "subl $2, %0 \n\t" /* two rows produced per pass */
00298 "jnz 1b \n\t"
00299 :"+g"(h), "+S"(pixels)
00300 :"D"(block), "r"((long)line_size)
00301 :REG_a, "memory");
00302 }
00303
00304
/**
 * avg pixels4: for each of h rows, combine the 4 bytes already in block with
 * the 4 bytes at pixels through PAVGB and write the result back to block.
 *
 * Asm operands: %0 = *block (read and written), %1 = *pixels (read only).
 * Both pointers advance by line_size per row in C code. The do/while body
 * runs before h is tested, so h is expected to be at least 1.
 *
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk;
 * PAVGB(a, b, r, e) presumably yields r = per-byte average of a and b using
 * the constant in e (mm6 here) -- confirm.
 */
00305 static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00306 {
00307 MOVQ_BFE(mm6);
00308 JUMPALIGN();
00309 do {
00310 __asm __volatile(
00311 "movd %0, %%mm0 \n\t" /* 4 destination bytes */
00312 "movd %1, %%mm1 \n\t" /* 4 source bytes */
00313 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00314 "movd %%mm2, %0 \n\t"
00315 :"+m"(*block)
00316 :"m"(*pixels)
00317 :"memory");
00318 pixels += line_size;
00319 block += line_size;
00320 }
00321 while (--h);
00322 }
00323
00324
/**
 * avg pixels8: for each of h rows, combine the 8 bytes already in block with
 * the 8 bytes at pixels through PAVGB and write the result back to block.
 *
 * Asm operands: %0 = *block (read and written), %1 = *pixels (read only).
 * Both pointers advance by line_size per row in C code. The do/while body
 * runs before h is tested, so h is expected to be at least 1.
 *
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk;
 * PAVGB(a, b, r, e) presumably yields r = per-byte average of a and b using
 * the constant in e (mm6 here) -- confirm.
 */
00325 static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00326 {
00327 MOVQ_BFE(mm6);
00328 JUMPALIGN();
00329 do {
00330 __asm __volatile(
00331 "movq %0, %%mm0 \n\t" /* 8 destination bytes */
00332 "movq %1, %%mm1 \n\t" /* 8 source bytes */
00333 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00334 "movq %%mm2, %0 \n\t"
00335 :"+m"(*block)
00336 :"m"(*pixels)
00337 :"memory");
00338 pixels += line_size;
00339 block += line_size;
00340 }
00341 while (--h);
00342 }
00343
/**
 * avg pixels16: for each of h rows, combine the 16 bytes already in block
 * with the 16 bytes at pixels through PAVGB (as two 8-byte halves) and write
 * the result back to block.
 *
 * Asm operands: %0 = *block (read and written), %1 = *pixels (read only).
 * Both pointers advance by line_size per row in C code. The do/while body
 * runs before h is tested, so h is expected to be at least 1.
 *
 * NOTE(review): "8%0" / "8%1" paste the digit 8 directly in front of the text
 * the compiler substitutes for the memory operand. This presumably relies on
 * the operand being emitted as a bare "(reg)" with no displacement of its
 * own -- confirm that this holds for all supported compilers and flags.
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk;
 * PAVGB(a, b, r, e) presumably yields r = per-byte average of a and b using
 * the constant in e (mm6 here) -- confirm.
 */
00344 static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00345 {
00346 MOVQ_BFE(mm6);
00347 JUMPALIGN();
00348 do {
00349 __asm __volatile(
00350 "movq %0, %%mm0 \n\t" /* left half: bytes 0..7 */
00351 "movq %1, %%mm1 \n\t"
00352 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00353 "movq %%mm2, %0 \n\t"
00354 "movq 8%0, %%mm0 \n\t" /* right half: bytes 8..15 */
00355 "movq 8%1, %%mm1 \n\t"
00356 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00357 "movq %%mm2, 8%0 \n\t"
00358 :"+m"(*block)
00359 :"m"(*pixels)
00360 :"memory");
00361 pixels += line_size;
00362 block += line_size;
00363 }
00364 while (--h);
00365 }
00366
/**
 * avg pixels8_x2: for each of h rows, first combine the 8 bytes at pixels
 * with the 8 bytes at pixels + 1 through PAVGB, then combine that result with
 * the 8 bytes already in block through a second PAVGB, and write it back.
 *
 * Asm operands: %0 = *block (read and written), %1 = *pixels (read only).
 * Both pointers advance by line_size per row in C code. Every row reads
 * 9 source bytes. The do/while body runs before h is tested, so h is expected
 * to be at least 1.
 *
 * NOTE(review): "1%1" pastes the digit 1 in front of the text substituted for
 * the memory operand; this presumably relies on the operand being emitted as
 * a bare "(reg)" -- confirm.
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk;
 * PAVGB(a, b, r, e) presumably yields r = per-byte average of a and b using
 * the constant in e (mm6 here) -- confirm.
 */
00367 static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00368 {
00369 MOVQ_BFE(mm6);
00370 JUMPALIGN();
00371 do {
00372 __asm __volatile(
00373 "movq %1, %%mm0 \n\t" /* source bytes 0..7 */
00374 "movq 1%1, %%mm1 \n\t" /* source bytes 1..8 */
00375 "movq %0, %%mm3 \n\t" /* current destination bytes */
00376 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00377 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00378 "movq %%mm0, %0 \n\t"
00379 :"+m"(*block)
00380 :"m"(*pixels)
00381 :"memory");
00382 pixels += line_size;
00383 block += line_size;
00384 } while (--h);
00385 }
00386
/**
 * avg pixels8_l2: for each of h rows, combine 8 bytes of src1 with 8 bytes of
 * src2 through PAVGB, then combine that result with the 8 bytes already in
 * dst through a second PAVGB, and write it back to dst.
 *
 * Asm operands: %0 = *dst (read and written), %1 = *src1, %2 = *src2.
 * Per row, dst advances by dstStride, src1 by src1Stride and src2 by a fixed
 * 8 bytes. The do/while body runs before h is tested, so h is expected to be
 * at least 1.
 *
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk;
 * PAVGB(a, b, r, e) presumably yields r = per-byte average of a and b using
 * the constant in e (mm6 here) -- confirm.
 */
00387 static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00388 {
00389 MOVQ_BFE(mm6);
00390 JUMPALIGN();
00391 do {
00392 __asm __volatile(
00393 "movq %1, %%mm0 \n\t" /* src1 row */
00394 "movq %2, %%mm1 \n\t" /* src2 row */
00395 "movq %0, %%mm3 \n\t" /* current destination row */
00396 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00397 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00398 "movq %%mm0, %0 \n\t"
00399 :"+m"(*dst)
00400 :"m"(*src1), "m"(*src2)
00401 :"memory");
00402 dst += dstStride;
00403 src1 += src1Stride;
00404 src2 += 8;
00405 } while (--h);
00406 }
00407
/**
 * avg pixels16_x2: 16-byte-wide counterpart of the 8-byte avg x2 routine.
 * For each of h rows and for each 8-byte half, the source bytes are combined
 * with their right-hand neighbours through PAVGB, the result is combined with
 * the bytes already in block through a second PAVGB, and written back.
 *
 * Asm operands: %0 = *block (read and written), %1 = *pixels (read only).
 * Both pointers advance by line_size per row in C code. Every row reads
 * 17 source bytes (offsets 0..16). The do/while body runs before h is tested,
 * so h is expected to be at least 1.
 *
 * NOTE(review): "1%1", "8%1", "9%1" and "8%0" paste a displacement in front
 * of the text substituted for the memory operand; this presumably relies on
 * the operand being emitted as a bare "(reg)" -- confirm.
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk;
 * PAVGB(a, b, r, e) presumably yields r = per-byte average of a and b using
 * the constant in e (mm6 here) -- confirm.
 */
00408 static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00409 {
00410 MOVQ_BFE(mm6);
00411 JUMPALIGN();
00412 do {
00413 __asm __volatile(
00414 "movq %1, %%mm0 \n\t" /* left half */
00415 "movq 1%1, %%mm1 \n\t"
00416 "movq %0, %%mm3 \n\t"
00417 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00418 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00419 "movq %%mm0, %0 \n\t"
00420 "movq 8%1, %%mm0 \n\t" /* right half */
00421 "movq 9%1, %%mm1 \n\t"
00422 "movq 8%0, %%mm3 \n\t"
00423 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00424 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00425 "movq %%mm0, 8%0 \n\t"
00426 :"+m"(*block)
00427 :"m"(*pixels)
00428 :"memory");
00429 pixels += line_size;
00430 block += line_size;
00431 } while (--h);
00432 }
00433
/**
 * avg pixels16_l2: for each of h rows and for each 8-byte half, combine src1
 * with src2 through PAVGB, then combine that result with the bytes already in
 * dst through a second PAVGB, and write it back to dst.
 *
 * Asm operands: %0 = *dst (read and written), %1 = *src1, %2 = *src2.
 * Per row, dst advances by dstStride, src1 by src1Stride and src2 by a fixed
 * 16 bytes. The do/while body runs before h is tested, so h is expected to be
 * at least 1.
 *
 * NOTE(review): "8%0", "8%1" and "8%2" paste a displacement in front of the
 * text substituted for the memory operand; this presumably relies on the
 * operand being emitted as a bare "(reg)" -- confirm.
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk;
 * PAVGB(a, b, r, e) presumably yields r = per-byte average of a and b using
 * the constant in e (mm6 here) -- confirm.
 */
00434 static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00435 {
00436 MOVQ_BFE(mm6);
00437 JUMPALIGN();
00438 do {
00439 __asm __volatile(
00440 "movq %1, %%mm0 \n\t" /* left half */
00441 "movq %2, %%mm1 \n\t"
00442 "movq %0, %%mm3 \n\t"
00443 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00444 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00445 "movq %%mm0, %0 \n\t"
00446 "movq 8%1, %%mm0 \n\t" /* right half */
00447 "movq 8%2, %%mm1 \n\t"
00448 "movq 8%0, %%mm3 \n\t"
00449 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00450 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00451 "movq %%mm0, 8%0 \n\t"
00452 :"+m"(*dst)
00453 :"m"(*src1), "m"(*src2)
00454 :"memory");
00455 dst += dstStride;
00456 src1 += src1Stride;
00457 src2 += 16;
00458 } while (--h);
00459 }
00460
/**
 * avg pixels8_y2: for every output row, combine the 8 source bytes of that
 * row with the 8 bytes of the row below through PAVGBP, then combine that
 * result with the 8 bytes already in block through PAVGB, and write it back.
 *
 * Asm operands: %0 = h, %1 = pixels (advanced), %2 = block (advanced),
 * %3 = line_size widened to long, used as stride for both buffers.
 *
 * The first source row is loaded before the loop; each pass loads four more
 * rows and emits four output rows. The row that has to survive into the next
 * half/pass is kept in mm2 (first half) or mm0 (second half), while the other
 * registers are reused as PAVGB destinations. In total h + 1 source rows are
 * read. h is reduced by 4 per pass and the loop exits only at exactly zero,
 * so h has to be a positive multiple of 4.
 *
 * NOTE(review): PAVGB, PAVGBP and MOVQ_BFE are defined outside this chunk;
 * presumably they compute per-byte averages with mm6 as the helper constant
 * -- confirm.
 */
00461 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00462 {
00463 MOVQ_BFE(mm6);
00464 __asm __volatile(
00465 "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2 * line_size */
00466 "movq (%1), %%mm0 \n\t" /* prime with row 0 */
00467 ASMALIGN(3)
00468 "1: \n\t"
00469 "movq (%1, %3), %%mm1 \n\t" /* row 1 */
00470 "movq (%1, %%"REG_a"), %%mm2 \n\t" /* row 2, needed again below */
00471 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
00472 "movq (%2), %%mm3 \n\t" /* existing destination row */
00473 PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
00474 "movq (%2, %3), %%mm3 \n\t"
00475 PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
00476 "movq %%mm0, (%2) \n\t"
00477 "movq %%mm1, (%2, %3) \n\t"
00478 "add %%"REG_a", %1 \n\t"
00479 "add %%"REG_a", %2 \n\t"
00480
00481 "movq (%1, %3), %%mm1 \n\t" /* row 3 */
00482 "movq (%1, %%"REG_a"), %%mm0 \n\t" /* row 4, kept in mm0 for the next pass */
00483 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
00484 "movq (%2), %%mm3 \n\t"
00485 PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
00486 "movq (%2, %3), %%mm3 \n\t"
00487 PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
00488 "movq %%mm2, (%2) \n\t"
00489 "movq %%mm1, (%2, %3) \n\t"
00490 "add %%"REG_a", %1 \n\t"
00491 "add %%"REG_a", %2 \n\t"
00492
00493 "subl $4, %0 \n\t" /* four rows produced per pass */
00494 "jnz 1b \n\t"
00495 :"+g"(h), "+S"(pixels), "+D"(block)
00496 :"r"((long)line_size)
00497 :REG_a, "memory");
00498 }
00499
00500
/**
 * avg pixels8_xy2: for every output byte, sum a 2x2 neighbourhood of source
 * bytes as 16-bit words, add the per-word constant held in mm6, shift right
 * by 2 and repack to bytes; then combine that with the 8 bytes already in
 * block through PAVGB and write the result back.
 *
 * Asm operands: %0 = h, %1 = pixels (advanced by one row before the loop),
 * %2 = block (input only, never modified), %3 = line_size widened to long.
 * REG_a starts at 0 and grows by line_size per row; it is the byte offset
 * used for both the source and the destination.
 *
 * Because mm6 is occupied by the SET_RND value, the constant passed to PAVGB
 * is rebuilt in mm2 for each row: pcmpeqd sets all bits and paddb doubles
 * every byte with wraparound, leaving 0xFE in each byte.
 *
 * Each pass emits two rows, with mm0/mm1 and mm4/mm5 swapping roles as
 * "previous row" and "current row" pair sums. h is reduced by 2 per pass and
 * the loop exits only at exactly zero, so h has to be a positive even number.
 * In total h + 1 source rows of 9 bytes are read.
 *
 * NOTE(review): MOVQ_ZERO, SET_RND and PAVGB are defined outside this chunk.
 * Presumably mm7 is all zeros, mm6 carries the rounding term chosen by
 * SET_RND, and PAVGB(a, b, r, e) yields r = per-byte average of a and b
 * -- confirm.
 */
00501 static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00502 {
00503 MOVQ_ZERO(mm7);
00504 SET_RND(mm6);
00505 __asm __volatile(
00506 "movq (%1), %%mm0 \n\t" /* row 0, bytes 0..7 */
00507 "movq 1(%1), %%mm4 \n\t" /* row 0, bytes 1..8 */
00508 "movq %%mm0, %%mm1 \n\t"
00509 "movq %%mm4, %%mm5 \n\t"
00510 "punpcklbw %%mm7, %%mm0 \n\t" /* widen low/high 4 bytes to words */
00511 "punpcklbw %%mm7, %%mm4 \n\t"
00512 "punpckhbw %%mm7, %%mm1 \n\t"
00513 "punpckhbw %%mm7, %%mm5 \n\t"
00514 "paddusw %%mm0, %%mm4 \n\t" /* mm4/mm5 = horizontal pair sums of row 0 */
00515 "paddusw %%mm1, %%mm5 \n\t"
00516 "xor %%"REG_a", %%"REG_a" \n\t" /* running byte offset = 0 */
00517 "add %3, %1 \n\t" /* source base now points at row 1 */
00518 ASMALIGN(3)
00519 "1: \n\t"
00520 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00521 "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
00522 "movq %%mm0, %%mm1 \n\t"
00523 "movq %%mm2, %%mm3 \n\t"
00524 "punpcklbw %%mm7, %%mm0 \n\t"
00525 "punpcklbw %%mm7, %%mm2 \n\t"
00526 "punpckhbw %%mm7, %%mm1 \n\t"
00527 "punpckhbw %%mm7, %%mm3 \n\t"
00528 "paddusw %%mm2, %%mm0 \n\t" /* mm0/mm1 = pair sums of the lower row */
00529 "paddusw %%mm3, %%mm1 \n\t"
00530 "paddusw %%mm6, %%mm4 \n\t" /* add the mm6 constant to the upper-row sums */
00531 "paddusw %%mm6, %%mm5 \n\t"
00532 "paddusw %%mm0, %%mm4 \n\t"
00533 "paddusw %%mm1, %%mm5 \n\t"
00534 "psrlw $2, %%mm4 \n\t" /* divide the 4-sample sum by 4 */
00535 "psrlw $2, %%mm5 \n\t"
00536 "movq (%2, %%"REG_a"), %%mm3 \n\t" /* existing destination row */
00537 "packuswb %%mm5, %%mm4 \n\t"
00538 "pcmpeqd %%mm2, %%mm2 \n\t" /* mm2 = all ones ... */
00539 "paddb %%mm2, %%mm2 \n\t" /* ... doubled per byte = 0xFE in every byte */
00540 PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
00541 "movq %%mm5, (%2, %%"REG_a") \n\t"
00542 "add %3, %%"REG_a" \n\t"
00543
00544 "movq (%1, %%"REG_a"), %%mm2 \n\t" /* second row of the pass: registers swap roles */
00545 "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
00546 "movq %%mm2, %%mm3 \n\t"
00547 "movq %%mm4, %%mm5 \n\t"
00548 "punpcklbw %%mm7, %%mm2 \n\t"
00549 "punpcklbw %%mm7, %%mm4 \n\t"
00550 "punpckhbw %%mm7, %%mm3 \n\t"
00551 "punpckhbw %%mm7, %%mm5 \n\t"
00552 "paddusw %%mm2, %%mm4 \n\t" /* mm4/mm5 = pair sums, reused by the next pass */
00553 "paddusw %%mm3, %%mm5 \n\t"
00554 "paddusw %%mm6, %%mm0 \n\t"
00555 "paddusw %%mm6, %%mm1 \n\t"
00556 "paddusw %%mm4, %%mm0 \n\t"
00557 "paddusw %%mm5, %%mm1 \n\t"
00558 "psrlw $2, %%mm0 \n\t"
00559 "psrlw $2, %%mm1 \n\t"
00560 "movq (%2, %%"REG_a"), %%mm3 \n\t"
00561 "packuswb %%mm1, %%mm0 \n\t"
00562 "pcmpeqd %%mm2, %%mm2 \n\t" /* rebuild the 0xFE byte constant in mm2 */
00563 "paddb %%mm2, %%mm2 \n\t"
00564 PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
00565 "movq %%mm1, (%2, %%"REG_a") \n\t"
00566 "add %3, %%"REG_a" \n\t"
00567
00568 "subl $2, %0 \n\t" /* two rows produced per pass */
00569 "jnz 1b \n\t"
00570 :"+g"(h), "+S"(pixels)
00571 :"D"(block), "r"((long)line_size)
00572 :REG_a, "memory");
00573 }
00574
00575
00576 static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00577 DEF(put, pixels8_y2)(block , pixels , line_size, h);
00578 DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
00579 }
00580
00581 static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00582 DEF(put, pixels8_xy2)(block , pixels , line_size, h);
00583 DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h);
00584 }
00585
00586 static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00587 DEF(avg, pixels8_y2)(block , pixels , line_size, h);
00588 DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h);
00589 }
00590
00591 static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00592 DEF(avg, pixels8_xy2)(block , pixels , line_size, h);
00593 DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h);
00594 }