00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
/*
 * Horizontal half-pel "put" for an 8-pixel-wide block: every output row is
 * PAVGB(pixels[x], pixels[x + 1]) for x = 0..7, stored to block.
 *
 * block     - destination; consecutive rows are line_size bytes apart
 * pixels    - source, same stride; each row is also read at byte offset 1
 * line_size - row stride in bytes, shared by source and destination
 * h         - row count. The loop handles 4 rows per pass and exits only
 *             when the counter reaches exactly 0, so h must be a positive
 *             multiple of 4.
 *
 * DEF, PAVGB and REG_a are macros defined outside this view; PAVGB is
 * presumably the packed unsigned byte average (pavgb / pavgusb) -- confirm.
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 */
00034 static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00035 {
00036 __asm __volatile(
    /* REG_a = 2 * line_size: the pointer step for one pair of rows */
00037 "lea (%3, %3), %%"REG_a" \n\t"
00038 "1: \n\t"
    /* rows 0 and 1 of this pass */
00039 "movq (%1), %%mm0 \n\t"
00040 "movq (%1, %3), %%mm1 \n\t"
00041 PAVGB" 1(%1), %%mm0 \n\t"
00042 PAVGB" 1(%1, %3), %%mm1 \n\t"
00043 "movq %%mm0, (%2) \n\t"
00044 "movq %%mm1, (%2, %3) \n\t"
00045 "add %%"REG_a", %1 \n\t"
00046 "add %%"REG_a", %2 \n\t"
    /* rows 2 and 3 of this pass */
00047 "movq (%1), %%mm0 \n\t"
00048 "movq (%1, %3), %%mm1 \n\t"
00049 PAVGB" 1(%1), %%mm0 \n\t"
00050 PAVGB" 1(%1, %3), %%mm1 \n\t"
00051 "add %%"REG_a", %1 \n\t"
00052 "movq %%mm0, (%2) \n\t"
00053 "movq %%mm1, (%2, %3) \n\t"
00054 "add %%"REG_a", %2 \n\t"
00055 "subl $4, %0 \n\t"
00056 "jnz 1b \n\t"
00057 :"+g"(h), "+S"(pixels), "+D"(block)
00058 :"r" ((long)line_size)
00059 :"%"REG_a, "memory");
00060 }
00061
/*
 * "put" of the average of two sources, 4 pixels wide:
 *     dst[x] = PAVGB(src1[x], src2[x]),  x = 0..3, for each row.
 *
 * dst        - destination, advanced by dstStride after every row
 * src1       - first source, advanced by src1Stride after every row
 * src2       - second source, read as tightly packed rows (4 bytes per row)
 * dstStride  - destination row stride in bytes
 * src1Stride - src1 row stride in bytes
 * h          - row count. When h is odd a single row is processed before
 *              the loop. The loop is then entered unconditionally, handles
 *              4 rows per pass and exits only when the counter is exactly
 *              0, so h - (h & 1) must be a positive multiple of 4.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 *               %4 = src1Stride, %5 = dstStride.
 * With PIC defined h is a memory operand instead of living in the "b"
 * register; presumably because ebx is reserved in 32-bit PIC code -- confirm.
 * PAVGB and DEF are macros defined outside this view.
 */
00062 static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00063 {
00064 __asm __volatile(
    /* odd h: peel off one row so the main loop sees the remainder */
00065 "testl $1, %0 \n\t"
00066 " jz 1f \n\t"
00067 "movd (%1), %%mm0 \n\t"
00068 "movd (%2), %%mm1 \n\t"
00069 "add %4, %1 \n\t"
00070 "add $4, %2 \n\t"
00071 PAVGB" %%mm1, %%mm0 \n\t"
00072 "movd %%mm0, (%3) \n\t"
00073 "add %5, %3 \n\t"
00074 "decl %0 \n\t"
    /* main loop: 4 rows per pass, src2 consumed at offsets 0/4/8/12 */
00075 "1: \n\t"
00076 "movd (%1), %%mm0 \n\t"
00077 "add %4, %1 \n\t"
00078 "movd (%1), %%mm1 \n\t"
00079 "movd (%2), %%mm2 \n\t"
00080 "movd 4(%2), %%mm3 \n\t"
00081 "add %4, %1 \n\t"
00082 PAVGB" %%mm2, %%mm0 \n\t"
00083 PAVGB" %%mm3, %%mm1 \n\t"
00084 "movd %%mm0, (%3) \n\t"
00085 "add %5, %3 \n\t"
00086 "movd %%mm1, (%3) \n\t"
00087 "add %5, %3 \n\t"
00088 "movd (%1), %%mm0 \n\t"
00089 "add %4, %1 \n\t"
00090 "movd (%1), %%mm1 \n\t"
00091 "movd 8(%2), %%mm2 \n\t"
00092 "movd 12(%2), %%mm3 \n\t"
00093 "add %4, %1 \n\t"
00094 PAVGB" %%mm2, %%mm0 \n\t"
00095 PAVGB" %%mm3, %%mm1 \n\t"
00096 "movd %%mm0, (%3) \n\t"
00097 "add %5, %3 \n\t"
00098 "movd %%mm1, (%3) \n\t"
00099 "add %5, %3 \n\t"
00100 "add $16, %2 \n\t"
00101 "subl $4, %0 \n\t"
00102 "jnz 1b \n\t"
00103 #ifdef PIC
00104 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00105 #else
00106 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00107 #endif
00108 :"S"((long)src1Stride), "D"((long)dstStride)
00109 :"memory");
00110 }
00111
00112
/*
 * "put" of the average of two sources, 8 pixels wide:
 *     dst[x] = PAVGB(src1[x], src2[x]),  x = 0..7, for each row.
 *
 * dst        - destination, advanced by dstStride after every row
 * src1       - first source, advanced by src1Stride after every row
 * src2       - second source, read as tightly packed rows (8 bytes per row)
 * dstStride  - destination row stride in bytes
 * src1Stride - src1 row stride in bytes
 * h          - row count. When h is odd a single row is processed before
 *              the loop. The loop is then entered unconditionally, handles
 *              4 rows per pass and exits only when the counter is exactly
 *              0, so h - (h & 1) must be a positive multiple of 4.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 *               %4 = src1Stride, %5 = dstStride.
 * With PIC defined h is a memory operand instead of living in the "b"
 * register; presumably because ebx is reserved in 32-bit PIC code -- confirm.
 * PAVGB and DEF are macros defined outside this view.
 */
00113 static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00114 {
00115 __asm __volatile(
    /* odd h: peel off one row so the main loop sees the remainder */
00116 "testl $1, %0 \n\t"
00117 " jz 1f \n\t"
00118 "movq (%1), %%mm0 \n\t"
00119 "movq (%2), %%mm1 \n\t"
00120 "add %4, %1 \n\t"
00121 "add $8, %2 \n\t"
00122 PAVGB" %%mm1, %%mm0 \n\t"
00123 "movq %%mm0, (%3) \n\t"
00124 "add %5, %3 \n\t"
00125 "decl %0 \n\t"
    /* main loop: 4 rows per pass, src2 consumed at offsets 0/8/16/24 */
00126 "1: \n\t"
00127 "movq (%1), %%mm0 \n\t"
00128 "add %4, %1 \n\t"
00129 "movq (%1), %%mm1 \n\t"
00130 "add %4, %1 \n\t"
00131 PAVGB" (%2), %%mm0 \n\t"
00132 PAVGB" 8(%2), %%mm1 \n\t"
00133 "movq %%mm0, (%3) \n\t"
00134 "add %5, %3 \n\t"
00135 "movq %%mm1, (%3) \n\t"
00136 "add %5, %3 \n\t"
00137 "movq (%1), %%mm0 \n\t"
00138 "add %4, %1 \n\t"
00139 "movq (%1), %%mm1 \n\t"
00140 "add %4, %1 \n\t"
00141 PAVGB" 16(%2), %%mm0 \n\t"
00142 PAVGB" 24(%2), %%mm1 \n\t"
00143 "movq %%mm0, (%3) \n\t"
00144 "add %5, %3 \n\t"
00145 "movq %%mm1, (%3) \n\t"
00146 "add %5, %3 \n\t"
00147 "add $32, %2 \n\t"
00148 "subl $4, %0 \n\t"
00149 "jnz 1b \n\t"
00150 #ifdef PIC
00151 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00152 #else
00153 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00154 #endif
00155 :"S"((long)src1Stride), "D"((long)dstStride)
00156 :"memory");
00157
00158
00159
00160
00161 }
00162
/*
 * "put" of the two-source average, 8 pixels wide, "no_rnd" variant.
 *
 * Both inputs are bitwise inverted (XOR with the all-ones mm6), averaged
 * with PAVGB and the result is inverted again. Assuming PAVGB rounds halves
 * up (TODO confirm against its definition, which is outside this view),
 * this yields an average that rounds halves down instead.
 *
 * dst        - destination, advanced by dstStride after every row
 * src1       - first source, advanced by src1Stride after every row
 * src2       - second source, read as tightly packed rows (8 bytes per row)
 * dstStride  - destination row stride in bytes
 * src1Stride - src1 row stride in bytes
 * h          - row count. When h is odd a single row is processed before
 *              the loop. The loop is then entered unconditionally, handles
 *              4 rows per pass and exits only when the counter is exactly
 *              0, so h - (h & 1) must be a positive multiple of 4.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 *               %4 = src1Stride, %5 = dstStride.
 * mm0-mm3 and mm6 are overwritten; none of them is listed as a clobber.
 */
00163 static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00164 {
00165 __asm __volatile(
    /* mm6 = all ones, used as the inversion mask below */
00166 "pcmpeqb %%mm6, %%mm6 \n\t"
    /* odd h: peel off one row so the main loop sees the remainder */
00167 "testl $1, %0 \n\t"
00168 " jz 1f \n\t"
00169 "movq (%1), %%mm0 \n\t"
00170 "movq (%2), %%mm1 \n\t"
00171 "add %4, %1 \n\t"
00172 "add $8, %2 \n\t"
00173 "pxor %%mm6, %%mm0 \n\t"
00174 "pxor %%mm6, %%mm1 \n\t"
00175 PAVGB" %%mm1, %%mm0 \n\t"
00176 "pxor %%mm6, %%mm0 \n\t"
00177 "movq %%mm0, (%3) \n\t"
00178 "add %5, %3 \n\t"
00179 "decl %0 \n\t"
    /* main loop: 4 rows per pass, handled as two pairs of rows */
00180 "1: \n\t"
00181 "movq (%1), %%mm0 \n\t"
00182 "add %4, %1 \n\t"
00183 "movq (%1), %%mm1 \n\t"
00184 "add %4, %1 \n\t"
00185 "movq (%2), %%mm2 \n\t"
00186 "movq 8(%2), %%mm3 \n\t"
00187 "pxor %%mm6, %%mm0 \n\t"
00188 "pxor %%mm6, %%mm1 \n\t"
00189 "pxor %%mm6, %%mm2 \n\t"
00190 "pxor %%mm6, %%mm3 \n\t"
00191 PAVGB" %%mm2, %%mm0 \n\t"
00192 PAVGB" %%mm3, %%mm1 \n\t"
00193 "pxor %%mm6, %%mm0 \n\t"
00194 "pxor %%mm6, %%mm1 \n\t"
00195 "movq %%mm0, (%3) \n\t"
00196 "add %5, %3 \n\t"
00197 "movq %%mm1, (%3) \n\t"
00198 "add %5, %3 \n\t"
00199 "movq (%1), %%mm0 \n\t"
00200 "add %4, %1 \n\t"
00201 "movq (%1), %%mm1 \n\t"
00202 "add %4, %1 \n\t"
00203 "movq 16(%2), %%mm2 \n\t"
00204 "movq 24(%2), %%mm3 \n\t"
00205 "pxor %%mm6, %%mm0 \n\t"
00206 "pxor %%mm6, %%mm1 \n\t"
00207 "pxor %%mm6, %%mm2 \n\t"
00208 "pxor %%mm6, %%mm3 \n\t"
00209 PAVGB" %%mm2, %%mm0 \n\t"
00210 PAVGB" %%mm3, %%mm1 \n\t"
00211 "pxor %%mm6, %%mm0 \n\t"
00212 "pxor %%mm6, %%mm1 \n\t"
00213 "movq %%mm0, (%3) \n\t"
00214 "add %5, %3 \n\t"
00215 "movq %%mm1, (%3) \n\t"
00216 "add %5, %3 \n\t"
00217 "add $32, %2 \n\t"
00218 "subl $4, %0 \n\t"
00219 "jnz 1b \n\t"
00220 #ifdef PIC
00221 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00222 #else
00223 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00224 #endif
00225 :"S"((long)src1Stride), "D"((long)dstStride)
00226 :"memory");
00227
00228
00229
00230
00231 }
00232
/*
 * "avg" of the two-source average, 4 pixels wide:
 *     dst[x] = PAVGB(PAVGB(src1[x], src2[x]), dst[x]),  x = 0..3.
 * The existing destination contents therefore take part in the result.
 *
 * dst        - destination (read and written), advanced by dstStride per row
 * src1       - first source, advanced by src1Stride per row
 * src2       - second source, read as tightly packed rows (4 bytes per row)
 * dstStride  - destination row stride in bytes
 * src1Stride - src1 row stride in bytes
 * h          - row count. When h is odd a single row is processed before
 *              the loop. The loop is then entered unconditionally, handles
 *              4 rows per pass and exits only when the counter is exactly
 *              0, so h - (h & 1) must be a positive multiple of 4.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 *               %4 = src1Stride, %5 = dstStride.
 * NOTE(review): several PAVGB uses take a memory operand while only 4 bytes
 * are loaded/stored with movd; if PAVGB is a 64-bit MMX instruction those
 * operands fetch 8 bytes -- confirm the buffers tolerate the wider read.
 */
00233 static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00234 {
00235 __asm __volatile(
    /* odd h: peel off one row so the main loop sees the remainder */
00236 "testl $1, %0 \n\t"
00237 " jz 1f \n\t"
00238 "movd (%1), %%mm0 \n\t"
00239 "movd (%2), %%mm1 \n\t"
00240 "add %4, %1 \n\t"
00241 "add $4, %2 \n\t"
00242 PAVGB" %%mm1, %%mm0 \n\t"
00243 PAVGB" (%3), %%mm0 \n\t"
00244 "movd %%mm0, (%3) \n\t"
00245 "add %5, %3 \n\t"
00246 "decl %0 \n\t"
    /* main loop: 4 rows per pass, src2 consumed at offsets 0/4/8/12 */
00247 "1: \n\t"
00248 "movd (%1), %%mm0 \n\t"
00249 "add %4, %1 \n\t"
00250 "movd (%1), %%mm1 \n\t"
00251 "add %4, %1 \n\t"
00252 PAVGB" (%2), %%mm0 \n\t"
00253 PAVGB" 4(%2), %%mm1 \n\t"
00254 PAVGB" (%3), %%mm0 \n\t"
00255 "movd %%mm0, (%3) \n\t"
00256 "add %5, %3 \n\t"
00257 PAVGB" (%3), %%mm1 \n\t"
00258 "movd %%mm1, (%3) \n\t"
00259 "add %5, %3 \n\t"
00260 "movd (%1), %%mm0 \n\t"
00261 "add %4, %1 \n\t"
00262 "movd (%1), %%mm1 \n\t"
00263 "add %4, %1 \n\t"
00264 PAVGB" 8(%2), %%mm0 \n\t"
00265 PAVGB" 12(%2), %%mm1 \n\t"
00266 PAVGB" (%3), %%mm0 \n\t"
00267 "movd %%mm0, (%3) \n\t"
00268 "add %5, %3 \n\t"
00269 PAVGB" (%3), %%mm1 \n\t"
00270 "movd %%mm1, (%3) \n\t"
00271 "add %5, %3 \n\t"
00272 "add $16, %2 \n\t"
00273 "subl $4, %0 \n\t"
00274 "jnz 1b \n\t"
00275 #ifdef PIC
00276 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00277 #else
00278 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00279 #endif
00280 :"S"((long)src1Stride), "D"((long)dstStride)
00281 :"memory");
00282 }
00283
00284
/*
 * "avg" of the two-source average, 8 pixels wide:
 *     dst[x] = PAVGB(PAVGB(src1[x], src2[x]), dst[x]),  x = 0..7.
 * The existing destination contents therefore take part in the result.
 *
 * dst        - destination (read and written), advanced by dstStride per row
 * src1       - first source, advanced by src1Stride per row
 * src2       - second source, read as tightly packed rows (8 bytes per row)
 * dstStride  - destination row stride in bytes
 * src1Stride - src1 row stride in bytes
 * h          - row count. When h is odd a single row is processed before
 *              the loop. The loop is then entered unconditionally, handles
 *              4 rows per pass and exits only when the counter is exactly
 *              0, so h - (h & 1) must be a positive multiple of 4.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 *               %4 = src1Stride, %5 = dstStride.
 * PAVGB and DEF are macros defined outside this view.
 */
00285 static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00286 {
00287 __asm __volatile(
    /* odd h: peel off one row so the main loop sees the remainder */
00288 "testl $1, %0 \n\t"
00289 " jz 1f \n\t"
00290 "movq (%1), %%mm0 \n\t"
00291 "movq (%2), %%mm1 \n\t"
00292 "add %4, %1 \n\t"
00293 "add $8, %2 \n\t"
00294 PAVGB" %%mm1, %%mm0 \n\t"
00295 PAVGB" (%3), %%mm0 \n\t"
00296 "movq %%mm0, (%3) \n\t"
00297 "add %5, %3 \n\t"
00298 "decl %0 \n\t"
    /* main loop: 4 rows per pass, src2 consumed at offsets 0/8/16/24 */
00299 "1: \n\t"
00300 "movq (%1), %%mm0 \n\t"
00301 "add %4, %1 \n\t"
00302 "movq (%1), %%mm1 \n\t"
00303 "add %4, %1 \n\t"
00304 PAVGB" (%2), %%mm0 \n\t"
00305 PAVGB" 8(%2), %%mm1 \n\t"
00306 PAVGB" (%3), %%mm0 \n\t"
00307 "movq %%mm0, (%3) \n\t"
00308 "add %5, %3 \n\t"
00309 PAVGB" (%3), %%mm1 \n\t"
00310 "movq %%mm1, (%3) \n\t"
00311 "add %5, %3 \n\t"
00312 "movq (%1), %%mm0 \n\t"
00313 "add %4, %1 \n\t"
00314 "movq (%1), %%mm1 \n\t"
00315 "add %4, %1 \n\t"
00316 PAVGB" 16(%2), %%mm0 \n\t"
00317 PAVGB" 24(%2), %%mm1 \n\t"
00318 PAVGB" (%3), %%mm0 \n\t"
00319 "movq %%mm0, (%3) \n\t"
00320 "add %5, %3 \n\t"
00321 PAVGB" (%3), %%mm1 \n\t"
00322 "movq %%mm1, (%3) \n\t"
00323 "add %5, %3 \n\t"
00324 "add $32, %2 \n\t"
00325 "subl $4, %0 \n\t"
00326 "jnz 1b \n\t"
00327 #ifdef PIC
00328 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00329 #else
00330 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00331 #endif
00332 :"S"((long)src1Stride), "D"((long)dstStride)
00333 :"memory");
00334
00335
00336
00337
00338 }
00339
/*
 * Horizontal half-pel "put" for a 16-pixel-wide block: every output row is
 * PAVGB(pixels[x], pixels[x + 1]) for x = 0..15, stored to block. Each row
 * is processed as two 8-byte halves (offsets 0 and 8).
 *
 * block     - destination; consecutive rows are line_size bytes apart
 * pixels    - source, same stride; each row is also read at offsets 1 and 9
 * line_size - row stride in bytes, shared by source and destination
 * h         - row count. The loop handles 4 rows per pass and exits only
 *             when the counter reaches exactly 0, so h must be a positive
 *             multiple of 4.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * DEF, PAVGB and REG_a are macros defined outside this view.
 */
00340 static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00341 {
00342 __asm __volatile(
    /* REG_a = 2 * line_size: the pointer step for one pair of rows */
00343 "lea (%3, %3), %%"REG_a" \n\t"
00344 "1: \n\t"
    /* rows 0 and 1 of this pass; mm0/mm1 = left halves, mm2/mm3 = right */
00345 "movq (%1), %%mm0 \n\t"
00346 "movq (%1, %3), %%mm1 \n\t"
00347 "movq 8(%1), %%mm2 \n\t"
00348 "movq 8(%1, %3), %%mm3 \n\t"
00349 PAVGB" 1(%1), %%mm0 \n\t"
00350 PAVGB" 1(%1, %3), %%mm1 \n\t"
00351 PAVGB" 9(%1), %%mm2 \n\t"
00352 PAVGB" 9(%1, %3), %%mm3 \n\t"
00353 "movq %%mm0, (%2) \n\t"
00354 "movq %%mm1, (%2, %3) \n\t"
00355 "movq %%mm2, 8(%2) \n\t"
00356 "movq %%mm3, 8(%2, %3) \n\t"
00357 "add %%"REG_a", %1 \n\t"
00358 "add %%"REG_a", %2 \n\t"
    /* rows 2 and 3 of this pass */
00359 "movq (%1), %%mm0 \n\t"
00360 "movq (%1, %3), %%mm1 \n\t"
00361 "movq 8(%1), %%mm2 \n\t"
00362 "movq 8(%1, %3), %%mm3 \n\t"
00363 PAVGB" 1(%1), %%mm0 \n\t"
00364 PAVGB" 1(%1, %3), %%mm1 \n\t"
00365 PAVGB" 9(%1), %%mm2 \n\t"
00366 PAVGB" 9(%1, %3), %%mm3 \n\t"
00367 "add %%"REG_a", %1 \n\t"
00368 "movq %%mm0, (%2) \n\t"
00369 "movq %%mm1, (%2, %3) \n\t"
00370 "movq %%mm2, 8(%2) \n\t"
00371 "movq %%mm3, 8(%2, %3) \n\t"
00372 "add %%"REG_a", %2 \n\t"
00373 "subl $4, %0 \n\t"
00374 "jnz 1b \n\t"
00375 :"+g"(h), "+S"(pixels), "+D"(block)
00376 :"r" ((long)line_size)
00377 :"%"REG_a, "memory");
00378 }
00379
/*
 * "put" of the average of two sources, 16 pixels wide:
 *     dst[x] = PAVGB(src1[x], src2[x]),  x = 0..15, for each row.
 *
 * dst        - destination, advanced by dstStride after every row
 * src1       - first source, advanced by src1Stride after every row
 * src2       - second source, read as tightly packed rows (16 bytes per row)
 * dstStride  - destination row stride in bytes
 * src1Stride - src1 row stride in bytes
 * h          - row count. When h is odd a single row is processed before
 *              the loop. The loop is then entered unconditionally, handles
 *              2 rows per pass and exits only when the counter is exactly
 *              0, so h - (h & 1) must be a positive even number.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 *               %4 = src1Stride, %5 = dstStride.
 * PAVGB and DEF are macros defined outside this view.
 */
00380 static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00381 {
00382 __asm __volatile(
    /* odd h: peel off one row so the main loop sees the remainder */
00383 "testl $1, %0 \n\t"
00384 " jz 1f \n\t"
00385 "movq (%1), %%mm0 \n\t"
00386 "movq 8(%1), %%mm1 \n\t"
00387 PAVGB" (%2), %%mm0 \n\t"
00388 PAVGB" 8(%2), %%mm1 \n\t"
00389 "add %4, %1 \n\t"
00390 "add $16, %2 \n\t"
00391 "movq %%mm0, (%3) \n\t"
00392 "movq %%mm1, 8(%3) \n\t"
00393 "add %5, %3 \n\t"
00394 "decl %0 \n\t"
    /* main loop: 2 rows per pass, src2 consumed at offsets 0/8 and 16/24 */
00395 "1: \n\t"
00396 "movq (%1), %%mm0 \n\t"
00397 "movq 8(%1), %%mm1 \n\t"
00398 "add %4, %1 \n\t"
00399 PAVGB" (%2), %%mm0 \n\t"
00400 PAVGB" 8(%2), %%mm1 \n\t"
00401 "movq %%mm0, (%3) \n\t"
00402 "movq %%mm1, 8(%3) \n\t"
00403 "add %5, %3 \n\t"
00404 "movq (%1), %%mm0 \n\t"
00405 "movq 8(%1), %%mm1 \n\t"
00406 "add %4, %1 \n\t"
00407 PAVGB" 16(%2), %%mm0 \n\t"
00408 PAVGB" 24(%2), %%mm1 \n\t"
00409 "movq %%mm0, (%3) \n\t"
00410 "movq %%mm1, 8(%3) \n\t"
00411 "add %5, %3 \n\t"
00412 "add $32, %2 \n\t"
00413 "subl $2, %0 \n\t"
00414 "jnz 1b \n\t"
00415 #ifdef PIC
00416 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00417 #else
00418 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00419 #endif
00420 :"S"((long)src1Stride), "D"((long)dstStride)
00421 :"memory");
00422
00423
00424
00425
00426 }
00427
/*
 * "avg" of the two-source average, 16 pixels wide:
 *     dst[x] = PAVGB(PAVGB(src1[x], src2[x]), dst[x]),  x = 0..15.
 * The existing destination contents therefore take part in the result.
 *
 * dst        - destination (read and written), advanced by dstStride per row
 * src1       - first source, advanced by src1Stride per row
 * src2       - second source, read as tightly packed rows (16 bytes per row)
 * dstStride  - destination row stride in bytes
 * src1Stride - src1 row stride in bytes
 * h          - row count. When h is odd a single row is processed before
 *              the loop. The loop is then entered unconditionally, handles
 *              2 rows per pass and exits only when the counter is exactly
 *              0, so h - (h & 1) must be a positive even number.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 *               %4 = src1Stride, %5 = dstStride.
 * PAVGB and DEF are macros defined outside this view.
 */
00428 static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00429 {
00430 __asm __volatile(
    /* odd h: peel off one row so the main loop sees the remainder */
00431 "testl $1, %0 \n\t"
00432 " jz 1f \n\t"
00433 "movq (%1), %%mm0 \n\t"
00434 "movq 8(%1), %%mm1 \n\t"
00435 PAVGB" (%2), %%mm0 \n\t"
00436 PAVGB" 8(%2), %%mm1 \n\t"
00437 "add %4, %1 \n\t"
00438 "add $16, %2 \n\t"
00439 PAVGB" (%3), %%mm0 \n\t"
00440 PAVGB" 8(%3), %%mm1 \n\t"
00441 "movq %%mm0, (%3) \n\t"
00442 "movq %%mm1, 8(%3) \n\t"
00443 "add %5, %3 \n\t"
00444 "decl %0 \n\t"
    /* main loop: 2 rows per pass, src2 consumed at offsets 0/8 and 16/24 */
00445 "1: \n\t"
00446 "movq (%1), %%mm0 \n\t"
00447 "movq 8(%1), %%mm1 \n\t"
00448 "add %4, %1 \n\t"
00449 PAVGB" (%2), %%mm0 \n\t"
00450 PAVGB" 8(%2), %%mm1 \n\t"
00451 PAVGB" (%3), %%mm0 \n\t"
00452 PAVGB" 8(%3), %%mm1 \n\t"
00453 "movq %%mm0, (%3) \n\t"
00454 "movq %%mm1, 8(%3) \n\t"
00455 "add %5, %3 \n\t"
00456 "movq (%1), %%mm0 \n\t"
00457 "movq 8(%1), %%mm1 \n\t"
00458 "add %4, %1 \n\t"
00459 PAVGB" 16(%2), %%mm0 \n\t"
00460 PAVGB" 24(%2), %%mm1 \n\t"
00461 PAVGB" (%3), %%mm0 \n\t"
00462 PAVGB" 8(%3), %%mm1 \n\t"
00463 "movq %%mm0, (%3) \n\t"
00464 "movq %%mm1, 8(%3) \n\t"
00465 "add %5, %3 \n\t"
00466 "add $32, %2 \n\t"
00467 "subl $2, %0 \n\t"
00468 "jnz 1b \n\t"
00469 #ifdef PIC
00470 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00471 #else
00472 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00473 #endif
00474 :"S"((long)src1Stride), "D"((long)dstStride)
00475 :"memory");
00476
00477
00478
00479
00480 }
00481
/*
 * "put" of the two-source average, 16 pixels wide, "no_rnd" variant.
 *
 * Both inputs are bitwise inverted (XOR with the all-ones mm6), averaged
 * with PAVGB and the result is inverted again. Assuming PAVGB rounds halves
 * up (TODO confirm against its definition, which is outside this view),
 * this yields an average that rounds halves down instead.
 *
 * dst        - destination, advanced by dstStride after every row
 * src1       - first source, advanced by src1Stride after every row
 * src2       - second source, read as tightly packed rows (16 bytes per row)
 * dstStride  - destination row stride in bytes
 * src1Stride - src1 row stride in bytes
 * h          - row count. When h is odd a single row is processed before
 *              the loop. The loop is then entered unconditionally, handles
 *              2 rows per pass and exits only when the counter is exactly
 *              0, so h - (h & 1) must be a positive even number.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 *               %4 = src1Stride, %5 = dstStride.
 * mm0-mm3 and mm6 are overwritten; none of them is listed as a clobber.
 */
00482 static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00483 {
00484 __asm __volatile(
    /* mm6 = all ones, used as the inversion mask below */
00485 "pcmpeqb %%mm6, %%mm6 \n\t"
    /* odd h: peel off one row so the main loop sees the remainder */
00486 "testl $1, %0 \n\t"
00487 " jz 1f \n\t"
00488 "movq (%1), %%mm0 \n\t"
00489 "movq 8(%1), %%mm1 \n\t"
00490 "movq (%2), %%mm2 \n\t"
00491 "movq 8(%2), %%mm3 \n\t"
00492 "pxor %%mm6, %%mm0 \n\t"
00493 "pxor %%mm6, %%mm1 \n\t"
00494 "pxor %%mm6, %%mm2 \n\t"
00495 "pxor %%mm6, %%mm3 \n\t"
00496 PAVGB" %%mm2, %%mm0 \n\t"
00497 PAVGB" %%mm3, %%mm1 \n\t"
00498 "pxor %%mm6, %%mm0 \n\t"
00499 "pxor %%mm6, %%mm1 \n\t"
00500 "add %4, %1 \n\t"
00501 "add $16, %2 \n\t"
00502 "movq %%mm0, (%3) \n\t"
00503 "movq %%mm1, 8(%3) \n\t"
00504 "add %5, %3 \n\t"
00505 "decl %0 \n\t"
    /* main loop: 2 rows per pass, src2 consumed at offsets 0/8 and 16/24 */
00506 "1: \n\t"
00507 "movq (%1), %%mm0 \n\t"
00508 "movq 8(%1), %%mm1 \n\t"
00509 "add %4, %1 \n\t"
00510 "movq (%2), %%mm2 \n\t"
00511 "movq 8(%2), %%mm3 \n\t"
00512 "pxor %%mm6, %%mm0 \n\t"
00513 "pxor %%mm6, %%mm1 \n\t"
00514 "pxor %%mm6, %%mm2 \n\t"
00515 "pxor %%mm6, %%mm3 \n\t"
00516 PAVGB" %%mm2, %%mm0 \n\t"
00517 PAVGB" %%mm3, %%mm1 \n\t"
00518 "pxor %%mm6, %%mm0 \n\t"
00519 "pxor %%mm6, %%mm1 \n\t"
00520 "movq %%mm0, (%3) \n\t"
00521 "movq %%mm1, 8(%3) \n\t"
00522 "add %5, %3 \n\t"
00523 "movq (%1), %%mm0 \n\t"
00524 "movq 8(%1), %%mm1 \n\t"
00525 "add %4, %1 \n\t"
00526 "movq 16(%2), %%mm2 \n\t"
00527 "movq 24(%2), %%mm3 \n\t"
00528 "pxor %%mm6, %%mm0 \n\t"
00529 "pxor %%mm6, %%mm1 \n\t"
00530 "pxor %%mm6, %%mm2 \n\t"
00531 "pxor %%mm6, %%mm3 \n\t"
00532 PAVGB" %%mm2, %%mm0 \n\t"
00533 PAVGB" %%mm3, %%mm1 \n\t"
00534 "pxor %%mm6, %%mm0 \n\t"
00535 "pxor %%mm6, %%mm1 \n\t"
00536 "movq %%mm0, (%3) \n\t"
00537 "movq %%mm1, 8(%3) \n\t"
00538 "add %5, %3 \n\t"
00539 "add $32, %2 \n\t"
00540 "subl $2, %0 \n\t"
00541 "jnz 1b \n\t"
00542 #ifdef PIC
00543 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00544 #else
00545 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00546 #endif
00547 :"S"((long)src1Stride), "D"((long)dstStride)
00548 :"memory");
00549
00550
00551
00552
00553 }
00554
00555
/*
 * Horizontal half-pel "put", 8 pixels wide, "no_rnd" variant.
 *
 * For every row the left sample pixels[x] has mm6 subtracted with unsigned
 * saturation before PAVGB with the right sample pixels[x + 1].
 * MOVQ_BONE is defined outside this view; presumably it fills mm6 with
 * bytes of value 1 -- TODO confirm. Under that assumption, and assuming
 * PAVGB rounds up, the result is the round-down average except where the
 * left sample is 0: the saturating subtract then has no effect and the
 * result still rounds up. NOTE(review): confirm this approximation is
 * acceptable for the callers.
 *
 * block     - destination; consecutive rows are line_size bytes apart
 * pixels    - source, same stride; each row is also read at byte offset 1
 * line_size - row stride in bytes, shared by source and destination
 * h         - row count. The loop handles 4 rows per pass and exits only
 *             when the counter reaches exactly 0, so h must be a positive
 *             multiple of 4.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 */
00556 static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00557 {
00558 MOVQ_BONE(mm6);
00559 __asm __volatile(
    /* REG_a = 2 * line_size: the pointer step for one pair of rows */
00560 "lea (%3, %3), %%"REG_a" \n\t"
00561 "1: \n\t"
    /* rows 0 and 1: mm0/mm2 = left samples, mm1/mm3 = right samples */
00562 "movq (%1), %%mm0 \n\t"
00563 "movq (%1, %3), %%mm2 \n\t"
00564 "movq 1(%1), %%mm1 \n\t"
00565 "movq 1(%1, %3), %%mm3 \n\t"
00566 "add %%"REG_a", %1 \n\t"
00567 "psubusb %%mm6, %%mm0 \n\t"
00568 "psubusb %%mm6, %%mm2 \n\t"
00569 PAVGB" %%mm1, %%mm0 \n\t"
00570 PAVGB" %%mm3, %%mm2 \n\t"
00571 "movq %%mm0, (%2) \n\t"
00572 "movq %%mm2, (%2, %3) \n\t"
    /* rows 2 and 3 */
00573 "movq (%1), %%mm0 \n\t"
00574 "movq 1(%1), %%mm1 \n\t"
00575 "movq (%1, %3), %%mm2 \n\t"
00576 "movq 1(%1, %3), %%mm3 \n\t"
00577 "add %%"REG_a", %2 \n\t"
00578 "add %%"REG_a", %1 \n\t"
00579 "psubusb %%mm6, %%mm0 \n\t"
00580 "psubusb %%mm6, %%mm2 \n\t"
00581 PAVGB" %%mm1, %%mm0 \n\t"
00582 PAVGB" %%mm3, %%mm2 \n\t"
00583 "movq %%mm0, (%2) \n\t"
00584 "movq %%mm2, (%2, %3) \n\t"
00585 "add %%"REG_a", %2 \n\t"
00586 "subl $4, %0 \n\t"
00587 "jnz 1b \n\t"
00588 :"+g"(h), "+S"(pixels), "+D"(block)
00589 :"r" ((long)line_size)
00590 :"%"REG_a, "memory");
00591 }
00592
/*
 * Vertical half-pel "put" for an 8-pixel-wide block: output row r is
 * PAVGB(pixels row r, pixels row r + 1), so h + 1 source rows are read.
 *
 * block     - destination; consecutive rows are line_size bytes apart
 * pixels    - source, same stride
 * line_size - row stride in bytes, shared by source and destination
 * h         - row count. The loop handles 4 rows per pass and exits only
 *             when the counter reaches exactly 0, so h must be a positive
 *             multiple of 4.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * The last source row loaded in one pass is kept in mm0 and reused as the
 * first row of the next pass.
 */
00593 static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00594 {
00595 __asm __volatile(
    /* REG_a = 2 * line_size */
00596 "lea (%3, %3), %%"REG_a" \n\t"
    /* mm0 = source row 0 */
00597 "movq (%1), %%mm0 \n\t"
    /* bias block back one row so stores can use the same (+%3, +REG_a)
       addressing as the loads */
00598 "sub %3, %2 \n\t"
00599 "1: \n\t"
    /* mm1 = row 1, mm2 = row 2; output rows 0 and 1 */
00600 "movq (%1, %3), %%mm1 \n\t"
00601 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00602 "add %%"REG_a", %1 \n\t"
00603 PAVGB" %%mm1, %%mm0 \n\t"
00604 PAVGB" %%mm2, %%mm1 \n\t"
00605 "movq %%mm0, (%2, %3) \n\t"
00606 "movq %%mm1, (%2, %%"REG_a") \n\t"
    /* mm1 = row 3, mm0 = row 4; output rows 2 and 3 */
00607 "movq (%1, %3), %%mm1 \n\t"
00608 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00609 "add %%"REG_a", %2 \n\t"
00610 "add %%"REG_a", %1 \n\t"
00611 PAVGB" %%mm1, %%mm2 \n\t"
00612 PAVGB" %%mm0, %%mm1 \n\t"
00613 "movq %%mm2, (%2, %3) \n\t"
00614 "movq %%mm1, (%2, %%"REG_a") \n\t"
00615 "add %%"REG_a", %2 \n\t"
00616 "subl $4, %0 \n\t"
00617 "jnz 1b \n\t"
00618 :"+g"(h), "+S"(pixels), "+D" (block)
00619 :"r" ((long)line_size)
00620 :"%"REG_a, "memory");
00621 }
00622
00623
/*
 * Vertical half-pel "put", 8 pixels wide, "no_rnd" variant.
 *
 * Same row pairing as the rounding y2 version (output row r combines source
 * rows r and r + 1), but every odd source row (held in mm1) has mm6
 * subtracted with unsigned saturation before it is used in the two averages
 * it takes part in. MOVQ_BONE is defined outside this view; presumably it
 * fills mm6 with bytes of value 1 -- TODO confirm. NOTE(review): where the
 * biased byte is already 0 the subtract saturates and has no effect, so the
 * bias is only approximate.
 *
 * block     - destination; consecutive rows are line_size bytes apart
 * pixels    - source, same stride; h + 1 rows are read
 * line_size - row stride in bytes, shared by source and destination
 * h         - row count. The loop handles 4 rows per pass and exits only
 *             when the counter reaches exactly 0, so h must be a positive
 *             multiple of 4.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 */
00624 static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00625 {
00626 MOVQ_BONE(mm6);
00627 __asm __volatile(
    /* REG_a = 2 * line_size */
00628 "lea (%3, %3), %%"REG_a" \n\t"
    /* mm0 = source row 0 */
00629 "movq (%1), %%mm0 \n\t"
    /* bias block back one row so stores can use (+%3, +REG_a) addressing */
00630 "sub %3, %2 \n\t"
00631 "1: \n\t"
    /* mm1 = row 1 (biased by mm6), mm2 = row 2; output rows 0 and 1 */
00632 "movq (%1, %3), %%mm1 \n\t"
00633 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00634 "add %%"REG_a", %1 \n\t"
00635 "psubusb %%mm6, %%mm1 \n\t"
00636 PAVGB" %%mm1, %%mm0 \n\t"
00637 PAVGB" %%mm2, %%mm1 \n\t"
00638 "movq %%mm0, (%2, %3) \n\t"
00639 "movq %%mm1, (%2, %%"REG_a") \n\t"
    /* mm1 = row 3 (biased by mm6), mm0 = row 4; output rows 2 and 3 */
00640 "movq (%1, %3), %%mm1 \n\t"
00641 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00642 "add %%"REG_a", %2 \n\t"
00643 "add %%"REG_a", %1 \n\t"
00644 "psubusb %%mm6, %%mm1 \n\t"
00645 PAVGB" %%mm1, %%mm2 \n\t"
00646 PAVGB" %%mm0, %%mm1 \n\t"
00647 "movq %%mm2, (%2, %3) \n\t"
00648 "movq %%mm1, (%2, %%"REG_a") \n\t"
00649 "add %%"REG_a", %2 \n\t"
00650 "subl $4, %0 \n\t"
00651 "jnz 1b \n\t"
00652 :"+g"(h), "+S"(pixels), "+D" (block)
00653 :"r" ((long)line_size)
00654 :"%"REG_a, "memory");
00655 }
00656
/*
 * Full-pel "avg" for an 8-pixel-wide block:
 *     block[x] = PAVGB(block[x], pixels[x]),  x = 0..7, for each row.
 *
 * block     - destination (read and written); rows line_size bytes apart
 * pixels    - source, same stride
 * line_size - row stride in bytes, shared by source and destination
 * h         - row count. The loop handles 4 rows per pass and exits only
 *             when the counter reaches exactly 0, so h must be a positive
 *             multiple of 4.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * DEF, PAVGB and REG_a are macros defined outside this view.
 */
00657 static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00658 {
00659 __asm __volatile(
    /* REG_a = 2 * line_size: the pointer step for one pair of rows */
00660 "lea (%3, %3), %%"REG_a" \n\t"
00661 "1: \n\t"
    /* rows 0 and 1: load destination, average with source, store back */
00662 "movq (%2), %%mm0 \n\t"
00663 "movq (%2, %3), %%mm1 \n\t"
00664 PAVGB" (%1), %%mm0 \n\t"
00665 PAVGB" (%1, %3), %%mm1 \n\t"
00666 "movq %%mm0, (%2) \n\t"
00667 "movq %%mm1, (%2, %3) \n\t"
00668 "add %%"REG_a", %1 \n\t"
00669 "add %%"REG_a", %2 \n\t"
    /* rows 2 and 3 */
00670 "movq (%2), %%mm0 \n\t"
00671 "movq (%2, %3), %%mm1 \n\t"
00672 PAVGB" (%1), %%mm0 \n\t"
00673 PAVGB" (%1, %3), %%mm1 \n\t"
00674 "add %%"REG_a", %1 \n\t"
00675 "movq %%mm0, (%2) \n\t"
00676 "movq %%mm1, (%2, %3) \n\t"
00677 "add %%"REG_a", %2 \n\t"
00678 "subl $4, %0 \n\t"
00679 "jnz 1b \n\t"
00680 :"+g"(h), "+S"(pixels), "+D"(block)
00681 :"r" ((long)line_size)
00682 :"%"REG_a, "memory");
00683 }
00684
/*
 * Horizontal half-pel "avg" for an 8-pixel-wide block:
 *     block[x] = PAVGB(PAVGB(pixels[x], pixels[x + 1]), block[x]),  x = 0..7.
 *
 * block     - destination (read and written); rows line_size bytes apart
 * pixels    - source, same stride; each row is also read at byte offset 1
 * line_size - row stride in bytes, shared by source and destination
 * h         - row count. The loop handles 4 rows per pass and exits only
 *             when the counter reaches exactly 0, so h must be a positive
 *             multiple of 4.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * DEF, PAVGB and REG_a are macros defined outside this view.
 */
00685 static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00686 {
00687 __asm __volatile(
    /* REG_a = 2 * line_size: the pointer step for one pair of rows */
00688 "lea (%3, %3), %%"REG_a" \n\t"
00689 "1: \n\t"
    /* rows 0 and 1: horizontal average, then average with destination */
00690 "movq (%1), %%mm0 \n\t"
00691 "movq (%1, %3), %%mm2 \n\t"
00692 PAVGB" 1(%1), %%mm0 \n\t"
00693 PAVGB" 1(%1, %3), %%mm2 \n\t"
00694 PAVGB" (%2), %%mm0 \n\t"
00695 PAVGB" (%2, %3), %%mm2 \n\t"
00696 "add %%"REG_a", %1 \n\t"
00697 "movq %%mm0, (%2) \n\t"
00698 "movq %%mm2, (%2, %3) \n\t"
    /* rows 2 and 3 */
00699 "movq (%1), %%mm0 \n\t"
00700 "movq (%1, %3), %%mm2 \n\t"
00701 PAVGB" 1(%1), %%mm0 \n\t"
00702 PAVGB" 1(%1, %3), %%mm2 \n\t"
00703 "add %%"REG_a", %2 \n\t"
00704 "add %%"REG_a", %1 \n\t"
00705 PAVGB" (%2), %%mm0 \n\t"
00706 PAVGB" (%2, %3), %%mm2 \n\t"
00707 "movq %%mm0, (%2) \n\t"
00708 "movq %%mm2, (%2, %3) \n\t"
00709 "add %%"REG_a", %2 \n\t"
00710 "subl $4, %0 \n\t"
00711 "jnz 1b \n\t"
00712 :"+g"(h), "+S"(pixels), "+D"(block)
00713 :"r" ((long)line_size)
00714 :"%"REG_a, "memory");
00715 }
00716
/*
 * Vertical half-pel "avg" for an 8-pixel-wide block: output row r is
 *     PAVGB(PAVGB(pixels row r, pixels row r + 1), block row r),
 * so h + 1 source rows are read and the destination is read-modify-write.
 *
 * block     - destination (read and written); rows line_size bytes apart
 * pixels    - source, same stride
 * line_size - row stride in bytes, shared by source and destination
 * h         - row count. The loop handles 4 rows per pass and exits only
 *             when the counter reaches exactly 0, so h must be a positive
 *             multiple of 4.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * mm3/mm4 hold the current destination rows; the last source row of a pass
 * stays in mm0 for the next pass.
 */
00717 static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00718 {
00719 __asm __volatile(
    /* REG_a = 2 * line_size */
00720 "lea (%3, %3), %%"REG_a" \n\t"
    /* mm0 = source row 0 */
00721 "movq (%1), %%mm0 \n\t"
    /* bias block back one row so it can use (+%3, +REG_a) addressing */
00722 "sub %3, %2 \n\t"
00723 "1: \n\t"
    /* mm1 = row 1, mm2 = row 2; output rows 0 and 1 */
00724 "movq (%1, %3), %%mm1 \n\t"
00725 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00726 "add %%"REG_a", %1 \n\t"
00727 PAVGB" %%mm1, %%mm0 \n\t"
00728 PAVGB" %%mm2, %%mm1 \n\t"
00729 "movq (%2, %3), %%mm3 \n\t"
00730 "movq (%2, %%"REG_a"), %%mm4 \n\t"
00731 PAVGB" %%mm3, %%mm0 \n\t"
00732 PAVGB" %%mm4, %%mm1 \n\t"
00733 "movq %%mm0, (%2, %3) \n\t"
00734 "movq %%mm1, (%2, %%"REG_a") \n\t"
    /* mm1 = row 3, mm0 = row 4; output rows 2 and 3 */
00735 "movq (%1, %3), %%mm1 \n\t"
00736 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00737 PAVGB" %%mm1, %%mm2 \n\t"
00738 PAVGB" %%mm0, %%mm1 \n\t"
00739 "add %%"REG_a", %2 \n\t"
00740 "add %%"REG_a", %1 \n\t"
00741 "movq (%2, %3), %%mm3 \n\t"
00742 "movq (%2, %%"REG_a"), %%mm4 \n\t"
00743 PAVGB" %%mm3, %%mm2 \n\t"
00744 PAVGB" %%mm4, %%mm1 \n\t"
00745 "movq %%mm2, (%2, %3) \n\t"
00746 "movq %%mm1, (%2, %%"REG_a") \n\t"
00747 "add %%"REG_a", %2 \n\t"
00748 "subl $4, %0 \n\t"
00749 "jnz 1b \n\t"
00750 :"+g"(h), "+S"(pixels), "+D"(block)
00751 :"r" ((long)line_size)
00752 :"%"REG_a, "memory");
00753 }
00754
00755
00756
/*
 * Diagonal (x + y) half-pel "avg" for an 8-pixel-wide block.
 *
 * Each source row is first averaged horizontally (pixels[x] with
 * pixels[x + 1]); two vertically adjacent horizontal averages are then
 * averaged together, and that result is averaged with the existing block
 * row. h + 1 source rows are read.
 *
 * NOTE(review): the 4-sample average is built from nested pairwise PAVGB
 * steps rather than one 4-sample sum, and mm6 is subtracted (saturating)
 * from only one of the rows in each pass, so the output is presumably not
 * bit-exact with a correctly rounded 4-tap average -- confirm callers accept
 * that. MOVQ_BONE and ASMALIGN are macros defined outside this view;
 * MOVQ_BONE presumably fills mm6 with bytes of value 1 and ASMALIGN(3)
 * presumably aligns the loop label -- TODO confirm.
 *
 * block     - destination (read and written); rows line_size bytes apart
 * pixels    - source, same stride; each row is also read at byte offset 1
 * line_size - row stride in bytes, shared by source and destination
 * h         - row count. The loop handles 4 rows per pass and exits only
 *             when the counter reaches exactly 0, so h must be a positive
 *             multiple of 4.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 */
00757 static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00758 {
00759 MOVQ_BONE(mm6);
00760 __asm __volatile(
    /* REG_a = 2 * line_size */
00761 "lea (%3, %3), %%"REG_a" \n\t"
    /* mm0 = horizontal average of source row 0 */
00762 "movq (%1), %%mm0 \n\t"
00763 PAVGB" 1(%1), %%mm0 \n\t"
00764 ASMALIGN(3)
00765 "1: \n\t"
    /* mm1 = h-average of row 1, mm2 = h-average of (row 2 - mm6) */
00766 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00767 "movq (%1, %3), %%mm1 \n\t"
00768 "psubusb %%mm6, %%mm2 \n\t"
00769 PAVGB" 1(%1, %3), %%mm1 \n\t"
00770 PAVGB" 1(%1, %%"REG_a"), %%mm2 \n\t"
00771 "add %%"REG_a", %1 \n\t"
    /* vertical step, then blend with destination rows 0 and 1 */
00772 PAVGB" %%mm1, %%mm0 \n\t"
00773 PAVGB" %%mm2, %%mm1 \n\t"
00774 PAVGB" (%2), %%mm0 \n\t"
00775 PAVGB" (%2, %3), %%mm1 \n\t"
00776 "movq %%mm0, (%2) \n\t"
00777 "movq %%mm1, (%2, %3) \n\t"
    /* mm1 = h-average of row 3, mm0 = h-average of row 4 (kept for the
       next pass) */
00778 "movq (%1, %3), %%mm1 \n\t"
00779 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00780 PAVGB" 1(%1, %3), %%mm1 \n\t"
00781 PAVGB" 1(%1, %%"REG_a"), %%mm0 \n\t"
00782 "add %%"REG_a", %2 \n\t"
00783 "add %%"REG_a", %1 \n\t"
    /* vertical step, then blend with destination rows 2 and 3 */
00784 PAVGB" %%mm1, %%mm2 \n\t"
00785 PAVGB" %%mm0, %%mm1 \n\t"
00786 PAVGB" (%2), %%mm2 \n\t"
00787 PAVGB" (%2, %3), %%mm1 \n\t"
00788 "movq %%mm2, (%2) \n\t"
00789 "movq %%mm1, (%2, %3) \n\t"
00790 "add %%"REG_a", %2 \n\t"
00791 "subl $4, %0 \n\t"
00792 "jnz 1b \n\t"
00793 :"+g"(h), "+S"(pixels), "+D"(block)
00794 :"r" ((long)line_size)
00795 :"%"REG_a, "memory");
00796 }
00797
/*
 * Full-pel "avg" for a 4-pixel-wide block:
 *     block[x] = PAVGB(block[x], pixels[x]),  x = 0..3, for each row.
 *
 * block     - destination (read and written); rows line_size bytes apart
 * pixels    - source, same stride
 * line_size - row stride in bytes, shared by source and destination
 * h         - row count. The C loop always runs at least once and handles
 *             4 rows per pass until h <= 0, so the number of rows actually
 *             processed is h rounded up to a multiple of 4 (4 rows even
 *             when h <= 0).
 *
 * Asm operands (inputs only): %0 = pixels, %1 = block, %2 = line_size,
 * %3 = 3 * line_size. The pointers are advanced in C, not in the asm.
 * NOTE(review): the PAVGB memory operands may fetch 8 bytes per row if
 * PAVGB is a 64-bit MMX instruction, although only 4 bytes are stored --
 * confirm the source buffer tolerates the wider read.
 */
00798 static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00799 {
00800 do {
00801 asm volatile(
    /* load four destination rows (4 bytes each) */
00802 "movd (%1), %%mm0 \n\t"
00803 "movd (%1, %2), %%mm1 \n\t"
00804 "movd (%1, %2, 2), %%mm2 \n\t"
00805 "movd (%1, %3), %%mm3 \n\t"
    /* average each with the matching source row */
00806 PAVGB" (%0), %%mm0 \n\t"
00807 PAVGB" (%0, %2), %%mm1 \n\t"
00808 PAVGB" (%0, %2, 2), %%mm2 \n\t"
00809 PAVGB" (%0, %3), %%mm3 \n\t"
    /* write the four rows back to the destination */
00810 "movd %%mm0, (%1) \n\t"
00811 "movd %%mm1, (%1, %2) \n\t"
00812 "movd %%mm2, (%1, %2, 2) \n\t"
00813 "movd %%mm3, (%1, %3) \n\t"
00814 ::"S"(pixels), "D"(block),
00815 "r" ((long)line_size), "r"(3L*line_size)
00816 :"memory");
00817 block += 4*line_size;
00818 pixels += 4*line_size;
00819 h -= 4;
00820 } while(h > 0);
00821 }
00822
00823
/*
 * 16-pixel-wide wrapper: runs the 8-wide put_no_rnd_pixels8_x2 on the left
 * half (offset 0) and then on the right half (offset 8) of block/pixels,
 * with the same line_size and h.
 */
00824 static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00825 DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h);
00826 DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
00827 }
/*
 * 16-pixel-wide wrapper: runs the 8-wide put_pixels8_y2 on the left half
 * (offset 0) and then on the right half (offset 8) of block/pixels, with
 * the same line_size and h.
 */
00828 static void DEF(put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00829 DEF(put_pixels8_y2)(block , pixels , line_size, h);
00830 DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h);
00831 }
/*
 * 16-pixel-wide wrapper: runs the 8-wide put_no_rnd_pixels8_y2 on the left
 * half (offset 0) and then on the right half (offset 8) of block/pixels,
 * with the same line_size and h.
 */
00832 static void DEF(put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00833 DEF(put_no_rnd_pixels8_y2)(block , pixels , line_size, h);
00834 DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
00835 }
/*
 * 16-pixel-wide wrapper: runs the 8-wide avg_pixels8 on the left half
 * (offset 0) and then on the right half (offset 8) of block/pixels, with
 * the same line_size and h.
 */
00836 static void DEF(avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00837 DEF(avg_pixels8)(block , pixels , line_size, h);
00838 DEF(avg_pixels8)(block+8, pixels+8, line_size, h);
00839 }
/*
 * 16-pixel-wide wrapper: runs the 8-wide avg_pixels8_x2 on the left half
 * (offset 0) and then on the right half (offset 8) of block/pixels, with
 * the same line_size and h.
 */
00840 static void DEF(avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00841 DEF(avg_pixels8_x2)(block , pixels , line_size, h);
00842 DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h);
00843 }
/*
 * 16-pixel-wide wrapper: runs the 8-wide avg_pixels8_y2 on the left half
 * (offset 0) and then on the right half (offset 8) of block/pixels, with
 * the same line_size and h.
 */
00844 static void DEF(avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00845 DEF(avg_pixels8_y2)(block , pixels , line_size, h);
00846 DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h);
00847 }
/*
 * 16-pixel-wide wrapper: runs the 8-wide avg_pixels8_xy2 on the left half
 * (offset 0) and then on the right half (offset 8) of block/pixels, with
 * the same line_size and h.
 */
00848 static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00849 DEF(avg_pixels8_xy2)(block , pixels , line_size, h);
00850 DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
00851 }
00852
/*
 * QPEL_2TAP_L3(OPNAME) generates two functions,
 *     DEF(OPNAME 2tap_qpel16_l3) and DEF(OPNAME 2tap_qpel8_l3),
 * that blend three source positions for each of h rows:
 *     t      = PAVGB(PAVGB(src[off1 + x], src[off2 + x]), src[x])
 *     dst[x] = t, after the STORE_OP hook has been applied to t
 * for x = 0..15 (qpel16) or x = 0..7 (qpel8).
 *
 * STORE_OP(addr, reg) must be defined when the macro is expanded; it is
 * inserted right before the final store, with addr being the destination
 * address and reg the register holding t.
 *
 * dst, src - destination and source; the destination is addressed as
 *            src + (dst - src), so only src is advanced in the loop
 * stride   - added to src after every row (shared by source and destination)
 * h        - row count; one row per pass, the loop exits only when the
 *            counter reaches exactly 0, so h must be positive
 * off1,
 * off2     - byte offsets from src of the two extra taps
 *
 * Asm operands: %0 = h, %1 = src, %2 = off1, %3 = off2,
 *               %4 = dst - src, %5 = stride.
 */
00853 #define QPEL_2TAP_L3(OPNAME) \
00854 static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
00855 asm volatile(\
00856 "1: \n\t"\
00857 "movq (%1,%2), %%mm0 \n\t"\
00858 "movq 8(%1,%2), %%mm1 \n\t"\
00859 PAVGB" (%1,%3), %%mm0 \n\t"\
00860 PAVGB" 8(%1,%3), %%mm1 \n\t"\
00861 PAVGB" (%1), %%mm0 \n\t"\
00862 PAVGB" 8(%1), %%mm1 \n\t"\
00863 STORE_OP( (%1,%4),%%mm0)\
00864 STORE_OP(8(%1,%4),%%mm1)\
00865 "movq %%mm0, (%1,%4) \n\t"\
00866 "movq %%mm1, 8(%1,%4) \n\t"\
00867 "add %5, %1 \n\t"\
00868 "decl %0 \n\t"\
00869 "jnz 1b \n\t"\
00870 :"+g"(h), "+r"(src)\
00871 :"r"((long)off1), "r"((long)off2),\
00872 "r"((long)(dst-src)), "r"((long)stride)\
00873 :"memory"\
00874 );\
00875 }\
00876 static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
00877 asm volatile(\
00878 "1: \n\t"\
00879 "movq (%1,%2), %%mm0 \n\t"\
00880 PAVGB" (%1,%3), %%mm0 \n\t"\
00881 PAVGB" (%1), %%mm0 \n\t"\
00882 STORE_OP((%1,%4),%%mm0)\
00883 "movq %%mm0, (%1,%4) \n\t"\
00884 "add %5, %1 \n\t"\
00885 "decl %0 \n\t"\
00886 "jnz 1b \n\t"\
00887 :"+g"(h), "+r"(src)\
00888 :"r"((long)off1), "r"((long)off2),\
00889 "r"((long)(dst-src)), "r"((long)stride)\
00890 :"memory"\
00891 );\
00892 }
00893
/*
 * Instantiate the 2-tap qpel helpers twice.
 * avg_ variant: STORE_OP emits an extra PAVGB of the result with the
 * current destination contents before the store.
 */
00894 #define STORE_OP(a,b) PAVGB" "#a","#b" \n\t"
00895 QPEL_2TAP_L3(avg_)
00896 #undef STORE_OP
/* put_ variant: STORE_OP expands to nothing, so the result is stored as is. */
00897 #define STORE_OP(a,b)
00898 QPEL_2TAP_L3(put_)
00899 #undef STORE_OP
/* The generator macro is not needed past this point. */
00900 #undef QPEL_2TAP_L3