00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00026 #include "dsputil.h"
00027
00028 #include "gcc_fixes.h"
00029
00030 #include "dsputil_altivec.h"
00031
/**
 * Sum of squared differences between an int8_t array and an int16_t array:
 * the sum over i in [0, size) of (pix1[i] - pix2[i])^2.
 *
 * Full groups of 16 elements are processed with AltiVec; the remaining
 * size % 16 elements are handled by a scalar loop.
 *
 * @param pix1 first operand (size elements); loaded through an
 *             unaligned-load sequence, so no particular alignment is needed
 * @param pix2 second operand (size elements); same alignment remark
 * @param size number of elements to compare; a value <= 0 yields 0
 * @return the accumulated sum of squared differences
 */
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
                                     int size) {
    int i, size16;
    vector signed char vpix1;
    vector signed short vpix2, vdiff, vpix1l, vpix1h;
    /* The union lets the scalar tail and the return statement read the
     * individual 32-bit lanes of the vector accumulator. */
    union {
        vector signed int vscore;
        int32_t score[4];
    } u;

    /* Nothing to compare. This also keeps a negative size from driving the
     * block loop below past the ends of the buffers. */
    if (size <= 0) {
        return 0;
    }

    u.vscore = vec_splat_s32(0);

/* Load 16 bytes starting at b regardless of alignment: fetch the two vectors
 * that straddle b and merge them with the permute mask from vec_lvsl.
 * The offsets are byte offsets, so 15 is correct for any element type.
 * Deliberately has no trailing semicolon so it behaves as an expression. */
#define vec_unaligned_load(b) \
    vec_perm(vec_ld(0, (b)), vec_ld(15, (b)), vec_lvsl(0, (b)))

    for (size16 = size >> 4; size16 > 0; size16--) {
        /* Load 16 int8 values from pix1 and the first 8 int16 values
         * from pix2. */
        vpix1 = vec_unaligned_load(pix1);
        vpix2 = vec_unaligned_load(pix2);
        pix2 += 8;

        /* Widen the high half of pix1 to 16 bit and subtract. */
        vpix1h = vec_unpackh(vpix1);
        vdiff  = vec_sub(vpix1h, vpix2);
        vpix1l = vec_unpackl(vpix1);

        /* Load the next 8 int16 values from pix2. The statement order of the
         * original is kept; presumably the load is issued here so it can
         * overlap with the arithmetic. */
        vpix2 = vec_unaligned_load(pix2);
        /* vdiff * vdiff, summed into the 32-bit accumulator lanes. */
        u.vscore = vec_msum(vdiff, vdiff, u.vscore);
        vdiff  = vec_sub(vpix1l, vpix2);
        u.vscore = vec_msum(vdiff, vdiff, u.vscore);
        pix1 += 16;
        pix2 += 8;
    }
    /* Reduce the four partial sums; the total is read from lane 3 below. */
    u.vscore = vec_sums(u.vscore, vec_splat_s32(0));

    /* Scalar tail for the elements that do not fill a 16-element group. */
    size %= 16;
    for (i = 0; i < size; i++) {
        int diff = pix1[i] - pix2[i];
        u.score[3] += diff * diff;
    }
    return u.score[3];
}
00076
00077 void int_init_altivec(DSPContext* c, AVCodecContext *avctx)
00078 {
00079 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
00080 }