00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "dsputil.h"
00024
00025 #include "gcc_fixes.h"
00026
00027 #include "dsputil_ppc.h"
00028 #include "util_altivec.h"
00029
00030
00031
00032
00033
/* Perf counters are sampled only for the common h == 8 block height. */
#define GMC1_PERF_COND (h==8)

/*
 * AltiVec MPEG-4 GMC1 (global motion compensation with a single motion
 * vector). Bilinearly interpolates an 8-pixel-wide, h-row block from src
 * into dst:
 *
 *   dst[x] = (A*s(x,y) + B*s(x+1,y) + C*s(x,y+1) + D*s(x+1,y+1) + rounder) >> 8
 *
 * where x16/y16 are the 1/16-pel fractional offsets and the weights
 * A..D (see ABCD[] below) sum to 16*16 = 256, hence the >> 8.
 *
 * NOTE(review): only 8 output pixels per row are produced — vec_mergeh
 * keeps just the first 8 source bytes of each 16-byte vector. The merged
 * store below only handles dst & 0xF being 0 or 8, so dst is presumably
 * required to be 8-byte aligned — TODO confirm against callers.
 */
void gmc1_altivec(uint8_t *dst , uint8_t *src , int stride, int h, int x16, int y16, int rounder)
{
POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
/* Rounding term replicated into all 8 u16 lanes. */
const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) =
{rounder, rounder, rounder, rounder,
rounder, rounder, rounder, rounder};
/* Bilinear weights: A = (16-x)(16-y), B = x(16-y), C = (16-x)y, D = xy.
 * Only the first four entries are used (splatted below); padding zeros
 * keep the array a full 16-byte vector for the single vec_ld. */
const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) =
{
(16-x16)*(16-y16),
( x16)*(16-y16),
(16-x16)*( y16),
( x16)*( y16),
0, 0, 0, 0
};
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
/* Shift count 8 in every u16 lane, for the final >> 8 normalization. */
register const vector unsigned short vcsr8 = (const vector unsigned short)vec_splat_u16(8);
register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD;
register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD;
int i;
/* Offsets of dst/src within their 16-byte aligned lines; drive the
 * merged store and the lvsl wraparound special case respectively. */
unsigned long dst_odd = (unsigned long)dst & 0x0000000F;
unsigned long src_really_odd = (unsigned long)src & 0x0000000F;


POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);

/* Broadcast each bilinear weight across a full vector. */
tempA = vec_ld(0, (unsigned short*)ABCD);
Av = vec_splat(tempA, 0);
Bv = vec_splat(tempA, 1);
Cv = vec_splat(tempA, 2);
Dv = vec_splat(tempA, 3);

rounderV = vec_ld(0, (unsigned short*)rounder_a);

/*
 * Prime the first source row. srcvA holds the 16 bytes starting at src,
 * srcvB the 16 bytes starting at src+1, both assembled from two aligned
 * loads via the standard lvsl/vec_perm unaligned-load idiom.
 *
 * Special case: when src & 0xF == 0xF, src+1 is itself 16-byte aligned
 * and vec_lvsl(1, src) wraps to a shift of 0, which would wrongly select
 * src_0 — the wanted bytes are then exactly src_1.
 */
src_0 = vec_ld(0, src);
src_1 = vec_ld(16, src);
srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));

if (src_really_odd != 0x0000000F)
{
srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
}
else
{
srcvB = src_1;
}
/* Zero-extend the first 8 bytes of each row vector to 8 u16 lanes. */
srcvA = vec_mergeh(vczero, srcvA);
srcvB = vec_mergeh(vczero, srcvB);

for(i=0; i<h; i++)
{
dst_odd = (unsigned long)dst & 0x0000000F;
src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;

/* Load existing dst line: the output store writes a full aligned
 * 16 bytes, so the untouched half must be merged back in below. */
dstv = vec_ld(0, dst);

/*
 * Load the NEXT source row (src + stride): srcvC starts at
 * src+stride, srcvD at src+stride+1, with the same lvsl-wraparound
 * special case as above for the +1 variant.
 */
src_0 = vec_ld(stride + 0, src);
src_1 = vec_ld(stride + 16, src);
srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));

if (src_really_odd != 0x0000000F)
{
srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
}
else
{
srcvD = src_1;
}

srcvC = vec_mergeh(vczero, srcvC);
srcvD = vec_mergeh(vczero, srcvD);

/*
 * Accumulate the four weighted taps in u16 arithmetic, seeding with
 * the rounder: tempD = A*srcvA + B*srcvB + C*srcvC + D*srcvD + rounder.
 * (Weights sum to 256 and pixels are <= 255, so NOTE(review): this
 * presumably relies on mladd's modular 16-bit low half being exact
 * for these magnitudes — TODO confirm range analysis.)
 */
tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);

/* This row's bottom line is the next row's top line: reuse the
 * already-loaded vectors instead of reloading them. */
srcvA = srcvC;
srcvB = srcvD;

/* Normalize: divide the weighted sum by 256. */
tempD = vec_sr(tempD, vcsr8);

/* Narrow back to bytes; result occupies the first 8 bytes. */
dstv2 = vec_pack(tempD, (vector unsigned short)vczero);

/* Merge the 8 result bytes into the correct half of the aligned
 * 16-byte dst line (vcprm indices are 32-bit words; 0..3 from the
 * first operand, s0..s3 from the second). */
if (dst_odd)
{
dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
}
else
{
dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
}

vec_st(dstv2, 0, dst);

dst += stride;
src += stride;
}

POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
}