00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef FFMPEG_DSPUTIL_MMX_H
00023 #define FFMPEG_DSPUTIL_MMX_H
00024
00025 #include <stdint.h>
00026
/**
 * 128-bit value stored as two 64-bit halves. Used below as the type of
 * the constants that are declared at full XMM-register width.
 */
typedef struct { uint64_t a, b; } xmm_t;

/*
 * Constant tables shared by the SIMD code. This header only declares
 * them; the values live in another translation unit.
 *
 * NOTE(review): the names suggest pw = packed 16-bit words, pb = packed
 * bytes, pd = packed doubles, with the suffix giving the replicated
 * value -- confirm against the definitions.
 * NOTE(review): presumably the definitions are aligned for aligned SIMD
 * loads (xmm_t itself only guarantees uint64_t alignment) -- confirm
 * where they are defined.
 */
extern const uint64_t ff_bone;
extern const uint64_t ff_wtwo;

extern const uint64_t ff_pdw_80000000[2];

/* 64-bit and 128-bit (xmm_t) entries are mixed in this group. */
extern const uint64_t ff_pw_3;
extern const uint64_t ff_pw_4;
extern const xmm_t    ff_pw_5;
extern const uint64_t ff_pw_8;
extern const uint64_t ff_pw_15;
extern const xmm_t    ff_pw_16;
extern const uint64_t ff_pw_20;
extern const xmm_t    ff_pw_32;
extern const uint64_t ff_pw_42;
extern const uint64_t ff_pw_64;
extern const uint64_t ff_pw_96;
extern const uint64_t ff_pw_128;

extern const uint64_t ff_pb_1;
extern const uint64_t ff_pb_3;
extern const uint64_t ff_pb_7;
extern const uint64_t ff_pb_3F;
extern const uint64_t ff_pb_A1;
extern const uint64_t ff_pb_FC;

extern const double ff_pd_1[2];
extern const double ff_pd_2[2];
00056
00057
/**
 * Emit inline-asm text that replaces two registers of packed 16-bit
 * words by their sum and difference (AT&T operand order: source first,
 * destination last).
 *
 * With a0 and b0 the values on entry:
 *   a = a0 + b0
 *   b = b0 - a0        (computed as 2*b0 - (a0 + b0))
 *
 * No scratch register is needed. The expansion is a sequence of adjacent
 * string literals, each terminated by "\n\t".
 */
#define SUMSUB_BA( a, b ) \
    "paddw "#b", "#a" \n\t"\
    "paddw "#b", "#b" \n\t"\
    "psubw "#a", "#b" \n\t"
00062
/**
 * Emit inline-asm text for one interleave step.
 *
 *   a, b : source registers
 *   t    : register that receives the high interleave; must be distinct
 *          from both a and b
 *   n    : suffix appended to punpckl/punpckh (wd, dq, qdq)
 *   m    : suffix appended to mov (q, dqa)
 *
 * On exit a holds punpckl<n>(a, b), t holds punpckh<n>(a, b) and b is
 * left untouched.
 *
 * The last line deliberately has no trailing backslash, so the
 * definition ends here and cannot absorb whatever follows it.
 */
#define SBUTTERFLY(a,b,t,n,m)\
    "mov" #m " " #a ", " #t " \n\t" \
    "punpckl" #n " " #b ", " #a " \n\t" \
    "punpckh" #n " " #b ", " #t " \n\t"
00067
/**
 * Emit inline-asm text that transposes a 4x4 block of 16-bit words held
 * in four 64-bit registers (copies are done with movq).
 *
 *   in : rows 0..3 in a, b, c, d; t is a fifth register used as scratch
 *   out: rows 0..3 of the transpose in a, d, t, c (in that order);
 *        b is left holding an intermediate value
 *
 * Two word-interleave steps pair up rows 0/1 and 2/3, then two
 * dword-interleave steps combine the pairs.
 */
#define TRANSPOSE4(a,b,c,d,t)\
    SBUTTERFLY(a,b,t,wd,q) \
    SBUTTERFLY(c,d,b,wd,q) \
    SBUTTERFLY(a,c,d,dq,q) \
    SBUTTERFLY(t,b,c,dq,q)
00073
/**
 * Emit inline-asm text that transposes an 8x8 block of 16-bit words held
 * in eight 128-bit registers (copies are done with movdqa).
 *
 *   in : rows 0..7 in a, b, c, d, e, f, g, h
 *   out: rows 0..7 of the transpose in a, f, h, d, g, e, c, b
 *        (in that order), i.e. the result is NOT back in a..h order.
 *
 * Nine values are live at the peak, so one slot beyond a..h is needed:
 *
 *  - ARCH_X86_64: %xmm8 is used as that slot and t is ignored. The
 *    register is hard-coded in the text ("%%xmm8" as written for an
 *    extended-asm template), so it must not be one of a..h.
 *    NOTE(review): callers presumably have to treat xmm8 as clobbered --
 *    confirm at the call sites.
 *
 *  - otherwise: t is a memory operand used as spill space. It is
 *    accessed both as "t" and as "16t" (the literal 16 is pasted in
 *    front of the operand text as a displacement), so t must be spelled
 *    such that this forms a valid address, and it must cover 32 bytes.
 *    All accesses use movdqa, which requires 16-byte alignment.
 */
#ifdef ARCH_X86_64

#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
    SBUTTERFLY(a,b,%%xmm8,wd,dqa)\
    SBUTTERFLY(c,d,b,wd,dqa)\
    SBUTTERFLY(e,f,d,wd,dqa)\
    SBUTTERFLY(g,h,f,wd,dqa)\
    SBUTTERFLY(a,c,h,dq,dqa)\
    SBUTTERFLY(%%xmm8,b,c,dq,dqa)\
    SBUTTERFLY(e,g,b,dq,dqa)\
    SBUTTERFLY(d,f,g,dq,dqa)\
    SBUTTERFLY(a,e,f,qdq,dqa)\
    SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\
    SBUTTERFLY(h,b,d,qdq,dqa)\
    SBUTTERFLY(c,g,b,qdq,dqa)\
    "movdqa %%xmm8, "#g" \n\t"
#else
#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
    "movdqa "#h", "#t" \n\t"\
    SBUTTERFLY(a,b,h,wd,dqa)\
    "movdqa "#h", 16"#t" \n\t"\
    "movdqa "#t", "#h" \n\t"\
    SBUTTERFLY(c,d,b,wd,dqa)\
    SBUTTERFLY(e,f,d,wd,dqa)\
    SBUTTERFLY(g,h,f,wd,dqa)\
    SBUTTERFLY(a,c,h,dq,dqa)\
    "movdqa "#h", "#t" \n\t"\
    "movdqa 16"#t", "#h" \n\t"\
    SBUTTERFLY(h,b,c,dq,dqa)\
    SBUTTERFLY(e,g,b,dq,dqa)\
    SBUTTERFLY(d,f,g,dq,dqa)\
    SBUTTERFLY(a,e,f,qdq,dqa)\
    SBUTTERFLY(h,d,e,qdq,dqa)\
    "movdqa "#h", 16"#t" \n\t"\
    "movdqa "#t", "#h" \n\t"\
    SBUTTERFLY(h,b,d,qdq,dqa)\
    SBUTTERFLY(c,g,b,qdq,dqa)\
    "movdqa 16"#t", "#g" \n\t"
#endif
00113
00114 #endif