00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "avcodec.h"
00022 #include "dsputil.h"
00023 #include "snow.h"
00024
00025 #include "rangecoder.h"
00026 #include "mathops.h"
00027
00028 #include "mpegvideo.h"
00029
00030 #undef NDEBUG
00031 #include <assert.h>
00032
/*
 * quant3: 256-entry lookup table whose outputs are limited to {-1, 0, +1}.
 * As written: indices 0 and 1 give 0, indices 2..127 give +1,
 * indices 128..254 give -1 and index 255 gives 0.
 * Presumably the index is a signed difference reduced to 8 bits, so that
 * 128..255 stand for negative inputs -- TODO confirm at the use sites,
 * which are outside this chunk.
 */
00033 static const int8_t quant3[256]={
00034 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00035 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00036 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00037 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00038 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00039 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00040 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00041 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00042 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00043 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00044 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00045 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00046 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00047 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00048 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00049 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
00050 };
/*
 * quant3b: 256-entry lookup table with outputs in {-1, 0, +1}.
 * Only index 0 gives 0; indices 1..127 give +1 and indices 128..255 give -1,
 * i.e. there is no dead zone around zero.
 * Presumably indexed by a signed value reduced to 8 bits -- TODO confirm.
 */
00051 static const int8_t quant3b[256]={
00052 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00053 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00054 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00055 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00056 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00057 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00058 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00059 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00060 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00061 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00062 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00063 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00064 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00065 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00066 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00067 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00068 };
/*
 * quant3bA: 256-entry lookup table with outputs in {-1, 0, +1}.
 * Indices 0 and 1 give 0; from index 2 on, even indices give +1 and odd
 * indices give -1 (the sign alternates with the lowest index bit).
 * NOTE(review): the alternating layout suggests the index carries the sign
 * in bit 0 rather than as a two's-complement byte -- confirm at the caller.
 */
00069 static const int8_t quant3bA[256]={
00070 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00071 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00072 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00073 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00074 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00075 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00076 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00077 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00078 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00079 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00080 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00081 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00082 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00083 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00084 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00085 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00086 };
/*
 * quant5: 256-entry lookup table with outputs in -2..+2 (5 levels).
 * Index 0 -> 0, 1..3 -> 1, 4..127 -> 2, 128..252 -> -2, 253..255 -> -1.
 * Presumably indexed by a signed value reduced to 8 bits -- TODO confirm.
 */
00087 static const int8_t quant5[256]={
00088 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00089 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00090 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00091 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00092 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00093 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00094 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00095 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00096 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00097 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00098 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00099 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
00104 };
/*
 * quant7: 256-entry lookup table with outputs in -3..+3 (7 levels).
 * The first 128 entries are non-negative and non-decreasing, the last 128
 * are negative and rise towards -1 at index 255.
 * Presumably indexed by a signed value reduced to 8 bits -- TODO confirm.
 */
00105 static const int8_t quant7[256]={
00106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
00109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
00120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
00122 };
/*
 * quant9: 256-entry lookup table with outputs in -4..+4 (9 levels).
 * The first 128 entries are non-negative and non-decreasing, the last 128
 * are negative and rise towards -1 at index 255.
 * Presumably indexed by a signed value reduced to 8 bits -- TODO confirm.
 */
00123 static const int8_t quant9[256]={
00124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
00139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
00140 };
/*
 * quant11: 256-entry lookup table with outputs in -5..+5 (11 levels).
 * The first 128 entries are non-negative and non-decreasing, the last 128
 * are negative and rise towards -1 at index 255.
 * Presumably indexed by a signed value reduced to 8 bits -- TODO confirm.
 */
00141 static const int8_t quant11[256]={
00142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
00143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
00156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
00158 };
/*
 * quant13: 256-entry lookup table with outputs in -6..+6 (13 levels).
 * The first 128 entries are non-negative and non-decreasing, the last 128
 * are negative and rise towards -1 at index 255.
 * Presumably indexed by a signed value reduced to 8 bits -- TODO confirm.
 */
00159 static const int8_t quant13[256]={
00160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
00161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
00173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
00176 };
00177
00178 #if 0 //64*cubic
/*
 * obmc32, "64*cubic" variant: 1024 weights written as 32 rows of 32.
 * This copy sits in the `#if 0` branch and is therefore never compiled;
 * the `#elif 1` ("64*linear") tables are the ones in use.
 */
00179 static const uint8_t obmc32[1024]={
00180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
00182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
00183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
00184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
00185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
00186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
00187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
00188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
00189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
00190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
00191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
00192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
00193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
00194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
00195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
00196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
00197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
00198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
00199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
00200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
00201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
00202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
00203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
00204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
00205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
00206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
00207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
00208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
00209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
00210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
00211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00212
00213 };
/*
 * obmc16, "64*cubic" variant: 256 weights written as 16 rows of 16.
 * Lives in the `#if 0` branch, so it is never compiled.
 */
00214 static const uint8_t obmc16[256]={
00215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
00216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
00217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
00218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
00219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
00220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
00221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
00222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
00223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
00224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
00225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
00226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
00227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
00228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
00229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
00230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
00231
00232 };
00233 #elif 1 // 64*linear
/*
 * obmc32, "64*linear" variant: 1024 weights written as 32 rows of 32.
 * This is the copy selected by `#elif 1`, i.e. the one actually compiled.
 * The rows are mirror-symmetric left/right and the table is symmetric
 * top/bottom; weights peak at 240 in the four centre entries.
 * Referenced as entry 0 of obmc_tab.
 */
00234 static const uint8_t obmc32[1024]={
00235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
00236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
00237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
00238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
00239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
00240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
00241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
00242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
00243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
00244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
00245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
00246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
00247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
00248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
00249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
00250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
00251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
00252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
00253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
00254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
00255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
00256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
00257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
00258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
00259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
00260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
00261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
00262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
00263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
00264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
00265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
00266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
00267
00268 };
/*
 * obmc16, "64*linear" variant: 256 weights written as 16 rows of 16.
 * This is the copy selected by `#elif 1`, i.e. the one actually compiled.
 * Symmetric in both directions, peaking at 224 in the four centre entries.
 * Referenced as entry 1 of obmc_tab.
 */
00269 static const uint8_t obmc16[256]={
00270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
00271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
00272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
00273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
00274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
00275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
00276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
00277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
00278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
00279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
00280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
00281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
00282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
00283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
00284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
00285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
00286
00287 };
00288 #else //64*cos
/*
 * obmc32, "64*cos" variant: 1024 weights written as 32 rows of 32.
 * Sits in the final `#else` branch, which cannot be reached because the
 * preceding `#elif 1` is always taken; kept only as an alternative window.
 */
00289 static const uint8_t obmc32[1024]={
00290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
00292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
00293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
00294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
00295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
00296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
00297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
00298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
00299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
00300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
00301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
00302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
00303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
00304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
00305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
00306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
00307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
00308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
00309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
00310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
00311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
00312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
00313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
00314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
00315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
00316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
00317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
00318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
00319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
00320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
00321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00322
00323 };
/*
 * obmc16, "64*cos" variant: 256 weights written as 16 rows of 16.
 * Sits in the unreachable `#else` branch (the `#elif 1` before it always
 * wins), so it is never compiled.
 */
00324 static const uint8_t obmc16[256]={
00325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
00326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
00327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
00328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
00329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
00330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
00331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
00332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
00333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
00334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
00335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
00336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
00337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
00338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
00339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
00340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
00341
00342 };
00343 #endif
00344
00345
/*
 * obmc8: 64 weights written as 8 rows of 8, symmetric in both directions
 * and peaking at 196 in the four centre entries.  Unlike obmc32/obmc16
 * there is only one variant (it is outside the #if/#elif/#else above).
 * Referenced as entry 2 of obmc_tab.
 */
00346 static const uint8_t obmc8[64]={
00347 4, 12, 20, 28, 28, 20, 12, 4,
00348 12, 36, 60, 84, 84, 60, 36, 12,
00349 20, 60,100,140,140,100, 60, 20,
00350 28, 84,140,196,196,140, 84, 28,
00351 28, 84,140,196,196,140, 84, 28,
00352 20, 60,100,140,140,100, 60, 20,
00353 12, 36, 60, 84, 84, 60, 36, 12,
00354 4, 12, 20, 28, 28, 20, 12, 4,
00355
00356 };
00357
00358
/*
 * obmc4: 16 weights written as 4 rows of 4 (16/48 at the border, 144 in
 * the centre).  Referenced as entry 3 of obmc_tab.
 */
00359 static const uint8_t obmc4[16]={
00360 16, 48, 48, 16,
00361 48,144,144, 48,
00362 48,144,144, 48,
00363 16, 48, 48, 16,
00364
00365 };
00366
/*
 * Window tables ordered from largest to smallest: index 0 -> 32x32 entries,
 * 1 -> 16x16, 2 -> 8x8, 3 -> 4x4 (sizes taken from the array lengths).
 * Presumably indexed by a block subdivision level -- TODO confirm at users.
 */
00367 static const uint8_t * const obmc_tab[4]={
00368 obmc32, obmc16, obmc8, obmc4
00369 };
00370
/*
 * MAX_REF_FRAMES x MAX_REF_FRAMES table of ints; zero-initialised as a
 * static object and not written anywhere in this part of the file.
 * Presumably holds motion-vector scale factors between pairs of reference
 * frames -- TODO confirm where it is filled in.
 */
00371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
00372
/*
 * Per-block record.  The field notes below are inferred from the names and
 * from null_block's initialiser; the code that reads them is outside this
 * chunk -- TODO confirm.
 */
00373 typedef struct BlockNode{
00374 int16_t mx; /* presumably motion vector, x component */
00375 int16_t my; /* presumably motion vector, y component */
00376 uint8_t ref; /* presumably index of the reference frame */
00377 uint8_t color[3]; /* three 8-bit components; null_block uses 128 for each */
00378 uint8_t type; /* flags; BLOCK_INTRA / BLOCK_OPT below are the defined values */
00379
00380 #define BLOCK_INTRA 1 /* value 1 (bit 0) for `type` */
00381 #define BLOCK_OPT 2 /* value 2 (bit 1) for `type`; meaning not visible here */
00382
00383 uint8_t level; /* presumably the subdivision depth of the block -- TODO confirm */
00384 }BlockNode;
00385
00386 static const BlockNode null_block= {
00387 .color= {128,128,128},
00388 .mx= 0,
00389 .my= 0,
00390 .ref= 0,
00391 .type= 0,
00392 .level= 0,
00393 };
00394
/* Size constants shared by the structures and routines below. */
00395 #define LOG2_MB_SIZE 4 /* log2 of MB_SIZE */
00396 #define MB_SIZE (1<<LOG2_MB_SIZE) /* 1<<4 = 16 */
00397 #define ENCODER_EXTRA_BITS 4 /* not used in the visible part of the file -- meaning TODO confirm */
00398 #define HTAPS_MAX 8 /* Plane.hcoeff and Plane.last_hcoeff hold HTAPS_MAX/2 entries */
00399
/*
 * Pair of a 16-bit signed position and a 16-bit unsigned value.
 * SubBand.x_coeff points at elements of this type; presumably a sparse
 * (x, coefficient) list -- TODO confirm at the users.
 */
00400 typedef struct x_and_coeff{
00401 int16_t x; /* presumably the horizontal position of the coefficient */
00402 uint16_t coeff; /* presumably the coefficient magnitude (unsigned) */
00403 } x_and_coeff;
00404
/*
 * Description of one subband of a plane (Plane holds
 * MAX_DECOMPOSITIONS x 4 of these).  Field notes are inferred from names
 * and types only; the code using them is outside this chunk -- TODO confirm.
 */
00405 typedef struct SubBand{
00406 int level; /* presumably the decomposition level this band belongs to */
00407 int stride; /* presumably the element distance between rows in buf/ibuf */
00408 int width;
00409 int height;
00410 int qlog; /* presumably a log-domain quantiser value for this band */
00411 DWTELEM *buf; /* coefficient storage using the DWTELEM type */
00412 IDWTELEM *ibuf; /* coefficient storage using the IDWTELEM type */
00413 int buf_x_offset;
00414 int buf_y_offset;
00415 int stride_line; /* NOTE(review): looks like a stride counted in slice-buffer lines -- confirm */
00416 x_and_coeff * x_coeff;
00417 struct SubBand *parent; /* another SubBand; presumably the same orientation one level coarser */
00418 uint8_t state[ 7 + 512][32]; /* 519 rows of 32 bytes; presumably range-coder context rows as taken by put_symbol()/get_symbol() */
00419 }SubBand;
00420
/*
 * Per-plane state: dimensions, the subbands of every decomposition level
 * and a set of filter parameters together with a "last_" copy of them.
 * Field notes are inferred from names -- TODO confirm.
 */
00421 typedef struct Plane{
00422 int width;
00423 int height;
00424 SubBand band[MAX_DECOMPOSITIONS][4]; /* four subbands per decomposition level */
00425
00426 int htaps; /* presumably the number of filter taps in use (HTAPS_MAX at most) */
00427 int8_t hcoeff[HTAPS_MAX/2]; /* half as many coefficients as the maximum tap count */
00428 int diag_mc;
00429 int fast_mc;
00430
00431 int last_htaps; /* "last_" fields mirror htaps/hcoeff/diag_mc; presumably the previous values */
00432 int8_t last_hcoeff[HTAPS_MAX/2];
00433 int last_diag_mc;
00434 }Plane;
00435
/*
 * Codec-wide state.  It bundles the range coder, the picture buffers, the
 * bitstream parameters (several of which have a "last_" twin), the per-plane
 * data, the block array, a motion-estimation cache and the slice buffer.
 * Field notes are inferred from names and types; the code that uses them is
 * outside this chunk -- TODO confirm.
 */
00436 typedef struct SnowContext{
00437
00438
00439 AVCodecContext *avctx;
00440 RangeCoder c; /* the coder passed to put_symbol()/get_symbol() style helpers */
00441 DSPContext dsp;
00442 AVFrame new_picture;
00443 AVFrame input_picture;
00444 AVFrame current_picture;
00445 AVFrame last_picture[MAX_REF_FRAMES]; /* one frame per possible reference */
00446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; /* pointers only; allocation is not visible here */
00447 AVFrame mconly_picture;
00448
00449 uint8_t header_state[32]; /* 32 bytes, the size of one context row */
00450 uint8_t block_state[128 + 32*128];
00451 int keyframe;
00452 int always_reset;
00453 int version;
00454 int spatial_decomposition_type;
00455 int last_spatial_decomposition_type;
00456 int temporal_decomposition_type;
00457 int spatial_decomposition_count;
00458 int last_spatial_decomposition_count;
00459 int temporal_decomposition_count;
00460 int max_ref_frames;
00461 int ref_frames;
00462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; /* per reference: pointer to an array of int16_t pairs */
00463 uint32_t *ref_scores[MAX_REF_FRAMES];
00464 DWTELEM *spatial_dwt_buffer;
00465 IDWTELEM *spatial_idwt_buffer;
00466 int colorspace_type;
00467 int chroma_h_shift;
00468 int chroma_v_shift;
00469 int spatial_scalability;
00470 int qlog;
00471 int last_qlog;
00472 int lambda;
00473 int lambda2;
00474 int pass1_rc;
00475 int mv_scale;
00476 int last_mv_scale;
00477 int qbias;
00478 int last_qbias;
00479 #define QBIAS_SHIFT 3 /* presumably the fixed-point shift applied to qbias -- TODO confirm */
00480 int b_width;
00481 int b_height;
00482 int block_max_depth;
00483 int last_block_max_depth;
00484 Plane plane[MAX_PLANES];
00485 BlockNode *block;
00486 #define ME_CACHE_SIZE 1024 /* number of entries in me_cache */
00487 int me_cache[ME_CACHE_SIZE];
00488 int me_cache_generation;
00489 slice_buffer sb; /* managed by the slice_buffer_*() helpers in this file */
00490
00491 MpegEncContext m;
00492
00493 uint8_t *scratchbuf;
00494 }SnowContext;
00495
/*
 * Four IDWTELEM line pointers plus a row counter.  Presumably the rolling
 * line window of an inverse-transform (compose) pass -- TODO confirm at
 * the users, which are outside this chunk.
 */
00496 typedef struct {
00497 IDWTELEM *b0;
00498 IDWTELEM *b1;
00499 IDWTELEM *b2;
00500 IDWTELEM *b3;
00501 int y; /* presumably the current row position of the pass */
00502 } DWTCompose;
00503
/*
 * Return the buffer of line `line_num`: the cached pointer when the line is
 * already loaded, otherwise the result of slice_buffer_load_line().
 * Both arguments are expanded more than once, so they must be free of side
 * effects.
 */
00504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
00505
00506
/* Forward declaration; the definition is not in this part of the file. */
00507 static void iterative_me(SnowContext *s);
00508
00509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
00510 {
00511 int i;
00512
00513 buf->base_buffer = base_buffer;
00514 buf->line_count = line_count;
00515 buf->line_width = line_width;
00516 buf->data_count = max_allocated_lines;
00517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
00518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
00519
00520 for(i = 0; i < max_allocated_lines; i++){
00521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
00522 }
00523
00524 buf->data_stack_top = max_allocated_lines - 1;
00525 }
00526
00527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
00528 {
00529 int offset;
00530 IDWTELEM * buffer;
00531
00532 assert(buf->data_stack_top >= 0);
00533
00534 if (buf->line[line])
00535 return buf->line[line];
00536
00537 offset = buf->line_width * line;
00538 buffer = buf->data_stack[buf->data_stack_top];
00539 buf->data_stack_top--;
00540 buf->line[line] = buffer;
00541
00542 return buffer;
00543 }
00544
00545 static void slice_buffer_release(slice_buffer * buf, int line)
00546 {
00547 int offset;
00548 IDWTELEM * buffer;
00549
00550 assert(line >= 0 && line < buf->line_count);
00551 assert(buf->line[line]);
00552
00553 offset = buf->line_width * line;
00554 buffer = buf->line[line];
00555 buf->data_stack_top++;
00556 buf->data_stack[buf->data_stack_top] = buffer;
00557 buf->line[line] = NULL;
00558 }
00559
00560 static void slice_buffer_flush(slice_buffer * buf)
00561 {
00562 int i;
00563 for(i = 0; i < buf->line_count; i++){
00564 if (buf->line[i])
00565 slice_buffer_release(buf, i);
00566 }
00567 }
00568
00569 static void slice_buffer_destroy(slice_buffer * buf)
00570 {
00571 int i;
00572 slice_buffer_flush(buf);
00573
00574 for(i = buf->data_count - 1; i >= 0; i--){
00575 av_freep(&buf->data_stack[i]);
00576 }
00577 av_freep(&buf->data_stack);
00578 av_freep(&buf->line);
00579 }
00580
/*
 * qexp: table of QROOT bytes, zero-initialised as a static object and not
 * filled in within this part of the file.
 * On __sgi builds any existing macro called `qexp` is removed first so the
 * name can be used for the array (presumably a system header defines one
 * there -- TODO confirm).
 */
00581 #ifdef __sgi
00582
00583 #undef qexp
00584 #endif
00585 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
00586 static uint8_t qexp[QROOT];
00587
/**
 * Fold an index into the range [0, m] by reflecting it at both ends
 * (-1 -> 1, m+1 -> m-1, and so on, repeatedly if needed).
 *
 * @param v index to fold
 * @param m largest valid index
 * @return  the reflected index
 *
 * With m == 0 the only valid index is 0.  The reflection loop cannot make
 * progress in that case (v just alternates between v and -v), so it is
 * answered directly instead of spinning forever.
 */
static inline int mirror(int v, int m){
    if(m == 0)
        return 0;

    /* The unsigned compare is true for v < 0 as well as for v > m. */
    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
00595
/*
 * Encode the integer v with the range coder c, using the context bytes at
 * `state` (indices 0..31 are touched):
 *   state[0]       zero flag: 1 means v == 0 and nothing else follows
 *   state[1..10]   unary code of e = av_log2(|v|); positions past the 10th
 *                  share state[10]
 *   state[22..31]  the e bits of |v| below its top bit, most significant
 *                  first; bit positions >= 10 share state[31]
 *   state[11..21]  sign bit (1 = negative), written only when is_signed,
 *                  with the context chosen by min(e, 10)
 * get_symbol() reads the same sequence back.
 * The `#if 1` branch is the compiled one; the `#else` branch is an
 * alternative formulation that is never built.
 */
00596 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
00597 int i;
00598
00599 if(v){
00600 const int a= FFABS(v);
00601 const int e= av_log2(a);
00602 #if 1
00603 const int el= FFMIN(e, 10);
00604 put_rac(c, state+0, 0); /* v is non-zero */
00605
00606 for(i=0; i<el; i++){
00607 put_rac(c, state+1+i, 1); /* contexts 1..10 */
00608 }
00609 for(; i<e; i++){
00610 put_rac(c, state+1+9, 1); /* exponent steps beyond the 10th reuse context 10 */
00611 }
00612 put_rac(c, state+1+FFMIN(i,9), 0); /* terminator of the unary exponent */
00613
00614 for(i=e-1; i>=el; i--){
00615 put_rac(c, state+22+9, (a>>i)&1); /* high mantissa bits share context 31 */
00616 }
00617 for(; i>=0; i--){
00618 put_rac(c, state+22+i, (a>>i)&1); /* contexts 22..31 */
00619 }
00620
00621 if(is_signed)
00622 put_rac(c, state+11 + el, v < 0); /* contexts 11..21 */
00623 #else
00624
00625 put_rac(c, state+0, 0);
00626 if(e<=9){
00627 for(i=0; i<e; i++){
00628 put_rac(c, state+1+i, 1);
00629 }
00630 put_rac(c, state+1+i, 0);
00631
00632 for(i=e-1; i>=0; i--){
00633 put_rac(c, state+22+i, (a>>i)&1);
00634 }
00635
00636 if(is_signed)
00637 put_rac(c, state+11 + e, v < 0);
00638 }else{
00639 for(i=0; i<e; i++){
00640 put_rac(c, state+1+FFMIN(i,9), 1);
00641 }
00642 put_rac(c, state+1+FFMIN(i,9), 0);
00643
00644 for(i=e-1; i>=0; i--){
00645 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1);
00646 }
00647
00648 if(is_signed)
00649 put_rac(c, state+11 + FFMIN(e,10), v < 0);
00650 }
00651 #endif
00652 }else{
00653 put_rac(c, state+0, 1); /* v == 0: only the zero flag is coded */
00654 }
00655 }
00656
00657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
00658 if(get_rac(c, state+0))
00659 return 0;
00660 else{
00661 int i, e, a;
00662 e= 0;
00663 while(get_rac(c, state+1 + FFMIN(e,9))){
00664 e++;
00665 }
00666
00667 a= 1;
00668 for(i=e-1; i>=0; i--){
00669 a += a + get_rac(c, state+22 + FFMIN(i,9));
00670 }
00671
00672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10)))
00673 return -a;
00674 else
00675 return a;
00676 }
00677 }
00678
00679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
00680 int i;
00681 int r= log2>=0 ? 1<<log2 : 1;
00682
00683 assert(v>=0);
00684 assert(log2>=-4);
00685
00686 while(v >= r){
00687 put_rac(c, state+4+log2, 1);
00688 v -= r;
00689 log2++;
00690 if(log2>0) r+=r;
00691 }
00692 put_rac(c, state+4+log2, 0);
00693
00694 for(i=log2-1; i>=0; i--){
00695 put_rac(c, state+31-i, (v>>i)&1);
00696 }
00697 }
00698
00699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
00700 int i;
00701 int r= log2>=0 ? 1<<log2 : 1;
00702 int v=0;
00703
00704 assert(log2>=-4);
00705
00706 while(get_rac(c, state+4+log2)){
00707 v+= r;
00708 log2++;
00709 if(log2>0) r+=r;
00710 }
00711
00712 for(i=log2-1; i>=0; i--){
00713 v+= get_rac(c, state+31-i)<<i;
00714 }
00715
00716 return v;
00717 }
00718
00719 static av_always_inline void
00720 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00721 int dst_step, int src_step, int ref_step,
00722 int width, int mul, int add, int shift,
00723 int highpass, int inverse){
00724 const int mirror_left= !highpass;
00725 const int mirror_right= (width&1) ^ highpass;
00726 const int w= (width>>1) - 1 + (highpass & width);
00727 int i;
00728
00729 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00730 if(mirror_left){
00731 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00732 dst += dst_step;
00733 src += src_step;
00734 }
00735
00736 for(i=0; i<w; i++){
00737 dst[i*dst_step] =
00738 LIFT(src[i*src_step],
00739 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00740 inverse);
00741 }
00742
00743 if(mirror_right){
00744 dst[w*dst_step] =
00745 LIFT(src[w*src_step],
00746 ((mul*2*ref[w*ref_step]+add)>>shift),
00747 inverse);
00748 }
00749 }
00750
00751 static av_always_inline void
00752 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00753 int dst_step, int src_step, int ref_step,
00754 int width, int mul, int add, int shift,
00755 int highpass, int inverse){
00756 const int mirror_left= !highpass;
00757 const int mirror_right= (width&1) ^ highpass;
00758 const int w= (width>>1) - 1 + (highpass & width);
00759 int i;
00760
00761 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00762 if(mirror_left){
00763 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00764 dst += dst_step;
00765 src += src_step;
00766 }
00767
00768 for(i=0; i<w; i++){
00769 dst[i*dst_step] =
00770 LIFT(src[i*src_step],
00771 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00772 inverse);
00773 }
00774
00775 if(mirror_right){
00776 dst[w*dst_step] =
00777 LIFT(src[w*src_step],
00778 ((mul*2*ref[w*ref_step]+add)>>shift),
00779 inverse);
00780 }
00781 }
00782
00783 #ifndef liftS
00784 static av_always_inline void
00785 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00786 int dst_step, int src_step, int ref_step,
00787 int width, int mul, int add, int shift,
00788 int highpass, int inverse){
00789 const int mirror_left= !highpass;
00790 const int mirror_right= (width&1) ^ highpass;
00791 const int w= (width>>1) - 1 + (highpass & width);
00792 int i;
00793
00794 assert(shift == 4);
00795 #define LIFTS(src, ref, inv) \
00796 ((inv) ? \
00797 (src) + (((ref) + 4*(src))>>shift): \
00798 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00799 if(mirror_left){
00800 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00801 dst += dst_step;
00802 src += src_step;
00803 }
00804
00805 for(i=0; i<w; i++){
00806 dst[i*dst_step] =
00807 LIFTS(src[i*src_step],
00808 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00809 inverse);
00810 }
00811
00812 if(mirror_right){
00813 dst[w*dst_step] =
00814 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00815 }
00816 }
00817 static av_always_inline void
00818 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00819 int dst_step, int src_step, int ref_step,
00820 int width, int mul, int add, int shift,
00821 int highpass, int inverse){
00822 const int mirror_left= !highpass;
00823 const int mirror_right= (width&1) ^ highpass;
00824 const int w= (width>>1) - 1 + (highpass & width);
00825 int i;
00826
00827 assert(shift == 4);
00828 #define LIFTS(src, ref, inv) \
00829 ((inv) ? \
00830 (src) + (((ref) + 4*(src))>>shift): \
00831 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00832 if(mirror_left){
00833 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00834 dst += dst_step;
00835 src += src_step;
00836 }
00837
00838 for(i=0; i<w; i++){
00839 dst[i*dst_step] =
00840 LIFTS(src[i*src_step],
00841 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00842 inverse);
00843 }
00844
00845 if(mirror_right){
00846 dst[w*dst_step] =
00847 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00848 }
00849 }
00850 #endif
00851
00852 static void horizontal_decompose53i(DWTELEM *b, int width){
00853 DWTELEM temp[width];
00854 const int width2= width>>1;
00855 int x;
00856 const int w2= (width+1)>>1;
00857
00858 for(x=0; x<width2; x++){
00859 temp[x ]= b[2*x ];
00860 temp[x+w2]= b[2*x + 1];
00861 }
00862 if(width&1)
00863 temp[x ]= b[2*x ];
00864 #if 0
00865 {
00866 int A1,A2,A3,A4;
00867 A2= temp[1 ];
00868 A4= temp[0 ];
00869 A1= temp[0+width2];
00870 A1 -= (A2 + A4)>>1;
00871 A4 += (A1 + 1)>>1;
00872 b[0+width2] = A1;
00873 b[0 ] = A4;
00874 for(x=1; x+1<width2; x+=2){
00875 A3= temp[x+width2];
00876 A4= temp[x+1 ];
00877 A3 -= (A2 + A4)>>1;
00878 A2 += (A1 + A3 + 2)>>2;
00879 b[x+width2] = A3;
00880 b[x ] = A2;
00881
00882 A1= temp[x+1+width2];
00883 A2= temp[x+2 ];
00884 A1 -= (A2 + A4)>>1;
00885 A4 += (A1 + A3 + 2)>>2;
00886 b[x+1+width2] = A1;
00887 b[x+1 ] = A4;
00888 }
00889 A3= temp[width-1];
00890 A3 -= A2;
00891 A2 += (A1 + A3 + 2)>>2;
00892 b[width -1] = A3;
00893 b[width2-1] = A2;
00894 }
00895 #else
00896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
00897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
00898 #endif
00899 }
00900
00901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00902 int i;
00903
00904 for(i=0; i<width; i++){
00905 b1[i] -= (b0[i] + b2[i])>>1;
00906 }
00907 }
00908
00909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00910 int i;
00911
00912 for(i=0; i<width; i++){
00913 b1[i] += (b0[i] + b2[i] + 2)>>2;
00914 }
00915 }
00916
00917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
00918 int y;
00919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
00920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
00921
00922 for(y=-2; y<height; y+=2){
00923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
00924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
00925
00926 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
00927 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
00928
00929 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
00930 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
00931
00932 b0=b2;
00933 b1=b3;
00934 }
00935 }
00936
00937 static void horizontal_decompose97i(DWTELEM *b, int width){
00938 DWTELEM temp[width];
00939 const int w2= (width+1)>>1;
00940
00941 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
00942 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
00943 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
00944 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
00945 }
00946
00947
00948 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00949 int i;
00950
00951 for(i=0; i<width; i++){
00952 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
00953 }
00954 }
00955
00956 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00957 int i;
00958
00959 for(i=0; i<width; i++){
00960 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
00961 }
00962 }
00963
00964 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00965 int i;
00966
00967 for(i=0; i<width; i++){
00968 #ifdef liftS
00969 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
00970 #else
00971 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
00972 #endif
00973 }
00974 }
00975
00976 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00977 int i;
00978
00979 for(i=0; i<width; i++){
00980 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
00981 }
00982 }
00983
00984 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
00985 int y;
00986 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
00987 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
00988 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
00989 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
00990
00991 for(y=-4; y<height; y+=2){
00992 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
00993 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
00994
00995 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
00996 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
00997
00998 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
00999 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
01000 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
01001 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
01002
01003 b0=b2;
01004 b1=b3;
01005 b2=b4;
01006 b3=b5;
01007 }
01008 }
01009
01010 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01011 int level;
01012
01013 for(level=0; level<decomposition_count; level++){
01014 switch(type){
01015 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
01016 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
01017 }
01018 }
01019 }
01020
01021 static void horizontal_compose53i(IDWTELEM *b, int width){
01022 IDWTELEM temp[width];
01023 const int width2= width>>1;
01024 const int w2= (width+1)>>1;
01025 int x;
01026
01027 #if 0
01028 int A1,A2,A3,A4;
01029 A2= temp[1 ];
01030 A4= temp[0 ];
01031 A1= temp[0+width2];
01032 A1 -= (A2 + A4)>>1;
01033 A4 += (A1 + 1)>>1;
01034 b[0+width2] = A1;
01035 b[0 ] = A4;
01036 for(x=1; x+1<width2; x+=2){
01037 A3= temp[x+width2];
01038 A4= temp[x+1 ];
01039 A3 -= (A2 + A4)>>1;
01040 A2 += (A1 + A3 + 2)>>2;
01041 b[x+width2] = A3;
01042 b[x ] = A2;
01043
01044 A1= temp[x+1+width2];
01045 A2= temp[x+2 ];
01046 A1 -= (A2 + A4)>>1;
01047 A4 += (A1 + A3 + 2)>>2;
01048 b[x+1+width2] = A1;
01049 b[x+1 ] = A4;
01050 }
01051 A3= temp[width-1];
01052 A3 -= A2;
01053 A2 += (A1 + A3 + 2)>>2;
01054 b[width -1] = A3;
01055 b[width2-1] = A2;
01056 #else
01057 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
01058 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
01059 #endif
01060 for(x=0; x<width2; x++){
01061 b[2*x ]= temp[x ];
01062 b[2*x + 1]= temp[x+w2];
01063 }
01064 if(width&1)
01065 b[2*x ]= temp[x ];
01066 }
01067
01068 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01069 int i;
01070
01071 for(i=0; i<width; i++){
01072 b1[i] += (b0[i] + b2[i])>>1;
01073 }
01074 }
01075
01076 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01077 int i;
01078
01079 for(i=0; i<width; i++){
01080 b1[i] -= (b0[i] + b2[i] + 2)>>2;
01081 }
01082 }
01083
01084 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
01085 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
01086 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
01087 cs->y = -1;
01088 }
01089
01090 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
01091 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
01092 cs->b1 = buffer + mirror(-1 , height-1)*stride;
01093 cs->y = -1;
01094 }
01095
01096 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
01097 int y= cs->y;
01098
01099 IDWTELEM *b0= cs->b0;
01100 IDWTELEM *b1= cs->b1;
01101 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
01102 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
01103
01104 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01105 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01106
01107 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01108 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01109
01110 cs->b0 = b2;
01111 cs->b1 = b3;
01112 cs->y += 2;
01113 }
01114
01115 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
01116 int y= cs->y;
01117 IDWTELEM *b0= cs->b0;
01118 IDWTELEM *b1= cs->b1;
01119 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
01120 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
01121
01122 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01123 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01124
01125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01127
01128 cs->b0 = b2;
01129 cs->b1 = b3;
01130 cs->y += 2;
01131 }
01132
01133 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
01134 DWTCompose cs;
01135 spatial_compose53i_init(&cs, buffer, height, stride);
01136 while(cs.y <= height)
01137 spatial_compose53i_dy(&cs, buffer, width, height, stride);
01138 }
01139
01140
01141 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
01142 IDWTELEM temp[width];
01143 const int w2= (width+1)>>1;
01144
01145 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
01146 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
01147 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
01148 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
01149 }
01150
01151 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01152 int i;
01153
01154 for(i=0; i<width; i++){
01155 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01156 }
01157 }
01158
01159 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01160 int i;
01161
01162 for(i=0; i<width; i++){
01163 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
01164 }
01165 }
01166
01167 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01168 int i;
01169
01170 for(i=0; i<width; i++){
01171 #ifdef liftS
01172 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
01173 #else
01174 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
01175 #endif
01176 }
01177 }
01178
01179 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01180 int i;
01181
01182 for(i=0; i<width; i++){
01183 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
01184 }
01185 }
01186
01187 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
01188 int i;
01189
01190 for(i=0; i<width; i++){
01191 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
01192 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
01193 #ifdef liftS
01194 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
01195 #else
01196 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
01197 #endif
01198 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01199 }
01200 }
01201
01202 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
01203 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
01204 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
01205 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
01206 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
01207 cs->y = -3;
01208 }
01209
01210 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
01211 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
01212 cs->b1 = buffer + mirror(-3 , height-1)*stride;
01213 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
01214 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
01215 cs->y = -3;
01216 }
01217
01218 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
01219 int y = cs->y;
01220
01221 IDWTELEM *b0= cs->b0;
01222 IDWTELEM *b1= cs->b1;
01223 IDWTELEM *b2= cs->b2;
01224 IDWTELEM *b3= cs->b3;
01225 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
01226 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
01227
01228 if(y>0 && y+4<height){
01229 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
01230 }else{
01231 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01232 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01233 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01234 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01235 }
01236
01237 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
01238 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
01239
01240 cs->b0=b2;
01241 cs->b1=b3;
01242 cs->b2=b4;
01243 cs->b3=b5;
01244 cs->y += 2;
01245 }
01246
01247 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
01248 int y = cs->y;
01249 IDWTELEM *b0= cs->b0;
01250 IDWTELEM *b1= cs->b1;
01251 IDWTELEM *b2= cs->b2;
01252 IDWTELEM *b3= cs->b3;
01253 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
01254 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
01255
01256 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01257 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01258 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01259 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01260
01261 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
01262 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
01263
01264 cs->b0=b2;
01265 cs->b1=b3;
01266 cs->b2=b4;
01267 cs->b3=b5;
01268 cs->y += 2;
01269 }
01270
01271 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
01272 DWTCompose cs;
01273 spatial_compose97i_init(&cs, buffer, height, stride);
01274 while(cs.y <= height)
01275 spatial_compose97i_dy(&cs, buffer, width, height, stride);
01276 }
01277
01278 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
01279 int level;
01280 for(level=decomposition_count-1; level>=0; level--){
01281 switch(type){
01282 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01283 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01284 }
01285 }
01286 }
01287
01288 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01289 int level;
01290 for(level=decomposition_count-1; level>=0; level--){
01291 switch(type){
01292 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
01293 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
01294 }
01295 }
01296 }
01297
01298 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
01299 const int support = type==1 ? 3 : 5;
01300 int level;
01301 if(type==2) return;
01302
01303 for(level=decomposition_count-1; level>=0; level--){
01304 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01305 switch(type){
01306 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01307 break;
01308 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01309 break;
01310 }
01311 }
01312 }
01313 }
01314
01315 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
01316 const int support = type==1 ? 3 : 5;
01317 int level;
01318 if(type==2) return;
01319
01320 for(level=decomposition_count-1; level>=0; level--){
01321 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01322 switch(type){
01323 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01324 break;
01325 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01326 break;
01327 }
01328 }
01329 }
01330 }
01331
01332 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01333 DWTCompose cs[MAX_DECOMPOSITIONS];
01334 int y;
01335 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
01336 for(y=0; y<height; y+=4)
01337 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
01338 }
01339
/*
 * Encode one subband with the range coder, in two passes over the samples.
 *
 * Pass 1 only looks at positions whose left, top-left, top, top-right and
 * parent samples are all zero: it counts consecutive zero samples at such
 * positions and records one run length in runs[] each time a non-zero
 * sample ends a run.
 * Pass 2 writes the bitstream: the number of recorded runs, the run
 * lengths as they are consumed, a significance flag for every position
 * that has a non-zero neighbour, and magnitude plus sign for every
 * non-zero sample.
 *
 * s           codec context; s->c is the range coder written to
 * b           subband descriptor (dimensions, parent, context states)
 * src         samples of this subband
 * parent      samples of the parent subband, or NULL
 * stride      row distance of src in samples (parent rows use 2*stride)
 * orientation not referenced in this function
 *
 * Returns 0 on success, -1 if the output buffer is about to run out.
 */
01340 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01341 const int w= b->width;
01342 const int h= b->height;
01343 int x, y;
01344
01345 if(1){
01346 int run=0;
/* one slot per sample is enough: at most one run is stored per sample */
01347 int runs[w*h];
01348 int run_index=0;
01349 int max_index;
01350
/* ---- pass 1: collect zero-run lengths ---- */
01351 for(y=0; y<h; y++){
01352 for(x=0; x<w; x++){
01353 int v, p=0;
01354 int l=0, lt=0, t=0, rt=0;
01355 v= src[x + y*stride];
01356
/* neighbours outside the subband stay 0 */
01357 if(y){
01358 t= src[x + (y-1)*stride];
01359 if(x){
01360 lt= src[x - 1 + (y-1)*stride];
01361 }
01362 if(x + 1 < w){
01363 rt= src[x + 1 + (y-1)*stride];
01364 }
01365 }
01366 if(x){
01367 l= src[x - 1 + y*stride];
01368
01369
01370
01371
01372 }
/* co-located parent sample at half resolution, if inside the parent band */
01373 if(parent){
01374 int px= x>>1;
01375 int py= y>>1;
01376 if(px<b->parent->width && py<b->parent->height)
01377 p= parent[px + py*2*stride];
01378 }
/* only positions with an all-zero context take part in run coding */
01379 if(!(l|lt|t|rt|p)){
01380 if(v){
01381 runs[run_index++]= run;
01382 run=0;
01383 }else{
01384 run++;
01385 }
01386 }
01387 }
01388 }
/* max_index = number of runs that were ended by a non-zero sample;
   the trailing (unterminated) run is stored after them */
01389 max_index= run_index;
01390 runs[run_index++]= run;
01391 run_index=0;
01392 run= runs[run_index++];
01393
/* ---- pass 2: write the bitstream ---- */
01394 put_symbol2(&s->c, b->state[30], max_index, 0);
/* the trailing run is never written, hence the run_index <= max_index test */
01395 if(run_index <= max_index)
01396 put_symbol2(&s->c, b->state[1], run, 3);
01397
01398 for(y=0; y<h; y++){
/* require w*40 bytes of headroom in the output buffer before each row */
01399 if(s->c.bytestream_end - s->c.bytestream < w*40){
01400 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
01401 return -1;
01402 }
01403 for(x=0; x<w; x++){
01404 int v, p=0;
01405 int l=0, lt=0, t=0, rt=0;
01406 v= src[x + y*stride];
01407
/* same neighbourhood gathering as in pass 1 */
01408 if(y){
01409 t= src[x + (y-1)*stride];
01410 if(x){
01411 lt= src[x - 1 + (y-1)*stride];
01412 }
01413 if(x + 1 < w){
01414 rt= src[x + 1 + (y-1)*stride];
01415 }
01416 }
01417 if(x){
01418 l= src[x - 1 + y*stride];
01419
01420
01421
01422
01423 }
01424 if(parent){
01425 int px= x>>1;
01426 int py= y>>1;
01427 if(px<b->parent->width && py<b->parent->height)
01428 p= parent[px + py*2*stride];
01429 }
01430 if(l|lt|t|rt|p){
/* non-zero context: code an explicit significance flag */
01431 int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01432
01433 put_rac(&s->c, &b->state[0][context], !!v);
01434 }else{
/* zero context: significance is implied by the run counter */
01435 if(!run){
01436 run= runs[run_index++];
01437
01438 if(run_index <= max_index)
01439 put_symbol2(&s->c, b->state[1], run, 3);
01440 assert(v);
01441 }else{
01442 run--;
01443 assert(!v);
01444 }
01445 }
01446 if(v){
/* magnitude minus one, then the sign with a context built from the
   left and top samples packed as 2*|x| + (x<0) */
01447 int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01448 int l2= 2*FFABS(l) + (l<0);
01449 int t2= 2*FFABS(t) + (t<0);
01450
01451 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
01452 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
01453 }
01454 }
01455 }
01456 }
01457 return 0;
01458 }
01459
01460 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01461
01462
01463 return encode_subband_c0run(s, b, src, parent, stride, orientation);
01464
01465 }
01466
/*
 * Decode the coefficients of one subband from the range coder into the
 * sparse list b->x_coeff.
 *
 * The list holds, row after row, one (x, coeff) entry per non-zero sample,
 * each row being closed by an entry with x = w+1; one more such entry is
 * appended after the last row. coeff is stored packed as
 * 2*magnitude + sign, with sign 1 for negative values.
 *
 * s           codec context; s->c is the range coder read from
 * b           subband being decoded (dimensions, states, x_coeff output)
 * parent      parent subband whose x_coeff list is used as context, or NULL
 * orientation not referenced in this function
 */
01467 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
01468 const int w= b->width;
01469 const int h= b->height;
01470 int x,y;
01471
01472 if(1){
01473 int run, runs;
/* xc: write cursor; prev_xc: read cursor into the previous row's entries;
   prev2_xc: start of the row currently being written */
01474 x_and_coeff *xc= b->x_coeff;
01475 x_and_coeff *prev_xc= NULL;
01476 x_and_coeff *prev2_xc= xc;
/* parent_xc: read cursor into the parent's list; prev_parent_xc: start of
   the parent row in use, so it can be rewound */
01477 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
01478 x_and_coeff *prev_parent_xc= parent_xc;
01479
/* number of coded runs, then the first run length; INT_MAX means no
   further run is coded */
01480 runs= get_symbol2(&s->c, b->state[30], 0);
01481 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
01482 else run= INT_MAX;
01483
01484 for(y=0; y<h; y++){
/* v doubles as the left neighbour of the next sample */
01485 int v=0;
01486 int lt=0, t=0, rt=0;
01487
/* prev_xc is NULL on the first row, hence the y guard */
01488 if(y && prev_xc->x == 0){
01489 rt= prev_xc->coeff;
01490 }
01491 for(x=0; x<w; x++){
01492 int p=0;
01493 const int l= v;
01494
/* shift the top neighbours one position to the left */
01495 lt= t; t= rt;
01496
01497 if(y){
/* advance in the previous row so that prev_xc->x > x, then pick up the
   top-right sample if it exists */
01498 if(prev_xc->x <= x)
01499 prev_xc++;
01500 if(prev_xc->x == x + 1)
01501 rt= prev_xc->coeff;
01502 else
01503 rt=0;
01504 }
01505 if(parent_xc){
/* the parent band has half the resolution: compare against x>>1 */
01506 if(x>>1 > parent_xc->x){
01507 parent_xc++;
01508 }
01509 if(x>>1 == parent_xc->x){
01510 p= parent_xc->coeff;
01511 }
01512 }
01513 if(l|lt|t|rt|p){
/* neighbours are in packed form, so x>>1 is the magnitude and t&~1 is
   twice the magnitude of t */
01514 int context= av_log2(3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
01515
01516 v=get_rac(&s->c, &b->state[0][context]);
01517 if(v){
01518 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
01519 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
01520
01521 xc->x=x;
01522 (xc++)->coeff= v;
01523 }
01524 }else{
/* all-zero context: significance is driven by the run counter */
01525 if(!run){
01526 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
01527 else run= INT_MAX;
/* same symbol layout as above, with context fixed to 0 */
01528 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
01529 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
01530
01531 xc->x=x;
01532 (xc++)->coeff= v;
01533 }else{
01534 int max_run;
01535 run--;
01536 v=0;
01537
/* skip ahead over further zero samples of the run, bounded by the next
   entry of the previous row (or the row end on the first row) and by the
   next parent entry */
01538 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
01539 else max_run= FFMIN(run, w-x-1);
01540 if(parent_xc)
01541 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
01542 x+= max_run;
01543 run-= max_run;
01544 }
01545 }
01546 }
/* close the row with an end marker */
01547 (xc++)->x= w+1;
01548 prev_xc= prev2_xc;
01549 prev2_xc= xc;
01550
01551 if(parent_xc){
/* after an odd row move on to the next parent row, otherwise rewind to
   the start of the current parent row */
01552 if(y&1){
01553 while(parent_xc->x != parent->width+1)
01554 parent_xc++;
01555 parent_xc++;
01556 prev_parent_xc= parent_xc;
01557 }else{
01558 parent_xc= prev_parent_xc;
01559 }
01560 }
01561 }
01562
/* one extra end marker after the last row */
01563 (xc++)->x= w+1;
01564 }
01565 }
01566
01567 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
01568 const int w= b->width;
01569 int y;
01570 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
01571 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
01572 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
01573 int new_index = 0;
01574
01575 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
01576 qadd= 0;
01577 qmul= 1<<QEXPSHIFT;
01578 }
01579
01580
01581 if (start_y != 0)
01582 new_index = save_state[0];
01583
01584
01585 for(y=start_y; y<h; y++){
01586 int x = 0;
01587 int v;
01588 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
01589 memset(line, 0, b->width*sizeof(IDWTELEM));
01590 v = b->x_coeff[new_index].coeff;
01591 x = b->x_coeff[new_index++].x;
01592 while(x < w){
01593 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
01594 register int u= -(v&1);
01595 line[x] = (t^u) - u;
01596
01597 v = b->x_coeff[new_index].coeff;
01598 x = b->x_coeff[new_index++].x;
01599 }
01600 }
01601
01602
01603 save_state[0] = new_index;
01604
01605 return;
01606 }
01607
01608 static void reset_contexts(SnowContext *s){
01609 int plane_index, level, orientation;
01610
01611 for(plane_index=0; plane_index<3; plane_index++){
01612 for(level=0; level<MAX_DECOMPOSITIONS; level++){
01613 for(orientation=level ? 1:0; orientation<4; orientation++){
01614 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
01615 }
01616 }
01617 }
01618 memset(s->header_state, MID_STATE, sizeof(s->header_state));
01619 memset(s->block_state, MID_STATE, sizeof(s->block_state));
01620 }
01621
01622 static int alloc_blocks(SnowContext *s){
01623 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
01624 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
01625
01626 s->b_width = w;
01627 s->b_height= h;
01628
01629 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
01630 return 0;
01631 }
01632
01633 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
01634 uint8_t *bytestream= d->bytestream;
01635 uint8_t *bytestream_start= d->bytestream_start;
01636 *d= *s;
01637 d->bytestream= bytestream;
01638 d->bytestream_start= bytestream_start;
01639 }
01640
01641
/**
 * Sum of all pixels in a w x w block.
 *
 * @param pix       top-left pixel of the block
 * @param line_size distance between rows, in bytes
 * @param w         block width and height
 * @return the sum of the w*w pixel values
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *p = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += p[col];
    }

    return total;
}
01656
01657
01658 static int pix_norm1(uint8_t * pix, int line_size, int w)
01659 {
01660 int s, i, j;
01661 uint32_t *sq = ff_squareTbl + 256;
01662
01663 s = 0;
01664 for (i = 0; i < w; i++) {
01665 for (j = 0; j < w; j ++) {
01666 s += sq[pix[0]];
01667 pix ++;
01668 }
01669 pix += line_size - w;
01670 }
01671 return s;
01672 }
01673
01674 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
01675 const int w= s->b_width << s->block_max_depth;
01676 const int rem_depth= s->block_max_depth - level;
01677 const int index= (x + y*w) << rem_depth;
01678 const int block_w= 1<<rem_depth;
01679 BlockNode block;
01680 int i,j;
01681
01682 block.color[0]= l;
01683 block.color[1]= cb;
01684 block.color[2]= cr;
01685 block.mx= mx;
01686 block.my= my;
01687 block.ref= ref;
01688 block.type= type;
01689 block.level= level;
01690
01691 for(j=0; j<block_w; j++){
01692 for(i=0; i<block_w; i++){
01693 s->block[index + i + j*w]= block;
01694 }
01695 }
01696 }
01697
01698 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
01699 const int offset[3]= {
01700 y*c-> stride + x,
01701 ((y*c->uvstride + x)>>1),
01702 ((y*c->uvstride + x)>>1),
01703 };
01704 int i;
01705 for(i=0; i<3; i++){
01706 c->src[0][i]= src [i];
01707 c->ref[0][i]= ref [i] + offset[i];
01708 }
01709 assert(!ref_index);
01710 }
01711
01712 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
01713 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
01714 if(s->ref_frames == 1){
01715 *mx = mid_pred(left->mx, top->mx, tr->mx);
01716 *my = mid_pred(left->my, top->my, tr->my);
01717 }else{
01718 const int *scale = scale_mv_ref[ref];
01719 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
01720 (top ->mx * scale[top ->ref] + 128) >>8,
01721 (tr ->mx * scale[tr ->ref] + 128) >>8);
01722 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
01723 (top ->my * scale[top ->ref] + 128) >>8,
01724 (tr ->my * scale[tr ->ref] + 128) >>8);
01725 }
01726 }
01727
01728
/* Named rows of the local candidate table "int P[10][2]" declared in
 * encode_q_branch(); each row holds one motion vector as { x, y }. */
01729 #define P_LEFT P[1]
01730 #define P_TOP P[2]
01731 #define P_TOPRIGHT P[3]
01732 #define P_MEDIAN P[4]
01733 #define P_MV1 P[9]
01734 #define FLAG_QPEL 1 //must be 1
01735
/**
 * Encode one node of the block quadtree, choosing by cost between an inter
 * block, an intra block and (below the maximum depth) a split into four
 * children.
 *
 * The inter and the intra candidate are each coded into a private copy of
 * the range coder (pc / ic) that writes into a stack buffer, using private
 * copies of s->block_state. If the node is not at s->block_max_depth, the
 * split flag 0 is then written to the real coder s->c and the four children
 * are encoded recursively. When the split is not strictly cheaper than both
 * other candidates, the output position saved on entry (pbbak) is overwritten
 * with the winning trial buffer and s->c / s->block_state are replaced by the
 * winning trial copies.
 *
 * On keyframes nothing is written: the block is stored as intra with the
 * left neighbour's colours and 0 is returned.
 *
 * @param s     codec context; s->c, s->block_state and s->block are updated
 * @param level current quadtree depth (0 is the largest block size)
 * @param x     block column, in blocks of this level
 * @param y     block row, in blocks of this level
 * @return score of the alternative that was kept (0 on keyframes)
 */
01736 static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* trial output buffers and context-state copies: p_* for inter, i_* for intra */
01737 uint8_t p_buffer[1024];
01738 uint8_t i_buffer[1024];
01739 uint8_t p_state[sizeof(s->block_state)];
01740 uint8_t i_state[sizeof(s->block_state)];
01741 RangeCoder pc, ic;
/* output position on entry; restored when a trial encoding replaces the split */
01742 uint8_t *pbbak= s->c.bytestream;
01743 uint8_t *pbbak_start= s->c.bytestream_start;
01744 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
/* w/h: size of the s->block array in blocks of the deepest level */
01745 const int w= s->b_width << s->block_max_depth;
01746 const int h= s->b_height << s->block_max_depth;
01747 const int rem_depth= s->block_max_depth - level;
01748 const int index= (x + y*w) << rem_depth;
01749 const int block_w= 1<<(LOG2_MB_SIZE - level);
01750 int trx= (x+1)<<rem_depth;
01751 int try= (y+1)<<rem_depth;
/* neighbours outside the block array fall back to null_block; tl falls back
 * to left and tr falls back to tl */
01752 const BlockNode *left = x ? &s->block[index-1] : &null_block;
01753 const BlockNode *top = y ? &s->block[index-w] : &null_block;
01754 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
01755 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
01756 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
01757 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
/* colour predictors: the left neighbour's colours */
01758 int pl = left->color[0];
01759 int pcb= left->color[1];
01760 int pcr= left->color[2];
01761 int pmx, pmy;
01762 int mx=0, my=0;
01763 int l,cr,cb;
01764 const int stride= s->current_picture.linesize[0];
01765 const int uvstride= s->current_picture.linesize[1];
/* start of this block in each plane of the input picture (chroma at half the offset) */
01766 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
01767 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
01768 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
01769 int P[10][2];
01770 int16_t last_mv[3][2];
01771 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL);
01772 const int shift= 1+qpel;
01773 MotionEstContext *c= &s->m.me;
/* context indices for the range coder, derived from the neighbouring blocks */
01774 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
01775 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
01776 int my_context= av_log2(2*FFABS(left->my - top->my));
01777 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
01778 int ref, best_ref, ref_score, ref_mx, ref_my;
01779
01780 assert(sizeof(s->block_state) >= 256);
/* keyframe: no search and no bits, just store an intra block */
01781 if(s->keyframe){
01782 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
01783 return 0;
01784 }
01785
01786
01787
/* predictor candidates handed to the motion search */
01788 P_LEFT[0]= left->mx;
01789 P_LEFT[1]= left->my;
01790 P_TOP [0]= top->mx;
01791 P_TOP [1]= top->my;
01792 P_TOPRIGHT[0]= tr->mx;
01793 P_TOPRIGHT[1]= tr->my;
01794
/* vectors currently stored in s->block for this position and for the right
 * and bottom neighbours */
01795 last_mv[0][0]= s->block[index].mx;
01796 last_mv[0][1]= s->block[index].my;
01797 last_mv[1][0]= right->mx;
01798 last_mv[1][1]= right->my;
01799 last_mv[2][0]= bottom->mx;
01800 last_mv[2][1]= bottom->my;
01801
01802 s->m.mb_stride=2;
01803 s->m.mb_x=
01804 s->m.mb_y= 0;
01805 c->skip= 0;
01806
01807 assert(c-> stride == stride);
01808 assert(c->uvstride == uvstride);
01809
01810 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
01811 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
01812 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
/* note: the index expression also assigns s->m.f_code = 1 */
01813 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
01814
/* allowed displacement range relative to this block: the area covered by the
 * block grid plus a margin of 16-2 on every side */
01815 c->xmin = - x*block_w - 16+2;
01816 c->ymin = - y*block_w - 16+2;
01817 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
01818 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
01819
/* clamp the candidates to the range (range limits are scaled by 'shift' to
 * the units of the candidates); only P_TOPRIGHT[0] is clamped from below */
01820 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
01821 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
01822 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
01823 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
01824 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
01825 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
01826 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
01827
01828 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
01829 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
01830
/* first row: predict from the left neighbour only, otherwise from the median */
01831 if (!y) {
01832 c->pred_x= P_LEFT[0];
01833 c->pred_y= P_LEFT[1];
01834 } else {
01835 c->pred_x = P_MEDIAN[0];
01836 c->pred_y = P_MEDIAN[1];
01837 }
01838
/* motion search in every reference frame; the lowest ref_score wins */
01839 score= INT_MAX;
01840 best_ref= 0;
01841 for(ref=0; ref<s->ref_frames; ref++){
/* init_ref() asserts its last argument is 0; the frame is selected through the data pointers */
01842 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
01843
01844 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, 0, last_mv,
01845 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
01846
01847 assert(ref_mx >= c->xmin);
01848 assert(ref_mx <= c->xmax);
01849 assert(ref_my >= c->ymin);
01850 assert(ref_my <= c->ymax);
01851
/* the value returned by sub_motion_search is immediately replaced by
 * ff_get_mb_score(); the call is kept for its update of ref_mx/ref_my */
01852 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
01853 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* extra cost that grows with the reference index (zero for ref 0) */
01854 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* remember the per-reference result when the caches are allocated */
01855 if(s->ref_mvs[ref]){
01856 s->ref_mvs[ref][index][0]= ref_mx;
01857 s->ref_mvs[ref][index][1]= ref_my;
01858 s->ref_scores[ref][index]= ref_score;
01859 }
01860 if(score > ref_score){
01861 score= ref_score;
01862 best_ref= ref;
01863 mx= ref_mx;
01864 my= ref_my;
01865 }
01866 }
01867
01868
01869
/* reference point for measuring how many bits each trial encoding adds */
01870 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
/* --- inter trial: code into pc/p_buffer with a private state copy --- */
01871 pc= s->c;
01872 pc.bytestream_start=
01873 pc.bytestream= p_buffer;
01874 memcpy(p_state, s->block_state, sizeof(s->block_state));
01875
/* flag 1 = "not split"; only coded when a split would be possible */
01876 if(level!=s->block_max_depth)
01877 put_rac(&pc, &p_state[4 + s_context], 1);
/* block type flag: 0 = inter */
01878 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
01879 if(s->ref_frames > 1)
01880 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
01881 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
01882 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
01883 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
01884 p_len= pc.bytestream - pc.bytestream_start;
/* add the bit cost of the trial, weighted by lambda2 */
01885 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
01886
/* --- intra candidate: one colour per plane, the rounded mean of the block --- */
01887 block_s= block_w*block_w;
01888 sum = pix_sum(current_data[0], stride, block_w);
01889 l= (sum + block_s/2)/block_s;
/* presumably the squared error of replacing the luma block by l, expanded as
 * sum(p*p) - 2*l*sum(p) + l*l*N -- assumes pix_norm1 returns sum(p*p) */
01890 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
01891
01892 block_s= block_w*block_w>>2;
01893 sum = pix_sum(current_data[1], uvstride, block_w>>1);
01894 cb= (sum + block_s/2)/block_s;
01895
01896 sum = pix_sum(current_data[2], uvstride, block_w>>1);
01897 cr= (sum + block_s/2)/block_s;
01898
01899
/* --- intra trial: code into ic/i_buffer with a private state copy --- */
01900 ic= s->c;
01901 ic.bytestream_start=
01902 ic.bytestream= i_buffer;
01903 memcpy(i_state, s->block_state, sizeof(s->block_state));
01904 if(level!=s->block_max_depth)
01905 put_rac(&ic, &i_state[4 + s_context], 1);
/* block type flag: 1 = intra, followed by the colour deltas against the left block */
01906 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
01907 put_symbol(&ic, &i_state[32], l-pl , 1);
01908 put_symbol(&ic, &i_state[64], cb-pcb, 1);
01909 put_symbol(&ic, &i_state[96], cr-pcr, 1);
01910 i_len= ic.bytestream - ic.bytestream_start;
01911 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
01912
01913
01914 assert(iscore < 255*255*256 + s->lambda2*10);
01915 assert(iscore >= 0);
01916 assert(l>=0 && l<=255);
01917 assert(pl>=0 && pl<=255);
01918
/* top-level blocks also feed the scene change statistic */
01919 if(level==0){
01920 int varc= iscore >> 8;
01921 int vard= score >> 8;
01922 if (vard <= 64 || vard < varc)
01923 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
01924 else
01925 c->scene_change_score+= s->m.qscale;
01926 }
01927
/* --- split candidate: written directly to the real coder s->c --- */
01928 if(level!=s->block_max_depth){
01929 put_rac(&s->c, &s->block_state[4 + s_context], 0);
01930 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
01931 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
01932 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
01933 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
01934 score2+= s->lambda2>>FF_LAMBDA_SHIFT;
01935
/* the split is kept only if it beats both unsplit candidates; its bits are
 * already in s->c */
01936 if(score2 < score && score2 < iscore)
01937 return score2;
01938 }
01939
/* unsplit block wins: copy the winning trial bytes to the saved output
 * position and adopt that trial's coder and context state */
01940 if(iscore < score){
/* intra blocks still store a predicted vector (for ref 0) in the block array */
01941 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
01942 memcpy(pbbak, i_buffer, i_len);
01943 s->c= ic;
01944 s->c.bytestream_start= pbbak_start;
01945 s->c.bytestream= pbbak + i_len;
01946 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
01947 memcpy(s->block_state, i_state, sizeof(s->block_state));
01948 return iscore;
01949 }else{
01950 memcpy(pbbak, p_buffer, p_len);
01951 s->c= pc;
01952 s->c.bytestream_start= pbbak_start;
01953 s->c.bytestream= pbbak + p_len;
/* inter blocks store the left neighbour's colours alongside the vector */
01954 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
01955 memcpy(s->block_state, p_state, sizeof(s->block_state));
01956 return score;
01957 }
01958 }
01959
01960 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
01961 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
01962 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
01963 }else{
01964 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
01965 }
01966 }
01967
01968 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
01969 const int w= s->b_width << s->block_max_depth;
01970 const int rem_depth= s->block_max_depth - level;
01971 const int index= (x + y*w) << rem_depth;
01972 int trx= (x+1)<<rem_depth;
01973 BlockNode *b= &s->block[index];
01974 const BlockNode *left = x ? &s->block[index-1] : &null_block;
01975 const BlockNode *top = y ? &s->block[index-w] : &null_block;
01976 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
01977 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
01978 int pl = left->color[0];
01979 int pcb= left->color[1];
01980 int pcr= left->color[2];
01981 int pmx, pmy;
01982 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
01983 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
01984 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
01985 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
01986
01987 if(s->keyframe){
01988 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
01989 return;
01990 }
01991
01992 if(level!=s->block_max_depth){
01993 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
01994 put_rac(&s->c, &s->block_state[4 + s_context], 1);
01995 }else{
01996 put_rac(&s->c, &s->block_state[4 + s_context], 0);
01997 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
01998 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
01999 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
02000 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
02001 return;
02002 }
02003 }
02004 if(b->type & BLOCK_INTRA){
02005 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
02006 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
02007 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
02008 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
02009 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
02010 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
02011 }else{
02012 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
02013 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
02014 if(s->ref_frames > 1)
02015 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
02016 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
02017 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
02018 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
02019 }
02020 }
02021
02022 static void decode_q_branch(SnowContext *s, int level, int x, int y){
02023 const int w= s->b_width << s->block_max_depth;
02024 const int rem_depth= s->block_max_depth - level;
02025 const int index= (x + y*w) << rem_depth;
02026 int trx= (x+1)<<rem_depth;
02027 const BlockNode *left = x ? &s->block[index-1] : &null_block;
02028 const BlockNode *top = y ? &s->block[index-w] : &null_block;
02029 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
02030 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
02031 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02032
02033 if(s->keyframe){
02034 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
02035 return;
02036 }
02037
02038 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
02039 int type, mx, my;
02040 int l = left->color[0];
02041 int cb= left->color[1];
02042 int cr= left->color[2];
02043 int ref = 0;
02044 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02045 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
02046 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
02047
02048 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
02049
02050 if(type){
02051 pred_mv(s, &mx, &my, 0, left, top, tr);
02052 l += get_symbol(&s->c, &s->block_state[32], 1);
02053 cb+= get_symbol(&s->c, &s->block_state[64], 1);
02054 cr+= get_symbol(&s->c, &s->block_state[96], 1);
02055 }else{
02056 if(s->ref_frames > 1)
02057 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
02058 pred_mv(s, &mx, &my, ref, left, top, tr);
02059 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
02060 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
02061 }
02062 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
02063 }else{
02064 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
02065 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
02066 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
02067 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
02068 }
02069 }
02070
02071 static void encode_blocks(SnowContext *s, int search){
02072 int x, y;
02073 int w= s->b_width;
02074 int h= s->b_height;
02075
02076 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
02077 iterative_me(s);
02078
02079 for(y=0; y<h; y++){
02080 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){
02081 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
02082 return;
02083 }
02084 for(x=0; x<w; x++){
02085 if(s->avctx->me_method == ME_ITER || !search)
02086 encode_q_branch2(s, 0, x, y);
02087 else
02088 encode_q_branch (s, 0, x, y);
02089 }
02090 }
02091 }
02092
02093 static void decode_blocks(SnowContext *s){
02094 int x, y;
02095 int w= s->b_width;
02096 int h= s->b_height;
02097
02098 for(y=0; y<h; y++){
02099 for(x=0; x<w; x++){
02100 decode_q_branch(s, 0, x, y);
02101 }
02102 }
02103 }
02104
/**
 * Interpolate a b_w x b_h block at the fractional position (dx, dy), each in
 * 0..15 (asserted), and write it to dst.
 *
 * Up to three filtered planes are computed first, depending on which ones the
 * position needs: a horizontally filtered plane (tmp2t[0], with unclipped
 * values kept in tmpIt), a vertically filtered plane (tmp2t[1]) and a plane
 * filtered in both directions (tmp2t[2], computed from tmpIt). The output is
 * then either a bilinear mix of four planes or a weighted mix of two.
 *
 * @param p      plane parameters; may be NULL, in which case the fixed
 *               (20,-5,1) filter is used
 * @param dst    destination, advanced by stride per row
 * @param src    source, expected to start HTAPS_MAX/2-1 samples left of and
 *               above the block (see the offsets applied below)
 * @param tmp    not used by this function
 * @param stride line size shared by src, dst and the temporary planes
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal fractional position
 * @param dy     vertical fractional position
 */
02105 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
/* blend weight (out of 8) of the first plane in the two-plane path, indexed by (dx&7) + 8*(dy&7) */
02106 static const uint8_t weight[64]={
02107 8,7,6,5,4,3,2,1,
02108 7,7,0,0,0,0,0,1,
02109 6,0,6,0,0,0,2,0,
02110 5,0,0,5,0,3,0,0,
02111 4,0,0,0,4,0,0,0,
02112 3,0,0,5,0,3,0,0,
02113 2,0,6,0,0,0,2,0,
02114 1,7,0,0,0,0,0,1,
02115 };
02116
/* indexed by dx + 16*dy: high nibble and low nibble are the hpel[] indices of
 * the two planes to blend; both nibbles are 0xc (needs[12] == 15) where the
 * four-plane path is taken instead */
02117 static const uint8_t brane[256]={
02118 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
02119 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
02120 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
02121 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
02122 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
02123 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
02124 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
02125 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
02126 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
02127 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
02128 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
02129 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
02130 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
02131 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
02132 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
02133 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
02134 };
02135
/* indexed by an hpel[] index: bit mask of the filter passes that plane
 * requires (tested below as b&5, b&2, b&4); 15 selects the four-plane path */
02136 static const uint8_t needs[16]={
02137 0,1,0,0,
02138 2,4,2,0,
02139 0,1,0,0,
02140 15
02141 };
02142
02143 int x, y, b, r, l;
/* tmpIt: unclipped horizontal filter output, 64 values per row;
 * tmp2t: three clipped 8-bit planes (variable-length array sized by stride) */
02144 int16_t tmpIt [64*(32+HTAPS_MAX)];
02145 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
02146 int16_t *tmpI= tmpIt;
02147 uint8_t *tmp2= tmp2t[0];
02148 const uint8_t *hpel[11];
02149 assert(dx<16 && dy<16);
02150 r= brane[dx + 16*dy]&15;
02151 l= brane[dx + 16*dy]>>4;
02152
02153 b= needs[l] | needs[r];
/* planes without diag_mc always take the four-plane path */
02154 if(p && !p->diag_mc)
02155 b= 15;
02156
/* pass 1: horizontal filter over b_h+HTAPS_MAX-1 rows, so that pass 3 can
 * filter the result vertically */
02157 if(b&5){
02158 for(y=0; y < b_h+HTAPS_MAX-1; y++){
02159 for(x=0; x < b_w; x++){
02160 int a_1=src[x + HTAPS_MAX/2-4];
02161 int a0= src[x + HTAPS_MAX/2-3];
02162 int a1= src[x + HTAPS_MAX/2-2];
02163 int a2= src[x + HTAPS_MAX/2-1];
02164 int a3= src[x + HTAPS_MAX/2+0];
02165 int a4= src[x + HTAPS_MAX/2+1];
02166 int a5= src[x + HTAPS_MAX/2+2];
02167 int a6= src[x + HTAPS_MAX/2+3];
02168 int am=0;
02169 if(!p || p->fast_mc){
02170 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
02171 tmpI[x]= am;
02172 am= (am+16)>>5;
02173 }else{
02174 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
02175 tmpI[x]= am;
02176 am= (am+32)>>6;
02177 }
02178
/* clamp to 0..255 once stored as uint8_t: negative -> 0, above 255 -> ~0 (0xFF) */
02179 if(am&(~255)) am= ~(am>>31);
02180 tmp2[x]= am;
02181 }
02182 tmpI+= 64;
02183 tmp2+= stride;
02184 src += stride;
02185 }
/* rewind src to the first row */
02186 src -= stride*y;
02187 }
/* from here on src points at the column of the block's first sample */
02188 src += HTAPS_MAX/2 - 1;
02189 tmp2= tmp2t[1];
02190
/* pass 2: vertical filter on the source, one extra column (b_w+1) because
 * hpel[6] reads this plane shifted by one sample */
02191 if(b&2){
02192 for(y=0; y < b_h; y++){
02193 for(x=0; x < b_w+1; x++){
02194 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
02195 int a0= src[x + (HTAPS_MAX/2-3)*stride];
02196 int a1= src[x + (HTAPS_MAX/2-2)*stride];
02197 int a2= src[x + (HTAPS_MAX/2-1)*stride];
02198 int a3= src[x + (HTAPS_MAX/2+0)*stride];
02199 int a4= src[x + (HTAPS_MAX/2+1)*stride];
02200 int a5= src[x + (HTAPS_MAX/2+2)*stride];
02201 int a6= src[x + (HTAPS_MAX/2+3)*stride];
02202 int am=0;
02203 if(!p || p->fast_mc)
02204 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
02205 else
02206 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
02207
02208 if(am&(~255)) am= ~(am>>31);
02209 tmp2[x]= am;
02210 }
02211 src += stride;
02212 tmp2+= stride;
02213 }
02214 src -= stride*y;
02215 }
/* from here on src points at the block's first sample */
02216 src += stride*(HTAPS_MAX/2 - 1);
02217 tmp2= tmp2t[2];
02218 tmpI= tmpIt;
/* pass 3: vertical filter on the unclipped horizontal output; the shift is
 * doubled (10 or 12 bits) because two filter gains are applied */
02219 if(b&4){
02220 for(y=0; y < b_h; y++){
02221 for(x=0; x < b_w; x++){
02222 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
02223 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
02224 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
02225 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
02226 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
02227 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
02228 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
02229 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
02230 int am=0;
02231 if(!p || p->fast_mc)
02232 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
02233 else
02234 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
02235 if(am&(~255)) am= ~(am>>31);
02236 tmp2[x]= am;
02237 }
02238 tmpI+= 64;
02239 tmp2+= stride;
02240 }
02241 }
02242
/* 3x3 grid of planes, index = column + 4*row: columns are source, horizontal
 * plane, source+1; rows are unshifted, vertically filtered, one row down.
 * Entries 3 and 7 are left unset. */
02243 hpel[ 0]= src;
02244 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
02245 hpel[ 2]= src + 1;
02246
02247 hpel[ 4]= tmp2t[1];
02248 hpel[ 5]= tmp2t[2];
02249 hpel[ 6]= tmp2t[1] + 1;
02250
02251 hpel[ 8]= src + stride;
02252 hpel[ 9]= hpel[1] + stride;
02253 hpel[10]= hpel[8] + 1;
02254
/* four-plane path: bilinear mix of the surrounding planes with weights in 1/8 steps */
02255 if(b==15){
02256 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
02257 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
02258 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
02259 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
02260 dx&=7;
02261 dy&=7;
02262 for(y=0; y < b_h; y++){
02263 for(x=0; x < b_w; x++){
02264 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
02265 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
02266 }
02267 src1+=stride;
02268 src2+=stride;
02269 src3+=stride;
02270 src4+=stride;
02271 dst +=stride;
02272 }
02273 }else{
/* two-plane path: mix planes l and r with weights a and 8-a */
02274 const uint8_t *src1= hpel[l];
02275 const uint8_t *src2= hpel[r];
02276 int a= weight[((dx&7) + (8*(dy&7)))];
/* note: this b shadows the outer bit mask b */
02277 int b= 8-a;
02278 for(y=0; y < b_h; y++){
02279 for(x=0; x < b_w; x++){
02280 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
02281 }
02282 src1+=stride;
02283 src2+=stride;
02284 dst +=stride;
02285 }
02286 }
02287 }
02288
/*
 * mca(dx, dy, b_w) defines a function mc_block_hpel<dx><dy><b_w>(dst, src,
 * stride, h) that forwards to mc_block() with a NULL plane, a square
 * b_w x b_w block and the fixed fractional position (dx, dy). h must equal
 * b_w (asserted). src is moved back by HTAPS_MAX/2-1 samples and rows before
 * the call. The tmp array is passed as mc_block()'s tmp argument, which
 * mc_block() does not read.
 */
02289 #define mca(dx,dy,b_w)\
02290 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
02291 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
02292 assert(h==b_w);\
02293 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
02294 }
02295
/* instantiate the four positions (0 or 8 in each direction) for 16x16 and 8x8 blocks */
02296 mca( 0, 0,16)
02297 mca( 8, 0,16)
02298 mca( 0, 8,16)
02299 mca( 8, 8,16)
02300 mca( 0, 0,8)
02301 mca( 8, 0,8)
02302 mca( 0, 8,8)
02303 mca( 8, 8,8)
02304
02305 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
02306 if(block->type & BLOCK_INTRA){
02307 int x, y;
02308 const int color = block->color[plane_index];
02309 const int color4= color*0x01010101;
02310 if(b_w==32){
02311 for(y=0; y < b_h; y++){
02312 *(uint32_t*)&dst[0 + y*stride]= color4;
02313 *(uint32_t*)&dst[4 + y*stride]= color4;
02314 *(uint32_t*)&dst[8 + y*stride]= color4;
02315 *(uint32_t*)&dst[12+ y*stride]= color4;
02316 *(uint32_t*)&dst[16+ y*stride]= color4;
02317 *(uint32_t*)&dst[20+ y*stride]= color4;
02318 *(uint32_t*)&dst[24+ y*stride]= color4;
02319 *(uint32_t*)&dst[28+ y*stride]= color4;
02320 }
02321 }else if(b_w==16){
02322 for(y=0; y < b_h; y++){
02323 *(uint32_t*)&dst[0 + y*stride]= color4;
02324 *(uint32_t*)&dst[4 + y*stride]= color4;
02325 *(uint32_t*)&dst[8 + y*stride]= color4;
02326 *(uint32_t*)&dst[12+ y*stride]= color4;
02327 }
02328 }else if(b_w==8){
02329 for(y=0; y < b_h; y++){
02330 *(uint32_t*)&dst[0 + y*stride]= color4;
02331 *(uint32_t*)&dst[4 + y*stride]= color4;
02332 }
02333 }else if(b_w==4){
02334 for(y=0; y < b_h; y++){
02335 *(uint32_t*)&dst[0 + y*stride]= color4;
02336 }
02337 }else{
02338 for(y=0; y < b_h; y++){
02339 for(x=0; x < b_w; x++){
02340 dst[x + y*stride]= color;
02341 }
02342 }
02343 }
02344 }else{
02345 uint8_t *src= s->last_picture[block->ref].data[plane_index];
02346 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
02347 int mx= block->mx*scale;
02348 int my= block->my*scale;
02349 const int dx= mx&15;
02350 const int dy= my&15;
02351 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
02352 sx += (mx>>4) - (HTAPS_MAX/2-1);
02353 sy += (my>>4) - (HTAPS_MAX/2-1);
02354 src += sx + sy*stride;
02355 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
02356 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
02357 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
02358 src= tmp + MB_SIZE;
02359 }
02360
02361
02362 assert(b_w>1 && b_h>1);
02363 assert((tab_index>=0 && tab_index<4) || b_w==32);
02364 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
02365 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
02366 else if(b_w==32){
02367 int y;
02368 for(y=0; y<b_h; y+=16){
02369 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
02370 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
02371 }
02372 }else if(b_w==b_h)
02373 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
02374 else if(b_w==2*b_h){
02375 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
02376 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
02377 }else{
02378 assert(2*b_w==b_h);
02379 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
02380 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
02381 }
02382 }
02383 }
02384
02385 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
02386 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
02387 int y, x;
02388 IDWTELEM * dst;
02389 for(y=0; y<b_h; y++){
02390
02391 const uint8_t *obmc1= obmc + y*obmc_stride;
02392 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02393 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02394 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02395 dst = slice_buffer_get_line(sb, src_y + y);
02396 for(x=0; x<b_w; x++){
02397 int v= obmc1[x] * block[3][x + y*src_stride]
02398 +obmc2[x] * block[2][x + y*src_stride]
02399 +obmc3[x] * block[1][x + y*src_stride]
02400 +obmc4[x] * block[0][x + y*src_stride];
02401
02402 v <<= 8 - LOG2_OBMC_MAX;
02403 if(FRAC_BITS != 8){
02404 v >>= 8 - FRAC_BITS;
02405 }
02406 if(add){
02407 v += dst[x + src_x];
02408 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02409 if(v&(~255)) v= ~(v>>31);
02410 dst8[x + y*src_stride] = v;
02411 }else{
02412 dst[x + src_x] -= v;
02413 }
02414 }
02415 }
02416 }
02417
02418
02419 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
02420 const int b_width = s->b_width << s->block_max_depth;
02421 const int b_height= s->b_height << s->block_max_depth;
02422 const int b_stride= b_width;
02423 BlockNode *lt= &s->block[b_x + b_y*b_stride];
02424 BlockNode *rt= lt+1;
02425 BlockNode *lb= lt+b_stride;
02426 BlockNode *rb= lb+1;
02427 uint8_t *block[4];
02428 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
02429 uint8_t *tmp = s->scratchbuf;
02430 uint8_t *ptmp;
02431 int x,y;
02432
02433 if(b_x<0){
02434 lt= rt;
02435 lb= rb;
02436 }else if(b_x + 1 >= b_width){
02437 rt= lt;
02438 rb= lb;
02439 }
02440 if(b_y<0){
02441 lt= lb;
02442 rt= rb;
02443 }else if(b_y + 1 >= b_height){
02444 lb= lt;
02445 rb= rt;
02446 }
02447
02448 if(src_x<0){
02449 obmc -= src_x;
02450 b_w += src_x;
02451 if(!sliced && !offset_dst)
02452 dst -= src_x;
02453 src_x=0;
02454 }else if(src_x + b_w > w){
02455 b_w = w - src_x;
02456 }
02457 if(src_y<0){
02458 obmc -= src_y*obmc_stride;
02459 b_h += src_y;
02460 if(!sliced && !offset_dst)
02461 dst -= src_y*dst_stride;
02462 src_y=0;
02463 }else if(src_y + b_h> h){
02464 b_h = h - src_y;
02465 }
02466
02467 if(b_w<=0 || b_h<=0) return;
02468
02469 assert(src_stride > 2*MB_SIZE + 5);
02470
02471 if(!sliced && offset_dst)
02472 dst += src_x + src_y*dst_stride;
02473 dst8+= src_x + src_y*src_stride;
02474
02475
02476 ptmp= tmp + 3*tmp_step;
02477 block[0]= ptmp;
02478 ptmp+=tmp_step;
02479 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
02480
02481 if(same_block(lt, rt)){
02482 block[1]= block[0];
02483 }else{
02484 block[1]= ptmp;
02485 ptmp+=tmp_step;
02486 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
02487 }
02488
02489 if(same_block(lt, lb)){
02490 block[2]= block[0];
02491 }else if(same_block(rt, lb)){
02492 block[2]= block[1];
02493 }else{
02494 block[2]= ptmp;
02495 ptmp+=tmp_step;
02496 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
02497 }
02498
02499 if(same_block(lt, rb) ){
02500 block[3]= block[0];
02501 }else if(same_block(rt, rb)){
02502 block[3]= block[1];
02503 }else if(same_block(lb, rb)){
02504 block[3]= block[2];
02505 }else{
02506 block[3]= ptmp;
02507 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
02508 }
02509 #if 0
02510 for(y=0; y<b_h; y++){
02511 for(x=0; x<b_w; x++){
02512 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
02513 if(add) dst[x + y*dst_stride] += v;
02514 else dst[x + y*dst_stride] -= v;
02515 }
02516 }
02517 for(y=0; y<b_h; y++){
02518 uint8_t *obmc2= obmc + (obmc_stride>>1);
02519 for(x=0; x<b_w; x++){
02520 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
02521 if(add) dst[x + y*dst_stride] += v;
02522 else dst[x + y*dst_stride] -= v;
02523 }
02524 }
02525 for(y=0; y<b_h; y++){
02526 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02527 for(x=0; x<b_w; x++){
02528 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
02529 if(add) dst[x + y*dst_stride] += v;
02530 else dst[x + y*dst_stride] -= v;
02531 }
02532 }
02533 for(y=0; y<b_h; y++){
02534 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02535 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02536 for(x=0; x<b_w; x++){
02537 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
02538 if(add) dst[x + y*dst_stride] += v;
02539 else dst[x + y*dst_stride] -= v;
02540 }
02541 }
02542 #else
02543 if(sliced){
02544 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
02545 }else{
02546 for(y=0; y<b_h; y++){
02547
02548 const uint8_t *obmc1= obmc + y*obmc_stride;
02549 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02550 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02551 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02552 for(x=0; x<b_w; x++){
02553 int v= obmc1[x] * block[3][x + y*src_stride]
02554 +obmc2[x] * block[2][x + y*src_stride]
02555 +obmc3[x] * block[1][x + y*src_stride]
02556 +obmc4[x] * block[0][x + y*src_stride];
02557
02558 v <<= 8 - LOG2_OBMC_MAX;
02559 if(FRAC_BITS != 8){
02560 v >>= 8 - FRAC_BITS;
02561 }
02562 if(add){
02563 v += dst[x + y*dst_stride];
02564 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02565 if(v&(~255)) v= ~(v>>31);
02566 dst8[x + y*src_stride] = v;
02567 }else{
02568 dst[x + y*dst_stride] -= v;
02569 }
02570 }
02571 }
02572 }
02573 #endif
02574 }
02575
02576 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
02577 Plane *p= &s->plane[plane_index];
02578 const int mb_w= s->b_width << s->block_max_depth;
02579 const int mb_h= s->b_height << s->block_max_depth;
02580 int x, y, mb_x;
02581 int block_size = MB_SIZE >> s->block_max_depth;
02582 int block_w = plane_index ? block_size/2 : block_size;
02583 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02584 int obmc_stride= plane_index ? block_size : 2*block_size;
02585 int ref_stride= s->current_picture.linesize[plane_index];
02586 uint8_t *dst8= s->current_picture.data[plane_index];
02587 int w= p->width;
02588 int h= p->height;
02589
02590 if(s->keyframe || (s->avctx->debug&512)){
02591 if(mb_y==mb_h)
02592 return;
02593
02594 if(add){
02595 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02596
02597 IDWTELEM * line = sb->line[y];
02598 for(x=0; x<w; x++){
02599
02600 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02601 v >>= FRAC_BITS;
02602 if(v&(~255)) v= ~(v>>31);
02603 dst8[x + y*ref_stride]= v;
02604 }
02605 }
02606 }else{
02607 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02608
02609 IDWTELEM * line = sb->line[y];
02610 for(x=0; x<w; x++){
02611 line[x] -= 128 << FRAC_BITS;
02612
02613 }
02614 }
02615 }
02616
02617 return;
02618 }
02619
02620 for(mb_x=0; mb_x<=mb_w; mb_x++){
02621 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
02622 block_w*mb_x - block_w/2,
02623 block_w*mb_y - block_w/2,
02624 block_w, block_w,
02625 w, h,
02626 w, ref_stride, obmc_stride,
02627 mb_x - 1, mb_y - 1,
02628 add, 0, plane_index);
02629 }
02630 }
02631
02632 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
02633 Plane *p= &s->plane[plane_index];
02634 const int mb_w= s->b_width << s->block_max_depth;
02635 const int mb_h= s->b_height << s->block_max_depth;
02636 int x, y, mb_x;
02637 int block_size = MB_SIZE >> s->block_max_depth;
02638 int block_w = plane_index ? block_size/2 : block_size;
02639 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02640 const int obmc_stride= plane_index ? block_size : 2*block_size;
02641 int ref_stride= s->current_picture.linesize[plane_index];
02642 uint8_t *dst8= s->current_picture.data[plane_index];
02643 int w= p->width;
02644 int h= p->height;
02645
02646 if(s->keyframe || (s->avctx->debug&512)){
02647 if(mb_y==mb_h)
02648 return;
02649
02650 if(add){
02651 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02652 for(x=0; x<w; x++){
02653 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02654 v >>= FRAC_BITS;
02655 if(v&(~255)) v= ~(v>>31);
02656 dst8[x + y*ref_stride]= v;
02657 }
02658 }
02659 }else{
02660 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02661 for(x=0; x<w; x++){
02662 buf[x + y*w]-= 128<<FRAC_BITS;
02663 }
02664 }
02665 }
02666
02667 return;
02668 }
02669
02670 for(mb_x=0; mb_x<=mb_w; mb_x++){
02671 add_yblock(s, 0, NULL, buf, dst8, obmc,
02672 block_w*mb_x - block_w/2,
02673 block_w*mb_y - block_w/2,
02674 block_w, block_w,
02675 w, h,
02676 w, ref_stride, obmc_stride,
02677 mb_x - 1, mb_y - 1,
02678 add, 1, plane_index);
02679 }
02680 }
02681
02682 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
02683 const int mb_h= s->b_height << s->block_max_depth;
02684 int mb_y;
02685 for(mb_y=0; mb_y<=mb_h; mb_y++)
02686 predict_slice(s, buf, plane_index, add, mb_y);
02687 }
02688
02689 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
02690 int i, x2, y2;
02691 Plane *p= &s->plane[plane_index];
02692 const int block_size = MB_SIZE >> s->block_max_depth;
02693 const int block_w = plane_index ? block_size/2 : block_size;
02694 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02695 const int obmc_stride= plane_index ? block_size : 2*block_size;
02696 const int ref_stride= s->current_picture.linesize[plane_index];
02697 uint8_t *src= s-> input_picture.data[plane_index];
02698 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
02699 const int b_stride = s->b_width << s->block_max_depth;
02700 const int w= p->width;
02701 const int h= p->height;
02702 int index= mb_x + mb_y*b_stride;
02703 BlockNode *b= &s->block[index];
02704 BlockNode backup= *b;
02705 int ab=0;
02706 int aa=0;
02707
02708 b->type|= BLOCK_INTRA;
02709 b->color[plane_index]= 0;
02710 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
02711
02712 for(i=0; i<4; i++){
02713 int mb_x2= mb_x + (i &1) - 1;
02714 int mb_y2= mb_y + (i>>1) - 1;
02715 int x= block_w*mb_x2 + block_w/2;
02716 int y= block_w*mb_y2 + block_w/2;
02717
02718 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
02719 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
02720
02721 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
02722 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
02723 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
02724 int obmc_v= obmc[index];
02725 int d;
02726 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
02727 if(x<0) obmc_v += obmc[index + block_w];
02728 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
02729 if(x+block_w>w) obmc_v += obmc[index - block_w];
02730
02731
02732 d = -dst[index] + (1<<(FRAC_BITS-1));
02733 dst[index] = d;
02734 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
02735 aa += obmc_v * obmc_v;
02736 }
02737 }
02738 }
02739 *b= backup;
02740
02741 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255);
02742 }
02743
02744 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
02745 const int b_stride = s->b_width << s->block_max_depth;
02746 const int b_height = s->b_height<< s->block_max_depth;
02747 int index= x + y*b_stride;
02748 const BlockNode *b = &s->block[index];
02749 const BlockNode *left = x ? &s->block[index-1] : &null_block;
02750 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
02751 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
02752 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
02753 int dmx, dmy;
02754
02755
02756
02757 if(x<0 || x>=b_stride || y>=b_height)
02758 return 0;
02759
02760
02761
02762
02763
02764
02765
02766
02767
02768 if(b->type & BLOCK_INTRA){
02769 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
02770 + av_log2(2*FFABS(left->color[1] - b->color[1]))
02771 + av_log2(2*FFABS(left->color[2] - b->color[2])));
02772 }else{
02773 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
02774 dmx-= b->mx;
02775 dmy-= b->my;
02776 return 2*(1 + av_log2(2*FFABS(dmx))
02777 + av_log2(2*FFABS(dmy))
02778 + av_log2(2*b->ref));
02779 }
02780 }
02781
02782 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
02783 Plane *p= &s->plane[plane_index];
02784 const int block_size = MB_SIZE >> s->block_max_depth;
02785 const int block_w = plane_index ? block_size/2 : block_size;
02786 const int obmc_stride= plane_index ? block_size : 2*block_size;
02787 const int ref_stride= s->current_picture.linesize[plane_index];
02788 uint8_t *dst= s->current_picture.data[plane_index];
02789 uint8_t *src= s-> input_picture.data[plane_index];
02790 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
02791 uint8_t *cur = s->scratchbuf;
02792 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
02793 const int b_stride = s->b_width << s->block_max_depth;
02794 const int b_height = s->b_height<< s->block_max_depth;
02795 const int w= p->width;
02796 const int h= p->height;
02797 int distortion;
02798 int rate= 0;
02799 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02800 int sx= block_w*mb_x - block_w/2;
02801 int sy= block_w*mb_y - block_w/2;
02802 int x0= FFMAX(0,-sx);
02803 int y0= FFMAX(0,-sy);
02804 int x1= FFMIN(block_w*2, w-sx);
02805 int y1= FFMIN(block_w*2, h-sy);
02806 int i,x,y;
02807
02808 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
02809
02810 for(y=y0; y<y1; y++){
02811 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
02812 const IDWTELEM *pred1 = pred + y*obmc_stride;
02813 uint8_t *cur1 = cur + y*ref_stride;
02814 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
02815 for(x=x0; x<x1; x++){
02816 #if FRAC_BITS >= LOG2_OBMC_MAX
02817 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
02818 #else
02819 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
02820 #endif
02821 v = (v + pred1[x]) >> FRAC_BITS;
02822 if(v&(~255)) v= ~(v>>31);
02823 dst1[x] = v;
02824 }
02825 }
02826
02827
02828 if(LOG2_OBMC_MAX == 8
02829 && (mb_x == 0 || mb_x == b_stride-1)
02830 && (mb_y == 0 || mb_y == b_height-1)){
02831 if(mb_x == 0)
02832 x1 = block_w;
02833 else
02834 x0 = block_w;
02835 if(mb_y == 0)
02836 y1 = block_w;
02837 else
02838 y0 = block_w;
02839 for(y=y0; y<y1; y++)
02840 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
02841 }
02842
02843 if(block_w==16){
02844
02845
02846
02847
02848
02849
02850 if(s->avctx->me_cmp == FF_CMP_W97)
02851 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02852 else if(s->avctx->me_cmp == FF_CMP_W53)
02853 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02854 else{
02855 distortion = 0;
02856 for(i=0; i<4; i++){
02857 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
02858 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
02859 }
02860 }
02861 }else{
02862 assert(block_w==8);
02863 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
02864 }
02865
02866 if(plane_index==0){
02867 for(i=0; i<4; i++){
02868
02869
02870
02871
02872 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
02873 }
02874 if(mb_x == b_stride-2)
02875 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
02876 }
02877 return distortion + rate*penalty_factor;
02878 }
02879
02880 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
02881 int i, y2;
02882 Plane *p= &s->plane[plane_index];
02883 const int block_size = MB_SIZE >> s->block_max_depth;
02884 const int block_w = plane_index ? block_size/2 : block_size;
02885 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02886 const int obmc_stride= plane_index ? block_size : 2*block_size;
02887 const int ref_stride= s->current_picture.linesize[plane_index];
02888 uint8_t *dst= s->current_picture.data[plane_index];
02889 uint8_t *src= s-> input_picture.data[plane_index];
02890
02891
02892 static IDWTELEM zero_dst[4096];
02893 const int b_stride = s->b_width << s->block_max_depth;
02894 const int w= p->width;
02895 const int h= p->height;
02896 int distortion= 0;
02897 int rate= 0;
02898 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02899
02900 for(i=0; i<9; i++){
02901 int mb_x2= mb_x + (i%3) - 1;
02902 int mb_y2= mb_y + (i/3) - 1;
02903 int x= block_w*mb_x2 + block_w/2;
02904 int y= block_w*mb_y2 + block_w/2;
02905
02906 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
02907 x, y, block_w, block_w, w, h, 0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
02908
02909
02910 for(y2= y; y2<0; y2++)
02911 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02912 for(y2= h; y2<y+block_w; y2++)
02913 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02914 if(x<0){
02915 for(y2= y; y2<y+block_w; y2++)
02916 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
02917 }
02918 if(x+block_w > w){
02919 for(y2= y; y2<y+block_w; y2++)
02920 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
02921 }
02922
02923 assert(block_w== 8 || block_w==16);
02924 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
02925 }
02926
02927 if(plane_index==0){
02928 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
02929 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
02930
02931
02932
02933
02934
02935
02936 if(merged)
02937 rate = get_block_bits(s, mb_x, mb_y, 2);
02938 for(i=merged?4:0; i<9; i++){
02939 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
02940 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
02941 }
02942 }
02943 return distortion + rate*penalty_factor;
02944 }
02945
02946 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
02947 const int b_stride= s->b_width << s->block_max_depth;
02948 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
02949 BlockNode backup= *block;
02950 int rd, index, value;
02951
02952 assert(mb_x>=0 && mb_y>=0);
02953 assert(mb_x<b_stride);
02954
02955 if(intra){
02956 block->color[0] = p[0];
02957 block->color[1] = p[1];
02958 block->color[2] = p[2];
02959 block->type |= BLOCK_INTRA;
02960 }else{
02961 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
02962 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
02963 if(s->me_cache[index] == value)
02964 return 0;
02965 s->me_cache[index]= value;
02966
02967 block->mx= p[0];
02968 block->my= p[1];
02969 block->type &= ~BLOCK_INTRA;
02970 }
02971
02972 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
02973
02974
02975 if(rd < *best_rd){
02976 *best_rd= rd;
02977 return 1;
02978 }else{
02979 *block= backup;
02980 return 0;
02981 }
02982 }
02983
02984
02985
02986 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
02987 int p[2] = {p0, p1};
02988 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
02989 }
02990
02991 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
02992 const int b_stride= s->b_width << s->block_max_depth;
02993 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
02994 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
02995 int rd, index, value;
02996
02997 assert(mb_x>=0 && mb_y>=0);
02998 assert(mb_x<b_stride);
02999 assert(((mb_x|mb_y)&1) == 0);
03000
03001 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
03002 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
03003 if(s->me_cache[index] == value)
03004 return 0;
03005 s->me_cache[index]= value;
03006
03007 block->mx= p0;
03008 block->my= p1;
03009 block->ref= ref;
03010 block->type &= ~BLOCK_INTRA;
03011 block[1]= block[b_stride]= block[b_stride+1]= *block;
03012
03013 rd= get_4block_rd(s, mb_x, mb_y, 0);
03014
03015
03016 if(rd < *best_rd){
03017 *best_rd= rd;
03018 return 1;
03019 }else{
03020 block[0]= backup[0];
03021 block[1]= backup[1];
03022 block[b_stride]= backup[2];
03023 block[b_stride+1]= backup[3];
03024 return 0;
03025 }
03026 }
03027
03028 static void iterative_me(SnowContext *s){
03029 int pass, mb_x, mb_y;
03030 const int b_width = s->b_width << s->block_max_depth;
03031 const int b_height= s->b_height << s->block_max_depth;
03032 const int b_stride= b_width;
03033 int color[3];
03034
03035 {
03036 RangeCoder r = s->c;
03037 uint8_t state[sizeof(s->block_state)];
03038 memcpy(state, s->block_state, sizeof(s->block_state));
03039 for(mb_y= 0; mb_y<s->b_height; mb_y++)
03040 for(mb_x= 0; mb_x<s->b_width; mb_x++)
03041 encode_q_branch(s, 0, mb_x, mb_y);
03042 s->c = r;
03043 memcpy(s->block_state, state, sizeof(s->block_state));
03044 }
03045
03046 for(pass=0; pass<25; pass++){
03047 int change= 0;
03048
03049 for(mb_y= 0; mb_y<b_height; mb_y++){
03050 for(mb_x= 0; mb_x<b_width; mb_x++){
03051 int dia_change, i, j, ref;
03052 int best_rd= INT_MAX, ref_rd;
03053 BlockNode backup, ref_b;
03054 const int index= mb_x + mb_y * b_stride;
03055 BlockNode *block= &s->block[index];
03056 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
03057 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
03058 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
03059 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
03060 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
03061 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
03062 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
03063 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
03064 const int b_w= (MB_SIZE >> s->block_max_depth);
03065 uint8_t obmc_edged[b_w*2][b_w*2];
03066
03067 if(pass && (block->type & BLOCK_OPT))
03068 continue;
03069 block->type |= BLOCK_OPT;
03070
03071 backup= *block;
03072
03073 if(!s->me_cache_generation)
03074 memset(s->me_cache, 0, sizeof(s->me_cache));
03075 s->me_cache_generation += 1<<22;
03076
03077
03078 {
03079 int x, y;
03080 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
03081 if(mb_x==0)
03082 for(y=0; y<b_w*2; y++)
03083 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
03084 if(mb_x==b_stride-1)
03085 for(y=0; y<b_w*2; y++)
03086 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
03087 if(mb_y==0){
03088 for(x=0; x<b_w*2; x++)
03089 obmc_edged[0][x] += obmc_edged[b_w-1][x];
03090 for(y=1; y<b_w; y++)
03091 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
03092 }
03093 if(mb_y==b_height-1){
03094 for(x=0; x<b_w*2; x++)
03095 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
03096 for(y=b_w; y<b_w*2-1; y++)
03097 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
03098 }
03099 }
03100
03101
03102 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
03103 uint8_t *src= s-> input_picture.data[0];
03104 uint8_t *dst= s->current_picture.data[0];
03105 const int stride= s->current_picture.linesize[0];
03106 const int block_w= MB_SIZE >> s->block_max_depth;
03107 const int sx= block_w*mb_x - block_w/2;
03108 const int sy= block_w*mb_y - block_w/2;
03109 const int w= s->plane[0].width;
03110 const int h= s->plane[0].height;
03111 int y;
03112
03113 for(y=sy; y<0; y++)
03114 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03115 for(y=h; y<sy+block_w*2; y++)
03116 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03117 if(sx<0){
03118 for(y=sy; y<sy+block_w*2; y++)
03119 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
03120 }
03121 if(sx+block_w*2 > w){
03122 for(y=sy; y<sy+block_w*2; y++)
03123 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
03124 }
03125 }
03126
03127
03128 for(i=0; i<3; i++)
03129 color[i]= get_dc(s, mb_x, mb_y, i);
03130
03131
03132 if(pass > 0 && (block->type&BLOCK_INTRA)){
03133 int color0[3]= {block->color[0], block->color[1], block->color[2]};
03134 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
03135 }else
03136 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
03137
03138 ref_b= *block;
03139 ref_rd= best_rd;
03140 for(ref=0; ref < s->ref_frames; ref++){
03141 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
03142 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2)
03143 continue;
03144 block->ref= ref;
03145 best_rd= INT_MAX;
03146
03147 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
03148 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
03149 if(tb)
03150 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
03151 if(lb)
03152 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
03153 if(rb)
03154 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
03155 if(bb)
03156 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
03157
03158
03159
03160 do{
03161 dia_change=0;
03162 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
03163 for(j=0; j<i; j++){
03164 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03165 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03166 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03167 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03168 }
03169 }
03170 }while(dia_change);
03171
03172 do{
03173 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
03174 dia_change=0;
03175 for(i=0; i<8; i++)
03176 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
03177 }while(dia_change);
03178
03179
03180 mvr[0][0]= block->mx;
03181 mvr[0][1]= block->my;
03182 if(ref_rd > best_rd){
03183 ref_rd= best_rd;
03184 ref_b= *block;
03185 }
03186 }
03187 best_rd= ref_rd;
03188 *block= ref_b;
03189 #if 1
03190 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
03191
03192 #endif
03193 if(!same_block(block, &backup)){
03194 if(tb ) tb ->type &= ~BLOCK_OPT;
03195 if(lb ) lb ->type &= ~BLOCK_OPT;
03196 if(rb ) rb ->type &= ~BLOCK_OPT;
03197 if(bb ) bb ->type &= ~BLOCK_OPT;
03198 if(tlb) tlb->type &= ~BLOCK_OPT;
03199 if(trb) trb->type &= ~BLOCK_OPT;
03200 if(blb) blb->type &= ~BLOCK_OPT;
03201 if(brb) brb->type &= ~BLOCK_OPT;
03202 change ++;
03203 }
03204 }
03205 }
03206 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
03207 if(!change)
03208 break;
03209 }
03210
03211 if(s->block_max_depth == 1){
03212 int change= 0;
03213 for(mb_y= 0; mb_y<b_height; mb_y+=2){
03214 for(mb_x= 0; mb_x<b_width; mb_x+=2){
03215 int i;
03216 int best_rd, init_rd;
03217 const int index= mb_x + mb_y * b_stride;
03218 BlockNode *b[4];
03219
03220 b[0]= &s->block[index];
03221 b[1]= b[0]+1;
03222 b[2]= b[0]+b_stride;
03223 b[3]= b[2]+1;
03224 if(same_block(b[0], b[1]) &&
03225 same_block(b[0], b[2]) &&
03226 same_block(b[0], b[3]))
03227 continue;
03228
03229 if(!s->me_cache_generation)
03230 memset(s->me_cache, 0, sizeof(s->me_cache));
03231 s->me_cache_generation += 1<<22;
03232
03233 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
03234
03235
03236 check_4block_inter(s, mb_x, mb_y,
03237 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
03238 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
03239
03240 for(i=0; i<4; i++)
03241 if(!(b[i]->type&BLOCK_INTRA))
03242 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
03243
03244 if(init_rd != best_rd)
03245 change++;
03246 }
03247 }
03248 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
03249 }
03250 }
03251
03252 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
03253 const int w= b->width;
03254 const int h= b->height;
03255 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03256 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
03257 int x,y, thres1, thres2;
03258
03259 if(s->qlog == LOSSLESS_QLOG){
03260 for(y=0; y<h; y++)
03261 for(x=0; x<w; x++)
03262 dst[x + y*stride]= src[x + y*stride];
03263 return;
03264 }
03265
03266 bias= bias ? 0 : (3*qmul)>>3;
03267 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
03268 thres2= 2*thres1;
03269
03270 if(!bias){
03271 for(y=0; y<h; y++){
03272 for(x=0; x<w; x++){
03273 int i= src[x + y*stride];
03274
03275 if((unsigned)(i+thres1) > thres2){
03276 if(i>=0){
03277 i<<= QEXPSHIFT;
03278 i/= qmul;
03279 dst[x + y*stride]= i;
03280 }else{
03281 i= -i;
03282 i<<= QEXPSHIFT;
03283 i/= qmul;
03284 dst[x + y*stride]= -i;
03285 }
03286 }else
03287 dst[x + y*stride]= 0;
03288 }
03289 }
03290 }else{
03291 for(y=0; y<h; y++){
03292 for(x=0; x<w; x++){
03293 int i= src[x + y*stride];
03294
03295 if((unsigned)(i+thres1) > thres2){
03296 if(i>=0){
03297 i<<= QEXPSHIFT;
03298 i= (i + bias) / qmul;
03299 dst[x + y*stride]= i;
03300 }else{
03301 i= -i;
03302 i<<= QEXPSHIFT;
03303 i= (i + bias) / qmul;
03304 dst[x + y*stride]= -i;
03305 }
03306 }else
03307 dst[x + y*stride]= 0;
03308 }
03309 }
03310 }
03311 }
03312
03313 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
03314 const int w= b->width;
03315 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03316 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03317 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03318 int x,y;
03319
03320 if(s->qlog == LOSSLESS_QLOG) return;
03321
03322 for(y=start_y; y<end_y; y++){
03323
03324 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03325 for(x=0; x<w; x++){
03326 int i= line[x];
03327 if(i<0){
03328 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT));
03329 }else if(i>0){
03330 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
03331 }
03332 }
03333 }
03334 }
03335
03336 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
03337 const int w= b->width;
03338 const int h= b->height;
03339 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03340 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03341 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03342 int x,y;
03343
03344 if(s->qlog == LOSSLESS_QLOG) return;
03345
03346 for(y=0; y<h; y++){
03347 for(x=0; x<w; x++){
03348 int i= src[x + y*stride];
03349 if(i<0){
03350 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT));
03351 }else if(i>0){
03352 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
03353 }
03354 }
03355 }
03356 }
03357
03358 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03359 const int w= b->width;
03360 const int h= b->height;
03361 int x,y;
03362
03363 for(y=h-1; y>=0; y--){
03364 for(x=w-1; x>=0; x--){
03365 int i= x + y*stride;
03366
03367 if(x){
03368 if(use_median){
03369 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03370 else src[i] -= src[i - 1];
03371 }else{
03372 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03373 else src[i] -= src[i - 1];
03374 }
03375 }else{
03376 if(y) src[i] -= src[i - stride];
03377 }
03378 }
03379 }
03380 }
03381
03382 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
03383 const int w= b->width;
03384 int x,y;
03385
03386 IDWTELEM * line=0;
03387 IDWTELEM * prev;
03388
03389 if (start_y != 0)
03390 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03391
03392 for(y=start_y; y<end_y; y++){
03393 prev = line;
03394
03395 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03396 for(x=0; x<w; x++){
03397 if(x){
03398 if(use_median){
03399 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
03400 else line[x] += line[x - 1];
03401 }else{
03402 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
03403 else line[x] += line[x - 1];
03404 }
03405 }else{
03406 if(y) line[x] += prev[x];
03407 }
03408 }
03409 }
03410 }
03411
03412 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03413 const int w= b->width;
03414 const int h= b->height;
03415 int x,y;
03416
03417 for(y=0; y<h; y++){
03418 for(x=0; x<w; x++){
03419 int i= x + y*stride;
03420
03421 if(x){
03422 if(use_median){
03423 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03424 else src[i] += src[i - 1];
03425 }else{
03426 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03427 else src[i] += src[i - 1];
03428 }
03429 }else{
03430 if(y) src[i] += src[i - stride];
03431 }
03432 }
03433 }
03434 }
03435
03436 static void encode_qlogs(SnowContext *s){
03437 int plane_index, level, orientation;
03438
03439 for(plane_index=0; plane_index<2; plane_index++){
03440 for(level=0; level<s->spatial_decomposition_count; level++){
03441 for(orientation=level ? 1:0; orientation<4; orientation++){
03442 if(orientation==2) continue;
03443 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
03444 }
03445 }
03446 }
03447 }
03448
03449 static void encode_header(SnowContext *s){
03450 int plane_index, i;
03451 uint8_t kstate[32];
03452
03453 memset(kstate, MID_STATE, sizeof(kstate));
03454
03455 put_rac(&s->c, kstate, s->keyframe);
03456 if(s->keyframe || s->always_reset){
03457 reset_contexts(s);
03458 s->last_spatial_decomposition_type=
03459 s->last_qlog=
03460 s->last_qbias=
03461 s->last_mv_scale=
03462 s->last_block_max_depth= 0;
03463 for(plane_index=0; plane_index<2; plane_index++){
03464 Plane *p= &s->plane[plane_index];
03465 p->last_htaps=0;
03466 p->last_diag_mc=0;
03467 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
03468 }
03469 }
03470 if(s->keyframe){
03471 put_symbol(&s->c, s->header_state, s->version, 0);
03472 put_rac(&s->c, s->header_state, s->always_reset);
03473 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
03474 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
03475 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03476 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
03477 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
03478 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
03479 put_rac(&s->c, s->header_state, s->spatial_scalability);
03480
03481 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
03482
03483 encode_qlogs(s);
03484 }
03485
03486 if(!s->keyframe){
03487 int update_mc=0;
03488 for(plane_index=0; plane_index<2; plane_index++){
03489 Plane *p= &s->plane[plane_index];
03490 update_mc |= p->last_htaps != p->htaps;
03491 update_mc |= p->last_diag_mc != p->diag_mc;
03492 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03493 }
03494 put_rac(&s->c, s->header_state, update_mc);
03495 if(update_mc){
03496 for(plane_index=0; plane_index<2; plane_index++){
03497 Plane *p= &s->plane[plane_index];
03498 put_rac(&s->c, s->header_state, p->diag_mc);
03499 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
03500 for(i= p->htaps/2; i; i--)
03501 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
03502 }
03503 }
03504 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
03505 put_rac(&s->c, s->header_state, 1);
03506 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03507 encode_qlogs(s);
03508 }else
03509 put_rac(&s->c, s->header_state, 0);
03510 }
03511
03512 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
03513 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
03514 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
03515 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
03516 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
03517
03518 }
03519
03520 static void update_last_header_values(SnowContext *s){
03521 int plane_index;
03522
03523 if(!s->keyframe){
03524 for(plane_index=0; plane_index<2; plane_index++){
03525 Plane *p= &s->plane[plane_index];
03526 p->last_diag_mc= p->diag_mc;
03527 p->last_htaps = p->htaps;
03528 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03529 }
03530 }
03531
03532 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
03533 s->last_qlog = s->qlog;
03534 s->last_qbias = s->qbias;
03535 s->last_mv_scale = s->mv_scale;
03536 s->last_block_max_depth = s->block_max_depth;
03537 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
03538 }
03539
03540 static void decode_qlogs(SnowContext *s){
03541 int plane_index, level, orientation;
03542
03543 for(plane_index=0; plane_index<3; plane_index++){
03544 for(level=0; level<s->spatial_decomposition_count; level++){
03545 for(orientation=level ? 1:0; orientation<4; orientation++){
03546 int q;
03547 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
03548 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
03549 else q= get_symbol(&s->c, s->header_state, 1);
03550 s->plane[plane_index].band[level][orientation].qlog= q;
03551 }
03552 }
03553 }
03554 }
03555
03556 static int decode_header(SnowContext *s){
03557 int plane_index;
03558 uint8_t kstate[32];
03559
03560 memset(kstate, MID_STATE, sizeof(kstate));
03561
03562 s->keyframe= get_rac(&s->c, kstate);
03563 if(s->keyframe || s->always_reset){
03564 reset_contexts(s);
03565 s->spatial_decomposition_type=
03566 s->qlog=
03567 s->qbias=
03568 s->mv_scale=
03569 s->block_max_depth= 0;
03570 }
03571 if(s->keyframe){
03572 s->version= get_symbol(&s->c, s->header_state, 0);
03573 if(s->version>0){
03574 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
03575 return -1;
03576 }
03577 s->always_reset= get_rac(&s->c, s->header_state);
03578 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
03579 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03580 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03581 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
03582 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
03583 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
03584 s->spatial_scalability= get_rac(&s->c, s->header_state);
03585
03586 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
03587
03588 decode_qlogs(s);
03589 }
03590
03591 if(!s->keyframe){
03592 if(get_rac(&s->c, s->header_state)){
03593 for(plane_index=0; plane_index<2; plane_index++){
03594 int htaps, i, sum=0;
03595 Plane *p= &s->plane[plane_index];
03596 p->diag_mc= get_rac(&s->c, s->header_state);
03597 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
03598 if((unsigned)htaps > HTAPS_MAX || htaps==0)
03599 return -1;
03600 p->htaps= htaps;
03601 for(i= htaps/2; i; i--){
03602 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
03603 sum += p->hcoeff[i];
03604 }
03605 p->hcoeff[0]= 32-sum;
03606 }
03607 s->plane[2].diag_mc= s->plane[1].diag_mc;
03608 s->plane[2].htaps = s->plane[1].htaps;
03609 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
03610 }
03611 if(get_rac(&s->c, s->header_state)){
03612 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03613 decode_qlogs(s);
03614 }
03615 }
03616
03617 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
03618 if(s->spatial_decomposition_type > 1){
03619 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
03620 return -1;
03621 }
03622
03623 s->qlog += get_symbol(&s->c, s->header_state, 1);
03624 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
03625 s->qbias += get_symbol(&s->c, s->header_state, 1);
03626 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
03627 if(s->block_max_depth > 1 || s->block_max_depth < 0){
03628 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
03629 s->block_max_depth= 0;
03630 return -1;
03631 }
03632
03633 return 0;
03634 }
03635
03636 static void init_qexp(void){
03637 int i;
03638 double v=128;
03639
03640 for(i=0; i<QROOT; i++){
03641 qexp[i]= lrintf(v);
03642 v *= pow(2, 1.0 / QROOT);
03643 }
03644 }
03645
03646 static av_cold int common_init(AVCodecContext *avctx){
03647 SnowContext *s = avctx->priv_data;
03648 int width, height;
03649 int i, j;
03650
03651 s->avctx= avctx;
03652
03653 dsputil_init(&s->dsp, avctx);
03654
03655 #define mcf(dx,dy)\
03656 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
03657 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
03658 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
03659 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
03660 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
03661 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
03662
03663 mcf( 0, 0)
03664 mcf( 4, 0)
03665 mcf( 8, 0)
03666 mcf(12, 0)
03667 mcf( 0, 4)
03668 mcf( 4, 4)
03669 mcf( 8, 4)
03670 mcf(12, 4)
03671 mcf( 0, 8)
03672 mcf( 4, 8)
03673 mcf( 8, 8)
03674 mcf(12, 8)
03675 mcf( 0,12)
03676 mcf( 4,12)
03677 mcf( 8,12)
03678 mcf(12,12)
03679
03680 #define mcfh(dx,dy)\
03681 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
03682 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
03683 mc_block_hpel ## dx ## dy ## 16;\
03684 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
03685 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
03686 mc_block_hpel ## dx ## dy ## 8;
03687
03688 mcfh(0, 0)
03689 mcfh(8, 0)
03690 mcfh(0, 8)
03691 mcfh(8, 8)
03692
03693 if(!qexp[0])
03694 init_qexp();
03695
03696
03697
03698 width= s->avctx->width;
03699 height= s->avctx->height;
03700
03701 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
03702 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
03703
03704 for(i=0; i<MAX_REF_FRAMES; i++)
03705 for(j=0; j<MAX_REF_FRAMES; j++)
03706 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
03707
03708 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
03709 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
03710
03711 return 0;
03712 }
03713
03714 static int common_init_after_header(AVCodecContext *avctx){
03715 SnowContext *s = avctx->priv_data;
03716 int plane_index, level, orientation;
03717
03718 for(plane_index=0; plane_index<3; plane_index++){
03719 int w= s->avctx->width;
03720 int h= s->avctx->height;
03721
03722 if(plane_index){
03723 w>>= s->chroma_h_shift;
03724 h>>= s->chroma_v_shift;
03725 }
03726 s->plane[plane_index].width = w;
03727 s->plane[plane_index].height= h;
03728
03729 for(level=s->spatial_decomposition_count-1; level>=0; level--){
03730 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03731 SubBand *b= &s->plane[plane_index].band[level][orientation];
03732
03733 b->buf= s->spatial_dwt_buffer;
03734 b->level= level;
03735 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
03736 b->width = (w + !(orientation&1))>>1;
03737 b->height= (h + !(orientation>1))>>1;
03738
03739 b->stride_line = 1 << (s->spatial_decomposition_count - level);
03740 b->buf_x_offset = 0;
03741 b->buf_y_offset = 0;
03742
03743 if(orientation&1){
03744 b->buf += (w+1)>>1;
03745 b->buf_x_offset = (w+1)>>1;
03746 }
03747 if(orientation>1){
03748 b->buf += b->stride>>1;
03749 b->buf_y_offset = b->stride_line >> 1;
03750 }
03751 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
03752
03753 if(level)
03754 b->parent= &s->plane[plane_index].band[level-1][orientation];
03755
03756 av_freep(&b->x_coeff);
03757 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
03758 }
03759 w= (w+1)>>1;
03760 h= (h+1)>>1;
03761 }
03762 }
03763
03764 return 0;
03765 }
03766
03767 static int qscale2qlog(int qscale){
03768 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
03769 + 61*QROOT/8;
03770 }
03771
03772 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
03773 {
03774
03775
03776
03777 uint32_t coef_sum= 0;
03778 int level, orientation, delta_qlog;
03779
03780 for(level=0; level<s->spatial_decomposition_count; level++){
03781 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03782 SubBand *b= &s->plane[0].band[level][orientation];
03783 IDWTELEM *buf= b->ibuf;
03784 const int w= b->width;
03785 const int h= b->height;
03786 const int stride= b->stride;
03787 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
03788 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03789 const int qdiv= (1<<16)/qmul;
03790 int x, y;
03791
03792 for(y=0; y<h; y++)
03793 for(x=0; x<w; x++)
03794 buf[x+y*stride]= b->buf[x+y*stride];
03795 if(orientation==0)
03796 decorrelate(s, b, buf, stride, 1, 0);
03797 for(y=0; y<h; y++)
03798 for(x=0; x<w; x++)
03799 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
03800 }
03801 }
03802
03803
03804 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
03805 assert(coef_sum < INT_MAX);
03806
03807 if(pict->pict_type == FF_I_TYPE){
03808 s->m.current_picture.mb_var_sum= coef_sum;
03809 s->m.current_picture.mc_mb_var_sum= 0;
03810 }else{
03811 s->m.current_picture.mc_mb_var_sum= coef_sum;
03812 s->m.current_picture.mb_var_sum= 0;
03813 }
03814
03815 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
03816 if (pict->quality < 0)
03817 return INT_MIN;
03818 s->lambda= pict->quality * 3/2;
03819 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
03820 s->qlog+= delta_qlog;
03821 return delta_qlog;
03822 }
03823
03824 static void calculate_visual_weight(SnowContext *s, Plane *p){
03825 int width = p->width;
03826 int height= p->height;
03827 int level, orientation, x, y;
03828
03829 for(level=0; level<s->spatial_decomposition_count; level++){
03830 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03831 SubBand *b= &p->band[level][orientation];
03832 IDWTELEM *ibuf= b->ibuf;
03833 int64_t error=0;
03834
03835 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
03836 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
03837 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
03838 for(y=0; y<height; y++){
03839 for(x=0; x<width; x++){
03840 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
03841 error += d*d;
03842 }
03843 }
03844
03845 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
03846 }
03847 }
03848 }
03849
03850 #define QUANTIZE2 0
03851
03852 #if QUANTIZE2==1
03853 #define Q2_STEP 8
03854
03855 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
03856 SubBand *b= &p->band[level][orientation];
03857 int x, y;
03858 int xo=0;
03859 int yo=0;
03860 int step= 1 << (s->spatial_decomposition_count - level);
03861
03862 if(orientation&1)
03863 xo= step>>1;
03864 if(orientation&2)
03865 yo= step>>1;
03866
03867
03868
03869 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
03870 for(y=0; y<p->height; y++){
03871 for(x=0; x<p->width; x++){
03872 int sx= (x-xo + step/2) / step / Q2_STEP;
03873 int sy= (y-yo + step/2) / step / Q2_STEP;
03874 int v= r0[x + y*p->width] - r1[x + y*p->width];
03875 assert(sx>=0 && sy>=0 && sx < score_stride);
03876 v= ((v+8)>>4)<<4;
03877 score[sx + sy*score_stride] += v*v;
03878 assert(score[sx + sy*score_stride] >= 0);
03879 }
03880 }
03881 }
03882
03883 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
03884 int level, orientation;
03885
03886 for(level=0; level<s->spatial_decomposition_count; level++){
03887 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03888 SubBand *b= &p->band[level][orientation];
03889 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
03890
03891 dequantize(s, b, dst, b->stride);
03892 }
03893 }
03894 }
03895
03896 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
03897 int level, orientation, ys, xs, x, y, pass;
03898 IDWTELEM best_dequant[height * stride];
03899 IDWTELEM idwt2_buffer[height * stride];
03900 const int score_stride= (width + 10)/Q2_STEP;
03901 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP];
03902 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP];
03903 int threshold= (s->m.lambda * s->m.lambda) >> 6;
03904
03905
03906
03907
03908 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
03909
03910 for(level=0; level<s->spatial_decomposition_count; level++){
03911 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03912 SubBand *b= &p->band[level][orientation];
03913 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
03914 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
03915 assert(src == b->buf);
03916
03917 quantize(s, b, dst, src, b->stride, s->qbias);
03918 }
03919 }
03920 for(pass=0; pass<1; pass++){
03921 if(s->qbias == 0)
03922 continue;
03923 for(level=0; level<s->spatial_decomposition_count; level++){
03924 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03925 SubBand *b= &p->band[level][orientation];
03926 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
03927 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
03928
03929 for(ys= 0; ys<Q2_STEP; ys++){
03930 for(xs= 0; xs<Q2_STEP; xs++){
03931 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
03932 dequantize_all(s, p, idwt2_buffer, width, height);
03933 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
03934 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
03935 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
03936 for(y=ys; y<b->height; y+= Q2_STEP){
03937 for(x=xs; x<b->width; x+= Q2_STEP){
03938 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
03939 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
03940
03941 }
03942 }
03943 dequantize_all(s, p, idwt2_buffer, width, height);
03944 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
03945 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
03946 for(y=ys; y<b->height; y+= Q2_STEP){
03947 for(x=xs; x<b->width; x+= Q2_STEP){
03948 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
03949 if(score[score_idx] <= best_score[score_idx] + threshold){
03950 best_score[score_idx]= score[score_idx];
03951 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
03952 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
03953
03954 }
03955 }
03956 }
03957 }
03958 }
03959 }
03960 }
03961 }
03962 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
03963 }
03964
03965 #endif
03966
03967 static av_cold int encode_init(AVCodecContext *avctx)
03968 {
03969 SnowContext *s = avctx->priv_data;
03970 int plane_index;
03971
03972 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
03973 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
03974 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
03975 return -1;
03976 }
03977
03978 if(avctx->prediction_method == DWT_97
03979 && (avctx->flags & CODEC_FLAG_QSCALE)
03980 && avctx->global_quality == 0){
03981 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
03982 return -1;
03983 }
03984
03985 s->spatial_decomposition_type= avctx->prediction_method;
03986
03987 s->chroma_h_shift= 1;
03988 s->chroma_v_shift= 1;
03989
03990 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
03991 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
03992
03993 for(plane_index=0; plane_index<3; plane_index++){
03994 s->plane[plane_index].diag_mc= 1;
03995 s->plane[plane_index].htaps= 6;
03996 s->plane[plane_index].hcoeff[0]= 40;
03997 s->plane[plane_index].hcoeff[1]= -10;
03998 s->plane[plane_index].hcoeff[2]= 2;
03999 s->plane[plane_index].fast_mc= 1;
04000 }
04001
04002 common_init(avctx);
04003 alloc_blocks(s);
04004
04005 s->version=0;
04006
04007 s->m.avctx = avctx;
04008 s->m.flags = avctx->flags;
04009 s->m.bit_rate= avctx->bit_rate;
04010
04011 s->m.me.temp =
04012 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
04013 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
04014 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
04015 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
04016 h263_encode_init(&s->m);
04017
04018 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
04019
04020 if(avctx->flags&CODEC_FLAG_PASS1){
04021 if(!avctx->stats_out)
04022 avctx->stats_out = av_mallocz(256);
04023 }
04024 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
04025 if(ff_rate_control_init(&s->m) < 0)
04026 return -1;
04027 }
04028 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
04029
04030 avctx->coded_frame= &s->current_picture;
04031 switch(avctx->pix_fmt){
04032
04033
04034 case PIX_FMT_YUV420P:
04035 case PIX_FMT_GRAY8:
04036
04037
04038 s->colorspace_type= 0;
04039 break;
04040
04041
04042
04043 default:
04044 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
04045 return -1;
04046 }
04047
04048 s->chroma_h_shift= 1;
04049 s->chroma_v_shift= 1;
04050
04051 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
04052 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
04053
04054 s->avctx->get_buffer(s->avctx, &s->input_picture);
04055
04056 if(s->avctx->me_method == ME_ITER){
04057 int i;
04058 int size= s->b_width * s->b_height << 2*s->block_max_depth;
04059 for(i=0; i<s->max_ref_frames; i++){
04060 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
04061 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
04062 }
04063 }
04064
04065 return 0;
04066 }
04067
04068 #define USE_HALFPEL_PLANE 0
04069
04070 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
04071 int p,x,y;
04072
04073 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
04074
04075 for(p=0; p<3; p++){
04076 int is_chroma= !!p;
04077 int w= s->avctx->width >>is_chroma;
04078 int h= s->avctx->height >>is_chroma;
04079 int ls= frame->linesize[p];
04080 uint8_t *src= frame->data[p];
04081
04082 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04083 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04084 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04085
04086 halfpel[0][p]= src;
04087 for(y=0; y<h; y++){
04088 for(x=0; x<w; x++){
04089 int i= y*ls + x;
04090
04091 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
04092 }
04093 }
04094 for(y=0; y<h; y++){
04095 for(x=0; x<w; x++){
04096 int i= y*ls + x;
04097
04098 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
04099 }
04100 }
04101 src= halfpel[1][p];
04102 for(y=0; y<h; y++){
04103 for(x=0; x<w; x++){
04104 int i= y*ls + x;
04105
04106 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
04107 }
04108 }
04109
04110
04111 }
04112 }
04113
04114 static int frame_start(SnowContext *s){
04115 AVFrame tmp;
04116 int w= s->avctx->width;
04117 int h= s->avctx->height;
04118
04119 if(s->current_picture.data[0]){
04120 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
04121 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
04122 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
04123 }
04124
04125 tmp= s->last_picture[s->max_ref_frames-1];
04126 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
04127 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
04128 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
04129 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
04130 s->last_picture[0]= s->current_picture;
04131 s->current_picture= tmp;
04132
04133 if(s->keyframe){
04134 s->ref_frames= 0;
04135 }else{
04136 int i;
04137 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
04138 if(i && s->last_picture[i-1].key_frame)
04139 break;
04140 s->ref_frames= i;
04141 }
04142
04143 s->current_picture.reference= 1;
04144 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
04145 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
04146 return -1;
04147 }
04148
04149 s->current_picture.key_frame= s->keyframe;
04150
04151 return 0;
04152 }
04153
04154 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
04155 SnowContext *s = avctx->priv_data;
04156 RangeCoder * const c= &s->c;
04157 AVFrame *pict = data;
04158 const int width= s->avctx->width;
04159 const int height= s->avctx->height;
04160 int level, orientation, plane_index, i, y;
04161 uint8_t rc_header_bak[sizeof(s->header_state)];
04162 uint8_t rc_block_bak[sizeof(s->block_state)];
04163
04164 ff_init_range_encoder(c, buf, buf_size);
04165 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04166
04167 for(i=0; i<3; i++){
04168 int shift= !!i;
04169 for(y=0; y<(height>>shift); y++)
04170 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
04171 &pict->data[i][y * pict->linesize[i]],
04172 width>>shift);
04173 }
04174 s->new_picture = *pict;
04175
04176 s->m.picture_number= avctx->frame_number;
04177 if(avctx->flags&CODEC_FLAG_PASS2){
04178 s->m.pict_type =
04179 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
04180 s->keyframe= pict->pict_type==FF_I_TYPE;
04181 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
04182 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
04183 if (pict->quality < 0)
04184 return -1;
04185 }
04186 }else{
04187 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
04188 s->m.pict_type=
04189 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
04190 }
04191
04192 if(s->pass1_rc && avctx->frame_number == 0)
04193 pict->quality= 2*FF_QP2LAMBDA;
04194 if(pict->quality){
04195 s->qlog= qscale2qlog(pict->quality);
04196 s->lambda = pict->quality * 3/2;
04197 }
04198 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
04199 s->qlog= LOSSLESS_QLOG;
04200 s->lambda = 0;
04201 }
04202
04203 frame_start(s);
04204
04205 s->m.current_picture_ptr= &s->m.current_picture;
04206 if(pict->pict_type == FF_P_TYPE){
04207 int block_width = (width +15)>>4;
04208 int block_height= (height+15)>>4;
04209 int stride= s->current_picture.linesize[0];
04210
04211 assert(s->current_picture.data[0]);
04212 assert(s->last_picture[0].data[0]);
04213
04214 s->m.avctx= s->avctx;
04215 s->m.current_picture.data[0]= s->current_picture.data[0];
04216 s->m. last_picture.data[0]= s->last_picture[0].data[0];
04217 s->m. new_picture.data[0]= s-> input_picture.data[0];
04218 s->m. last_picture_ptr= &s->m. last_picture;
04219 s->m.linesize=
04220 s->m. last_picture.linesize[0]=
04221 s->m. new_picture.linesize[0]=
04222 s->m.current_picture.linesize[0]= stride;
04223 s->m.uvlinesize= s->current_picture.linesize[1];
04224 s->m.width = width;
04225 s->m.height= height;
04226 s->m.mb_width = block_width;
04227 s->m.mb_height= block_height;
04228 s->m.mb_stride= s->m.mb_width+1;
04229 s->m.b8_stride= 2*s->m.mb_width+1;
04230 s->m.f_code=1;
04231 s->m.pict_type= pict->pict_type;
04232 s->m.me_method= s->avctx->me_method;
04233 s->m.me.scene_change_score=0;
04234 s->m.flags= s->avctx->flags;
04235 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
04236 s->m.out_format= FMT_H263;
04237 s->m.unrestricted_mv= 1;
04238
04239 s->m.lambda = s->lambda;
04240 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
04241 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
04242
04243 s->m.dsp= s->dsp;
04244 ff_init_me(&s->m);
04245 s->dsp= s->m.dsp;
04246 }
04247
04248 if(s->pass1_rc){
04249 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
04250 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
04251 }
04252
04253 redo_frame:
04254
04255 if(pict->pict_type == FF_I_TYPE)
04256 s->spatial_decomposition_count= 5;
04257 else
04258 s->spatial_decomposition_count= 5;
04259
04260 s->m.pict_type = pict->pict_type;
04261 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
04262
04263 common_init_after_header(avctx);
04264
04265 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
04266 for(plane_index=0; plane_index<3; plane_index++){
04267 calculate_visual_weight(s, &s->plane[plane_index]);
04268 }
04269 }
04270
04271 encode_header(s);
04272 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
04273 encode_blocks(s, 1);
04274 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
04275
04276 for(plane_index=0; plane_index<3; plane_index++){
04277 Plane *p= &s->plane[plane_index];
04278 int w= p->width;
04279 int h= p->height;
04280 int x, y;
04281
04282
04283 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
04284
04285 if(pict->data[plane_index])
04286 for(y=0; y<h; y++){
04287 for(x=0; x<w; x++){
04288 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
04289 }
04290 }
04291 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
04292
04293 if( plane_index==0
04294 && pict->pict_type == FF_P_TYPE
04295 && !(avctx->flags&CODEC_FLAG_PASS2)
04296 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
04297 ff_init_range_encoder(c, buf, buf_size);
04298 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04299 pict->pict_type= FF_I_TYPE;
04300 s->keyframe=1;
04301 s->current_picture.key_frame=1;
04302 goto redo_frame;
04303 }
04304
04305 if(s->qlog == LOSSLESS_QLOG){
04306 for(y=0; y<h; y++){
04307 for(x=0; x<w; x++){
04308 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
04309 }
04310 }
04311 }else{
04312 for(y=0; y<h; y++){
04313 for(x=0; x<w; x++){
04314 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
04315 }
04316 }
04317 }
04318
04319
04320
04321
04322 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
04323
04324 if(s->pass1_rc && plane_index==0){
04325 int delta_qlog = ratecontrol_1pass(s, pict);
04326 if (delta_qlog <= INT_MIN)
04327 return -1;
04328 if(delta_qlog){
04329
04330 ff_init_range_encoder(c, buf, buf_size);
04331 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
04332 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
04333 encode_header(s);
04334 encode_blocks(s, 0);
04335 }
04336 }
04337
04338 for(level=0; level<s->spatial_decomposition_count; level++){
04339 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04340 SubBand *b= &p->band[level][orientation];
04341
04342 if(!QUANTIZE2)
04343 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
04344 if(orientation==0)
04345 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
04346 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
04347 assert(b->parent==NULL || b->parent->stride == b->stride*2);
04348 if(orientation==0)
04349 correlate(s, b, b->ibuf, b->stride, 1, 0);
04350 }
04351 }
04352
04353 for(level=0; level<s->spatial_decomposition_count; level++){
04354 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04355 SubBand *b= &p->band[level][orientation];
04356
04357 dequantize(s, b, b->ibuf, b->stride);
04358 }
04359 }
04360
04361 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
04362 if(s->qlog == LOSSLESS_QLOG){
04363 for(y=0; y<h; y++){
04364 for(x=0; x<w; x++){
04365 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
04366 }
04367 }
04368 }
04369 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04370 }else{
04371
04372 if(pict->pict_type == FF_I_TYPE){
04373 for(y=0; y<h; y++){
04374 for(x=0; x<w; x++){
04375 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
04376 pict->data[plane_index][y*pict->linesize[plane_index] + x];
04377 }
04378 }
04379 }else{
04380 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
04381 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04382 }
04383 }
04384 if(s->avctx->flags&CODEC_FLAG_PSNR){
04385 int64_t error= 0;
04386
04387 if(pict->data[plane_index])
04388 for(y=0; y<h; y++){
04389 for(x=0; x<w; x++){
04390 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
04391 error += d*d;
04392 }
04393 }
04394 s->avctx->error[plane_index] += error;
04395 s->current_picture.error[plane_index] = error;
04396 }
04397
04398 }
04399
04400 update_last_header_values(s);
04401
04402 if(s->last_picture[s->max_ref_frames-1].data[0]){
04403 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
04404 for(i=0; i<9; i++)
04405 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
04406 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
04407 }
04408
04409 s->current_picture.coded_picture_number = avctx->frame_number;
04410 s->current_picture.pict_type = pict->pict_type;
04411 s->current_picture.quality = pict->quality;
04412 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
04413 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
04414 s->m.current_picture.display_picture_number =
04415 s->m.current_picture.coded_picture_number = avctx->frame_number;
04416 s->m.current_picture.quality = pict->quality;
04417 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
04418 if(s->pass1_rc)
04419 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
04420 return -1;
04421 if(avctx->flags&CODEC_FLAG_PASS1)
04422 ff_write_pass1_stats(&s->m);
04423 s->m.last_pict_type = s->m.pict_type;
04424 avctx->frame_bits = s->m.frame_bits;
04425 avctx->mv_bits = s->m.mv_bits;
04426 avctx->misc_bits = s->m.misc_bits;
04427 avctx->p_tex_bits = s->m.p_tex_bits;
04428
04429 emms_c();
04430
04431 return ff_rac_terminate(c);
04432 }
04433
04434 static av_cold void common_end(SnowContext *s){
04435 int plane_index, level, orientation, i;
04436
04437 av_freep(&s->spatial_dwt_buffer);
04438 av_freep(&s->spatial_idwt_buffer);
04439
04440 s->m.me.temp= NULL;
04441 av_freep(&s->m.me.scratchpad);
04442 av_freep(&s->m.me.map);
04443 av_freep(&s->m.me.score_map);
04444 av_freep(&s->m.obmc_scratchpad);
04445
04446 av_freep(&s->block);
04447 av_freep(&s->scratchbuf);
04448
04449 for(i=0; i<MAX_REF_FRAMES; i++){
04450 av_freep(&s->ref_mvs[i]);
04451 av_freep(&s->ref_scores[i]);
04452 if(s->last_picture[i].data[0])
04453 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
04454 }
04455
04456 for(plane_index=0; plane_index<3; plane_index++){
04457 for(level=s->spatial_decomposition_count-1; level>=0; level--){
04458 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04459 SubBand *b= &s->plane[plane_index].band[level][orientation];
04460
04461 av_freep(&b->x_coeff);
04462 }
04463 }
04464 }
04465 }
04466
04467 static av_cold int encode_end(AVCodecContext *avctx)
04468 {
04469 SnowContext *s = avctx->priv_data;
04470
04471 common_end(s);
04472 av_free(avctx->stats_out);
04473
04474 return 0;
04475 }
04476
04477 static av_cold int decode_init(AVCodecContext *avctx)
04478 {
04479 avctx->pix_fmt= PIX_FMT_YUV420P;
04480
04481 common_init(avctx);
04482
04483 return 0;
04484 }
04485
04486 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
04487 SnowContext *s = avctx->priv_data;
04488 RangeCoder * const c= &s->c;
04489 int bytes_read;
04490 AVFrame *picture = data;
04491 int level, orientation, plane_index, i;
04492
04493 ff_init_range_decoder(c, buf, buf_size);
04494 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04495
04496 s->current_picture.pict_type= FF_I_TYPE;
04497 if(decode_header(s)<0)
04498 return -1;
04499 common_init_after_header(avctx);
04500
04501
04502 slice_buffer_destroy(&s->sb);
04503 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
04504
04505 for(plane_index=0; plane_index<3; plane_index++){
04506 Plane *p= &s->plane[plane_index];
04507 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
04508 && p->hcoeff[1]==-10
04509 && p->hcoeff[2]==2;
04510 }
04511
04512 if(!s->block) alloc_blocks(s);
04513
04514 frame_start(s);
04515
04516 if(avctx->debug&FF_DEBUG_PICT_INFO)
04517 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
04518
04519 decode_blocks(s);
04520
04521 for(plane_index=0; plane_index<3; plane_index++){
04522 Plane *p= &s->plane[plane_index];
04523 int w= p->width;
04524 int h= p->height;
04525 int x, y;
04526 int decode_state[MAX_DECOMPOSITIONS][4][1];
04527
04528 if(s->avctx->debug&2048){
04529 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
04530 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04531
04532 for(y=0; y<h; y++){
04533 for(x=0; x<w; x++){
04534 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
04535 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
04536 }
04537 }
04538 }
04539
04540 {
04541 for(level=0; level<s->spatial_decomposition_count; level++){
04542 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04543 SubBand *b= &p->band[level][orientation];
04544 unpack_coeffs(s, b, b->parent, orientation);
04545 }
04546 }
04547 }
04548
04549 {
04550 const int mb_h= s->b_height << s->block_max_depth;
04551 const int block_size = MB_SIZE >> s->block_max_depth;
04552 const int block_w = plane_index ? block_size/2 : block_size;
04553 int mb_y;
04554 DWTCompose cs[MAX_DECOMPOSITIONS];
04555 int yd=0, yq=0;
04556 int y;
04557 int end_y;
04558
04559 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
04560 for(mb_y=0; mb_y<=mb_h; mb_y++){
04561
04562 int slice_starty = block_w*mb_y;
04563 int slice_h = block_w*(mb_y+1);
04564 if (!(s->keyframe || s->avctx->debug&512)){
04565 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
04566 slice_h -= (block_w >> 1);
04567 }
04568
04569 for(level=0; level<s->spatial_decomposition_count; level++){
04570 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04571 SubBand *b= &p->band[level][orientation];
04572 int start_y;
04573 int end_y;
04574 int our_mb_start = mb_y;
04575 int our_mb_end = (mb_y + 1);
04576 const int extra= 3;
04577 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
04578 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
04579 if (!(s->keyframe || s->avctx->debug&512)){
04580 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
04581 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
04582 }
04583 start_y = FFMIN(b->height, start_y);
04584 end_y = FFMIN(b->height, end_y);
04585
04586 if (start_y != end_y){
04587 if (orientation == 0){
04588 SubBand * correlate_band = &p->band[0][0];
04589 int correlate_end_y = FFMIN(b->height, end_y + 1);
04590 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
04591 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
04592 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
04593 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
04594 }
04595 else
04596 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
04597 }
04598 }
04599 }
04600
04601 for(; yd<slice_h; yd+=4){
04602 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
04603 }
04604
04605 if(s->qlog == LOSSLESS_QLOG){
04606 for(; yq<slice_h && yq<h; yq++){
04607 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
04608 for(x=0; x<w; x++){
04609 line[x] <<= FRAC_BITS;
04610 }
04611 }
04612 }
04613
04614 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
04615
04616 y = FFMIN(p->height, slice_starty);
04617 end_y = FFMIN(p->height, slice_h);
04618 while(y < end_y)
04619 slice_buffer_release(&s->sb, y++);
04620 }
04621
04622 slice_buffer_flush(&s->sb);
04623 }
04624
04625 }
04626
04627 emms_c();
04628
04629 if(s->last_picture[s->max_ref_frames-1].data[0]){
04630 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
04631 for(i=0; i<9; i++)
04632 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
04633 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
04634 }
04635
04636 if(!(s->avctx->debug&2048))
04637 *picture= s->current_picture;
04638 else
04639 *picture= s->mconly_picture;
04640
04641 *data_size = sizeof(AVFrame);
04642
04643 bytes_read= c->bytestream - c->bytestream_start;
04644 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
04645
04646 return bytes_read;
04647 }
04648
04649 static av_cold int decode_end(AVCodecContext *avctx)
04650 {
04651 SnowContext *s = avctx->priv_data;
04652
04653 slice_buffer_destroy(&s->sb);
04654
04655 common_end(s);
04656
04657 return 0;
04658 }
04659
04660 AVCodec snow_decoder = {
04661 "snow",
04662 CODEC_TYPE_VIDEO,
04663 CODEC_ID_SNOW,
04664 sizeof(SnowContext),
04665 decode_init,
04666 NULL,
04667 decode_end,
04668 decode_frame,
04669 0 ,
04670 NULL,
04671 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
04672 };
04673
04674 #if CONFIG_SNOW_ENCODER
04675 AVCodec snow_encoder = {
04676 "snow",
04677 CODEC_TYPE_VIDEO,
04678 CODEC_ID_SNOW,
04679 sizeof(SnowContext),
04680 encode_init,
04681 encode_frame,
04682 encode_end,
04683 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
04684 };
04685 #endif
04686
04687
04688 #ifdef TEST
04689 #undef malloc
04690 #undef free
04691 #undef printf
04692 #undef random
04693
04694 int main(void){
04695 int width=256;
04696 int height=256;
04697 int buffer[2][width*height];
04698 SnowContext s;
04699 int i;
04700 s.spatial_decomposition_count=6;
04701 s.spatial_decomposition_type=1;
04702
04703 printf("testing 5/3 DWT\n");
04704 for(i=0; i<width*height; i++)
04705 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
04706
04707 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04708 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04709
04710 for(i=0; i<width*height; i++)
04711 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
04712
04713 printf("testing 9/7 DWT\n");
04714 s.spatial_decomposition_type=0;
04715 for(i=0; i<width*height; i++)
04716 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
04717
04718 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04719 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04720
04721 for(i=0; i<width*height; i++)
04722 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
04723
04724 #if 0
04725 printf("testing AC coder\n");
04726 memset(s.header_state, 0, sizeof(s.header_state));
04727 ff_init_range_encoder(&s.c, buffer[0], 256*256);
04728 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
04729
04730 for(i=-256; i<256; i++){
04731 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
04732 }
04733 ff_rac_terminate(&s.c);
04734
04735 memset(s.header_state, 0, sizeof(s.header_state));
04736 ff_init_range_decoder(&s.c, buffer[0], 256*256);
04737 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
04738
04739 for(i=-256; i<256; i++){
04740 int j;
04741 j= get_symbol(&s.c, s.header_state, 1);
04742 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
04743 }
04744 #endif
04745 {
04746 int level, orientation, x, y;
04747 int64_t errors[8][4];
04748 int64_t g=0;
04749
04750 memset(errors, 0, sizeof(errors));
04751 s.spatial_decomposition_count=3;
04752 s.spatial_decomposition_type=0;
04753 for(level=0; level<s.spatial_decomposition_count; level++){
04754 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04755 int w= width >> (s.spatial_decomposition_count-level);
04756 int h= height >> (s.spatial_decomposition_count-level);
04757 int stride= width << (s.spatial_decomposition_count-level);
04758 DWTELEM *buf= buffer[0];
04759 int64_t error=0;
04760
04761 if(orientation&1) buf+=w;
04762 if(orientation>1) buf+=stride>>1;
04763
04764 memset(buffer[0], 0, sizeof(int)*width*height);
04765 buf[w/2 + h/2*stride]= 256*256;
04766 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04767 for(y=0; y<height; y++){
04768 for(x=0; x<width; x++){
04769 int64_t d= buffer[0][x + y*width];
04770 error += d*d;
04771 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
04772 }
04773 if(FFABS(height/2-y)<9 && level==2) printf("\n");
04774 }
04775 error= (int)(sqrt(error)+0.5);
04776 errors[level][orientation]= error;
04777 if(g) g=av_gcd(g, error);
04778 else g= error;
04779 }
04780 }
04781 printf("static int const visual_weight[][4]={\n");
04782 for(level=0; level<s.spatial_decomposition_count; level++){
04783 printf(" {");
04784 for(orientation=0; orientation<4; orientation++){
04785 printf("%8"PRId64",", errors[level][orientation]/g);
04786 }
04787 printf("},\n");
04788 }
04789 printf("};\n");
04790 {
04791 int level=2;
04792 int w= width >> (s.spatial_decomposition_count-level);
04793
04794 int stride= width << (s.spatial_decomposition_count-level);
04795 DWTELEM *buf= buffer[0];
04796 int64_t error=0;
04797
04798 buf+=w;
04799 buf+=stride>>1;
04800
04801 memset(buffer[0], 0, sizeof(int)*width*height);
04802 #if 1
04803 for(y=0; y<height; y++){
04804 for(x=0; x<width; x++){
04805 int tab[4]={0,2,3,1};
04806 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
04807 }
04808 }
04809 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04810 #else
04811 for(y=0; y<h; y++){
04812 for(x=0; x<w; x++){
04813 buf[x + y*stride ]=169;
04814 buf[x + y*stride-w]=64;
04815 }
04816 }
04817 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04818 #endif
04819 for(y=0; y<height; y++){
04820 for(x=0; x<width; x++){
04821 int64_t d= buffer[0][x + y*width];
04822 error += d*d;
04823 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
04824 }
04825 if(FFABS(height/2-y)<9) printf("\n");
04826 }
04827 }
04828
04829 }
04830 return 0;
04831 }
04832 #endif