这两个图可能就是AAN 的数学模型
优化DCT就是用代码实现矩阵9,10
矩阵9和10把所需的系数从64个减少到一半(32个)。仅从这两个图就可以看出,优化后乘法减少了64-32+4=36次,加法减少了64-32-8=24次,估计运行时间可以减少40%左右。
实际编码640×480 的图片,程序执行时间缩短为0.13秒。
要想减少DCT时间就要尽量减少DCT函数的代码量,可以不用查表直接赋值。
最后一种优化就是像ffmpeg一样用汇编来写DCT函数部分了,这种我就搞不定了。
下一个目标,h264!
实现9,10 矩阵代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#define PI 3.1415926 /* appears only in the formula quoted in the comments below */

/*
 * cos(m * PI / 16) for m = 1..7, written out to full double precision.
 * The reference 8-point DCT uses the table cs[k*8+n] = cos(PI*(2*n+1)*k/16);
 * every entry of that table is +/- one of these seven values (or 1 for k == 0).
 */
#define DCT_C1 0.98078528040323044913
#define DCT_C2 0.92387953251128675613
#define DCT_C3 0.83146961230254523708
#define DCT_C4 0.70710678118654752440
#define DCT_C5 0.55557023301960222474
#define DCT_C6 0.38268343236508977173
#define DCT_C7 0.19509032201612826785

/*
 * Left half (n = 0..3) of the even rows k = 0, 2, 4, 6 of the cosine table.
 * Even rows are symmetric (cs[k][7-n] == cs[k][n]), so they are applied to
 * the sums i[n] + i[7-n].
 */
static const double cs1[16] = {
    1.0,     1.0,     1.0,     1.0,
    DCT_C2,  DCT_C6, -DCT_C6, -DCT_C2,
    DCT_C4, -DCT_C4, -DCT_C4,  DCT_C4,
    DCT_C6, -DCT_C2,  DCT_C2, -DCT_C6,
};

/*
 * Left half (n = 0..3) of the odd rows k = 1, 3, 5, 7 of the cosine table.
 * Odd rows are antisymmetric (cs[k][7-n] == -cs[k][n]), so they are applied
 * to the differences i[n] - i[7-n].
 */
static const double cs2[16] = {
    DCT_C1,  DCT_C3,  DCT_C5,  DCT_C7,
    DCT_C3, -DCT_C7, -DCT_C1, -DCT_C5,
    DCT_C5, -DCT_C1,  DCT_C7,  DCT_C3,
    DCT_C7, -DCT_C5,  DCT_C3, -DCT_C1,
};

/*
 * 8-point 1-D forward DCT.
 *
 * Computes o[k] = s(k) * sum_{n=0..7} i[n] * cos(PI*(2*n+1)*k/16), with
 * s(0) = 1/(2*sqrt(2)) and s(k) = 1/2 for k > 0.
 *
 * Instead of 64 multiply-adds against the full table, the input is first
 * folded into four sums and four differences, and each output is a 4-term
 * dot product against cs1 (even k) or cs2 (odd k).  The original author
 * labels this "the AAN approach"; what the code actually performs is this
 * even/odd symmetry split.
 *
 * i: 8 input samples.  They are double because intermediate values exceed
 *    the range of a char.
 * o: receives the 8 coefficients.  All inputs are read before any output is
 *    written, so o may be the same array as i.
 *
 * Always returns 0.
 */
static int DCT(double i[8], double o[8])
{
    /* 1/(2*sqrt(2)) == cos(PI/4) / 2, so no run-time sqrt() is required. */
    const double dc_scale = DCT_C4 / 2;
    const double ac_scale = 1.0 / 2;

    double even[4]; /* i[n] + i[7-n] */
    double odd[4];  /* i[n] - i[7-n] */
    for (int n = 0; n < 4; n++) {
        even[n] = i[n] + i[7 - n];
        odd[n] = i[n] - i[7 - n];
    }

    for (int h = 0; h < 4; h++) {
        const double *ce = &cs1[4 * h];
        const double *co = &cs2[4 * h];
        const double e = ce[0] * even[0] + ce[1] * even[1]
                       + ce[2] * even[2] + ce[3] * even[3];
        const double d = co[0] * odd[0] + co[1] * odd[1]
                       + co[2] * odd[2] + co[3] * odd[3];

        o[2 * h] = e * (h == 0 ? dc_scale : ac_scale);
        o[2 * h + 1] = d * ac_scale;
    }
    return 0;
}

/*
 * Demo: 2-D DCT of one sample 8x8 block (row-major), computed as a 1-D DCT
 * over every row followed by a 1-D DCT over every column of the row result.
 * Prints the 8x8 coefficient matrix, one row per line.
 */
int main(void)
{
    double i[64] = {
        -76, -73, -67, -62, -58, -67, -64, -55,
        -65, -69, -73, -38, -19, -43, -59, -56,
        -66, -69, -60, -15,  16, -24, -62, -55,
        -65, -70, -57,  -6,  26, -22, -58, -59,
        -61, -67, -60, -24,  -2, -40, -60, -58,
        -49, -63, -68, -58, -51, -60, -70, -53,
        -43, -57, -64, -69, -73, -67, -63, -45,
        -41, -49, -59, -60, -63, -52, -50, -34,
    };

    /* Pass 1: 1-D DCT of each of the 8 rows into the intermediate block. */
    double w[64] = {0};
    for (int row = 0; row < 8; row++) {
        DCT(&i[row * 8], &w[row * 8]);
    }

    /*
     * Pass 2: 1-D DCT of each column of the intermediate block.  A column is
     * not contiguous in row-major storage, so gather it into a scratch
     * vector, transform it, and scatter the result back into the same column
     * of the output.
     */
    double out[64] = {0}; /* 2-D DCT coefficients */
    for (int col = 0; col < 8; col++) {
        double column[8];
        double coeff[8];

        for (int row = 0; row < 8; row++) {
            column[row] = w[row * 8 + col];
        }
        DCT(column, coeff);
        for (int row = 0; row < 8; row++) {
            out[row * 8 + col] = coeff[row];
        }
    }

    /* Display. */
    for (int a = 0; a < 8; a++) {
        for (int b = 0; b < 8; b++) {
            printf("%f ,", out[a * 8 + b]);
        }
        puts("");
    }
    return 0;
}