1. 对512*512 png 四通道图像顺时针旋转90度
思路:将像素按 4×4 分块,块内做转置,再做水平镜像(转置 + 水平镜像 = 顺时针旋转 90°)。图像库使用 stb_image / stb_image_write。
2. 代码
#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define STB_IMAGE_IMPLEMENTATION
#include "./stb/stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "./stb/stb_image_write.h"

// #define DEBUG

/*
 * The NEON kernel works on RGBA data: 4 bytes per pixel, so one 128-bit
 * register holds exactly one row of a 4x4 pixel tile.
 */
enum { CHANNELS = 4, TILE = 4 };

/*
 * Rotate one 4x4 tile of 4-byte pixels 90 degrees clockwise.
 *
 * src        - first byte of the tile's top-left pixel in the source image
 * src_stride - bytes per source row
 * dst        - first byte of the tile's top-left pixel in the destination
 * dst_stride - bytes per destination row
 *
 * Destination row k receives source column k read from the bottom row up
 * to the top row, which is exactly "transpose, then mirror horizontally".
 */
static void rotate_tile_4x4(const uint8_t *src, size_t src_stride,
                            uint8_t *dst, size_t dst_stride)
{
    /* Each register holds one tile row; each 32-bit lane is one pixel. */
    const uint32x4_t r0 = vreinterpretq_u32_u8(vld1q_u8(src + 0 * src_stride));
    const uint32x4_t r1 = vreinterpretq_u32_u8(vld1q_u8(src + 1 * src_stride));
    const uint32x4_t r2 = vreinterpretq_u32_u8(vld1q_u8(src + 2 * src_stride));
    const uint32x4_t r3 = vreinterpretq_u32_u8(vld1q_u8(src + 3 * src_stride));

    /*
     * vtrnq_u32(a, b) yields val[0] = {a0, b0, a2, b2} and
     * val[1] = {a1, b1, a3, b3}.  Feeding the rows in bottom-to-top order
     * produces the transposed data already mirrored, so no separate lane
     * reversal is needed afterwards.
     */
    const uint32x4x2_t lo = vtrnq_u32(r3, r2); /* {r3c0,r2c0,r3c2,r2c2}, {r3c1,r2c1,r3c3,r2c3} */
    const uint32x4x2_t hi = vtrnq_u32(r1, r0); /* {r1c0,r0c0,r1c2,r0c2}, {r1c1,r0c1,r1c3,r0c3} */

    /* Stitch the 64-bit halves together: column k = {r3ck, r2ck, r1ck, r0ck}. */
    const uint32x4_t c0 = vcombine_u32(vget_low_u32(lo.val[0]), vget_low_u32(hi.val[0]));
    const uint32x4_t c1 = vcombine_u32(vget_low_u32(lo.val[1]), vget_low_u32(hi.val[1]));
    const uint32x4_t c2 = vcombine_u32(vget_high_u32(lo.val[0]), vget_high_u32(hi.val[0]));
    const uint32x4_t c3 = vcombine_u32(vget_high_u32(lo.val[1]), vget_high_u32(hi.val[1]));

    vst1q_u8(dst + 0 * dst_stride, vreinterpretq_u8_u32(c0));
    vst1q_u8(dst + 1 * dst_stride, vreinterpretq_u8_u32(c1));
    vst1q_u8(dst + 2 * dst_stride, vreinterpretq_u8_u32(c2));
    vst1q_u8(dst + 3 * dst_stride, vreinterpretq_u8_u32(c3));
}

/*
 * Rotate a w x h RGBA image 90 degrees clockwise into dst, which must hold
 * h x w pixels (destination width is h, destination height is w).
 * Source pixel (row i, col j) lands at destination (row j, col h - 1 - i).
 *
 * Full 4x4 tiles go through the NEON kernel; rows/columns left over when a
 * dimension is not a multiple of 4 are copied pixel by pixel so that no
 * 16-byte load or store ever crosses the end of a buffer.
 */
static void rotate90_cw_rgba(const uint8_t *src, uint8_t *dst, int w, int h)
{
    const size_t src_stride = (size_t)w * CHANNELS;
    const size_t dst_stride = (size_t)h * CHANNELS;
    const int w_tiled = w - w % TILE;
    const int h_tiled = h - h % TILE;

    for (int i = 0; i < h_tiled; i += TILE) {
        for (int j = 0; j < w_tiled; j += TILE) {
            const uint8_t *tile_in = src + (size_t)i * src_stride + (size_t)j * CHANNELS;
            uint8_t *tile_out = dst + (size_t)j * dst_stride
                                    + (size_t)(h - i - TILE) * CHANNELS;
            rotate_tile_4x4(tile_in, src_stride, tile_out, dst_stride);
        }
    }

    /* Scalar path for whatever the tiles did not cover. */
    for (int i = 0; i < h; i++) {
        const int first_col = (i < h_tiled) ? w_tiled : 0;
        for (int j = first_col; j < w; j++) {
            const uint8_t *in = src + (size_t)i * src_stride + (size_t)j * CHANNELS;
            uint8_t *out = dst + (size_t)j * dst_stride + (size_t)(h - 1 - i) * CHANNELS;
            for (int k = 0; k < CHANNELS; k++) {
                out[k] = in[k];
            }
        }
    }
}

#ifdef DEBUG
/* Print every pixel of a width x height RGBA image, one image row per line. */
static void dump_pixels(const uint8_t *img, int width, int height)
{
    for (int i = 0; i < height; i++) {
        for (int j = 0; j < width; j++) {
            const uint8_t *p = img + ((size_t)i * (size_t)width + (size_t)j) * CHANNELS;
            printf("%u%u%u%u ", p[0], p[1], p[2], p[3]);
        }
        printf("\n");
    }
}
#endif

/*
 * Load ./pic.png, rotate it 90 degrees clockwise and write pic1.png.
 * With DEBUG defined, an 8x8 synthetic image is rotated instead and both
 * the input and the result are printed.
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE if loading, allocation
 * or writing fails.
 */
int main(void)
{
    int w, h, c;
    int rc = EXIT_FAILURE;
    uint8_t *dst = NULL;

#ifdef DEBUG
    w = h = 8;
    c = CHANNELS;
    uint8_t *src = calloc((size_t)w * (size_t)h, CHANNELS);
    if (!src) {
        printf("alloc src failed.\n");
        return EXIT_FAILURE;
    }
    /* Every row holds the byte values 0, 1, 2, ... so pixels are easy to track. */
    for (int i = 0; i < h; i++) {
        for (int j = 0; j < w * CHANNELS; j++) {
            src[(size_t)i * (size_t)w * CHANNELS + (size_t)j] = (uint8_t)j;
        }
    }
    dump_pixels(src, w, h);
    printf("======\n");
#else
    /*
     * Ask stb_image for 4 channels regardless of what the file stores: the
     * NEON kernel assumes 4 bytes per pixel.  c still reports the channel
     * count found in the file.
     */
    uint8_t *src = stbi_load("./pic.png", &w, &h, &c, CHANNELS);
    if (!src) {
        printf("load img failed.\n");
        return EXIT_FAILURE;
    }
    printf("int w %d h %d c %d\n", w, h, c); // 512 512 4
#endif

    dst = calloc((size_t)w * (size_t)h, CHANNELS);
    if (!dst) {
        printf("alloc dst failed.\n");
        goto cleanup;
    }

    rotate90_cw_rgba(src, dst, w, h);

#ifdef DEBUG
    /* The rotated image is h pixels wide and w pixels tall. */
    dump_pixels(dst, h, w);
    rc = EXIT_SUCCESS;
#else
    /* stbi_write_png returns 0 on failure. */
    if (!stbi_write_png("pic1.png", h, w, CHANNELS, dst, h * CHANNELS)) {
        printf("write img failed.\n");
        goto cleanup;
    }
    rc = EXIT_SUCCESS;
#endif

cleanup:
#ifdef DEBUG
    free(src);
#else
    stbi_image_free(src);
#endif
    free(dst);
    return rc;
}
3. 测试结果
原图
旋转后图像