返回:OpenCV系列文章目录(持续更新中......)
上一篇:如何利用OpenCV4.9 更改图像的对比度和亮度
下一篇:
目标
我们将寻求以下问题的答案:
- 什么是傅里叶变换,为什么要使用它?
- 如何在 OpenCV 中做到这一点?
- 使用以下函数: copyMakeBorder() , merge() , dft() , getOptimalDFTSize() , log()和normalize() .
源代码
你可以在官方网站下载相关源代码。
以下是 dft() 的用法示例:
C++
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
static void help(char ** argv)
{cout << endl<< "This program demonstrated the use of the discrete Fourier transform (DFT). " << endl<< "The dft of an image is taken and it's power spectrum is displayed." << endl << endl<< "Usage:" << endl<< argv[0] << " [image_name -- default lena.jpg]" << endl << endl;
}
int main(int argc, char ** argv)
{help(argv); const char* filename = argc >=2 ? argv[1] : "lena.jpg"; Mat I = imread( samples::findFile( filename ), IMREAD_GRAYSCALE);if( I.empty()){cout << "Error opening image" << endl;return EXIT_FAILURE;} Mat padded; //expand input image to optimal sizeint m = getOptimalDFTSize( I.rows );int n = getOptimalDFTSize( I.cols ); // on the border add zero valuescopyMakeBorder(I, padded, 0, m - I.rows, 0, n - I.cols, BORDER_CONSTANT, Scalar::all(0)); Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};Mat complexI;merge(planes, 2, complexI); // Add to the expanded another plane with zeros dft(complexI, complexI); // this way the result may fit in the source matrix // compute the magnitude and switch to logarithmic scale// => log(1 + sqrt(Re(DFT(I))^2 + Im(DFT(I))^2))split(complexI, planes); // planes[0] = Re(DFT(I), planes[1] = Im(DFT(I))magnitude(planes[0], planes[1], planes[0]);// planes[0] = magnitudeMat magI = planes[0]; magI += Scalar::all(1); // switch to logarithmic scalelog(magI, magI); // crop the spectrum, if it has an odd number of rows or columnsmagI = magI(Rect(0, 0, magI.cols & -2, magI.rows & -2)); // rearrange the quadrants of Fourier image so that the origin is at the image centerint cx = magI.cols/2;int cy = magI.rows/2; Mat q0(magI, Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrantMat q1(magI, Rect(cx, 0, cx, cy)); // Top-RightMat q2(magI, Rect(0, cy, cx, cy)); // Bottom-LeftMat q3(magI, Rect(cx, cy, cx, cy)); // Bottom-Right Mat tmp; // swap quadrants (Top-Left with Bottom-Right)q0.copyTo(tmp);q3.copyTo(q0);tmp.copyTo(q3); q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)q2.copyTo(q1);tmp.copyTo(q2); normalize(magI, magI, 0, 1, NORM_MINMAX); // Transform the matrix with float values into a// viewable image form (float between values 0 and 1). imshow("Input Image" , I ); // Show the resultimshow("spectrum magnitude", magI);waitKey(); return EXIT_SUCCESS;
}
Java
import org.opencv.core.*;
import org.opencv.highgui.HighGui;
import org.opencv.imgcodecs.Imgcodecs;
import java.util.List;
import java.util.*;
class DiscreteFourierTransformRun{private void help() {System.out.println("" +"This program demonstrated the use of the discrete Fourier transform (DFT). \n" +"The dft of an image is taken and it's power spectrum is displayed.\n" +"Usage:\n" +"./DiscreteFourierTransform [image_name -- default ../data/lena.jpg]");} public void run(String[] args){ help(); String filename = ((args.length > 0) ? args[0] : "../data/lena.jpg"); Mat I = Imgcodecs.imread(filename, Imgcodecs.IMREAD_GRAYSCALE);if( I.empty() ) {System.out.println("Error opening image");System.exit(-1);} Mat padded = new Mat(); //expand input image to optimal sizeint m = Core.getOptimalDFTSize( I.rows() );int n = Core.getOptimalDFTSize( I.cols() ); // on the border add zero valuesCore.copyMakeBorder(I, padded, 0, m - I.rows(), 0, n - I.cols(), Core.BORDER_CONSTANT, Scalar.all(0)); List<Mat> planes = new ArrayList<Mat>();padded.convertTo(padded, CvType.CV_32F);planes.add(padded);planes.add(Mat.zeros(padded.size(), CvType.CV_32F));Mat complexI = new Mat();Core.merge(planes, complexI); // Add to the expanded another plane with zeros Core.dft(complexI, complexI); // this way the result may fit in the source matrix // compute the magnitude and switch to logarithmic scale// => log(1 + sqrt(Re(DFT(I))^2 + Im(DFT(I))^2))Core.split(complexI, planes); // planes.get(0) = Re(DFT(I)// planes.get(1) = Im(DFT(I))Core.magnitude(planes.get(0), planes.get(1), planes.get(0));// planes.get(0) = magnitudeMat magI = planes.get(0); Mat matOfOnes = Mat.ones(magI.size(), magI.type());Core.add(matOfOnes, magI, magI); // switch to logarithmic scaleCore.log(magI, magI); // crop the spectrum, if it has an odd number of rows or columnsmagI = magI.submat(new Rect(0, 0, magI.cols() & -2, magI.rows() & -2)); // rearrange the quadrants of Fourier image so that the origin is at the image centerint cx = magI.cols()/2;int cy = magI.rows()/2; Mat q0 = new Mat(magI, new Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrantMat q1 = new 
Mat(magI, new Rect(cx, 0, cx, cy)); // Top-RightMat q2 = new Mat(magI, new Rect(0, cy, cx, cy)); // Bottom-LeftMat q3 = new Mat(magI, new Rect(cx, cy, cx, cy)); // Bottom-Right Mat tmp = new Mat(); // swap quadrants (Top-Left with Bottom-Right)q0.copyTo(tmp);q3.copyTo(q0);tmp.copyTo(q3); q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)q2.copyTo(q1);tmp.copyTo(q2); magI.convertTo(magI, CvType.CV_8UC1);Core.normalize(magI, magI, 0, 255, Core.NORM_MINMAX, CvType.CV_8UC1); // Transform the matrix with float values// into a viewable image form (float between// values 0 and 255). HighGui.imshow("Input Image" , I ); // Show the resultHighGui.imshow("Spectrum Magnitude", magI);HighGui.waitKey(); System.exit(0);}
}
public class DiscreteFourierTransform {public static void main(String[] args) {// Load the native library.System.loadLibrary(Core.NATIVE_LIBRARY_NAME);new DiscreteFourierTransformRun().run(args);}
}
Python
from __future__ import print_function
import sys
import cv2 as cv
import numpy as np
def print_help():
    """Print a short description of the demo and its command-line usage."""
    # One sentence per line; the collapsed one-line string printed as run-on text.
    print('''
This program demonstrated the use of the discrete Fourier transform (DFT).
The dft of an image is taken and it's power spectrum is displayed.
Usage:
discrete_fourier_transform.py [image_name -- default lena.jpg]''')
def main(argv):
    """Compute and display the DFT magnitude spectrum of a grayscale image.

    argv: command-line arguments without the program name; argv[0], when
          present, is the image name, otherwise 'lena.jpg' is used.
    Returns -1 when the image cannot be loaded; otherwise returns None.
    """
    print_help()

    filename = argv[0] if len(argv) > 0 else 'lena.jpg'

    I = cv.imread(cv.samples.findFile(filename), cv.IMREAD_GRAYSCALE)
    if I is None:
        print('Error opening image')
        return -1

    # expand input image to optimal size; on the border add zero values
    rows, cols = I.shape
    m = cv.getOptimalDFTSize( rows )
    n = cv.getOptimalDFTSize( cols )
    padded = cv.copyMakeBorder(I, 0, m - rows, 0, n - cols, cv.BORDER_CONSTANT, value=[0, 0, 0])

    planes = [np.float32(padded), np.zeros(padded.shape, np.float32)]
    complexI = cv.merge(planes)         # Add to the expanded another plane with zeros

    cv.dft(complexI, complexI)          # in-place: source and destination are the same array

    # compute the magnitude: sqrt(Re(DFT(I))^2 + Im(DFT(I))^2)
    cv.split(complexI, planes)                    # planes[0] = Re(DFT(I)), planes[1] = Im(DFT(I))
    cv.magnitude(planes[0], planes[1], planes[0]) # planes[0] = magnitude
    magI = planes[0]

    matOfOnes = np.ones(magI.shape, dtype=magI.dtype)
    cv.add(matOfOnes, magI, magI)       # switch to logarithmic scale
    cv.log(magI, magI)

    magI_rows, magI_cols = magI.shape
    # crop the spectrum, if it has an odd number of rows or columns
    # (x & -2 clears the lowest bit, i.e. rounds down to an even number)
    magI = magI[0:(magI_rows & -2), 0:(magI_cols & -2)]
    # NOTE: here cx is half the number of ROWS and cy half the number of COLUMNS
    cx = int(magI_rows/2)
    cy = int(magI_cols/2)

    # rearrange the quadrants so that the origin is at the image center
    q0 = magI[0:cx, 0:cy]         # Top-Left - Create a ROI per quadrant
    q1 = magI[cx:cx+cx, 0:cy]     # Bottom-Left
    q2 = magI[0:cx, cy:cy+cy]     # Top-Right
    q3 = magI[cx:cx+cx, cy:cy+cy] # Bottom-Right

    tmp = np.copy(q0)               # swap quadrants (Top-Left with Bottom-Right)
    magI[0:cx, 0:cy] = q3
    magI[cx:cx + cx, cy:cy + cy] = tmp

    tmp = np.copy(q1)               # swap quadrants (Bottom-Left with Top-Right)
    magI[cx:cx + cx, 0:cy] = q2
    magI[0:cx, cy:cy + cy] = tmp

    cv.normalize(magI, magI, 0, 1, cv.NORM_MINMAX) # Transform the matrix with float values into a
                                                   # viewable image form (float between values 0 and 1).

    cv.imshow("Input Image"       , I   )    # Show the result
    cv.imshow("spectrum magnitude", magI)
    cv.waitKey()
# Script entry point: run the demo with the arguments that follow the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
解释:
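傅里叶变换会将图像分解为正弦和余弦分量。换句话说,它将图像从空间域转换到频域。其思想是:任何函数都可以用无穷多个正弦和余弦函数之和来精确逼近,而傅里叶变换正是实现这一点的方法。从数学上讲,二维图像的傅里叶变换是:
$$F(k,l) = \sum_{i=0}^{N-1}\sum_{j=0}^{N-1} f(i,j)\,e^{-i2\pi\left(\frac{ki}{N}+\frac{lj}{N}\right)}, \qquad e^{ix} = \cos x + i\sin x$$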
这里 f 是图像在空间域中的值,F 是图像在频域中的值。变换的结果是复数。可以通过实部图像和虚部图像,或者通过幅度图像和相位图像来显示它。然而,在图像处理算法中,我们通常只关心幅度图像,因为它包含了我们需要的有关图像几何结构的所有信息。但是,如果您打算在这些形式下对图像进行修改,然后再做逆变换,则需要同时保留这两种图像。
在此示例中,我将演示如何计算和显示傅里叶变换的幅度图像。数字图像是离散的,这意味着其像素值只能取给定值域内的某个值。例如,在基本灰度图像中,像素值通常介于 0 到 255 之间。因此,傅里叶变换也需要是离散的,这就是离散傅里叶变换 (DFT)。每当您需要从几何角度确定图像的结构时,都可以使用它。以下是要遵循的步骤(假设输入是灰度图像 I):
将图像扩展到最佳大小
DFT 的性能取决于图像大小。当图像尺寸是 2、3 和 5 的倍数时,计算往往最快。因此,为了获得最佳性能,通常最好在图像边界上填充像素,使其尺寸具有这种特征。getOptimalDFTSize() 返回这个最佳大小,我们可以使用 copyMakeBorder() 函数来扩展图像的边框(附加的像素以零初始化):
c++:
Mat padded; //expand input image to optimal sizeint m = getOptimalDFTSize( I.rows );int n = getOptimalDFTSize( I.cols ); // on the border add zero valuescopyMakeBorder(I, padded, 0, m - I.rows, 0, n - I.cols, BORDER_CONSTANT, Scalar::all(0));
Java:
Mat padded = new Mat(); //expand input image to optimal sizeint m = Core.getOptimalDFTSize( I.rows() );int n = Core.getOptimalDFTSize( I.cols() ); // on the border add zero valuesCore.copyMakeBorder(I, padded, 0, m - I.rows(), 0, n - I.cols(), Core.BORDER_CONSTANT, Scalar.all(0));
python:
# expand input image to optimal size; on the border add zero values
rows, cols = I.shape
m = cv.getOptimalDFTSize( rows )
n = cv.getOptimalDFTSize( cols )
padded = cv.copyMakeBorder(I, 0, m - rows, 0, n - cols, cv.BORDER_CONSTANT, value=[0, 0, 0])
为复数值和实数值腾出空间
傅里叶变换的结果是复数。这意味着对于每个图像值,结果是两个图像值(实部和虚部各一个)。此外,频域的取值范围比空间域大得多。因此,我们通常至少以浮点格式存储这些值。所以,我们将输入图像转换为浮点类型,并为其增加一个通道来保存复数值:
c++:
// planes[0]: the padded image as float (real part); planes[1]: zeros (imaginary part)
Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};
Mat complexI;
merge(planes, 2, complexI);         // Add to the expanded another plane with zeros
Java:
Mat padded = new Mat(); //expand input image to optimal sizeint m = Core.getOptimalDFTSize( I.rows() );int n = Core.getOptimalDFTSize( I.cols() ); // on the border add zero valuesCore.copyMakeBorder(I, padded, 0, m - I.rows(), 0, n - I.cols(), Core.BORDER_CONSTANT, Scalar.all(0));
Python:
# planes[0]: the padded image as float32 (real part); planes[1]: zeros (imaginary part)
planes = [np.float32(padded), np.zeros(padded.shape, np.float32)]
complexI = cv.merge(planes)         # Add to the expanded another plane with zeros
进行离散傅里叶变换
可以就地计算(输入与输出相同):
C++:
dft(complexI, complexI); // in-place: source and destination are the same matrix, so the result overwrites complexI
Java:
Core.dft(complexI, complexI); // in-place: source and destination are the same matrix, so the result overwrites complexI
Python:
cv.dft(complexI, complexI) # in-place: source and destination are the same array, so the result overwrites complexI
将实部和虚部转换为幅度
复数有实部(Re)和虚部(Im)两个部分。DFT 的结果是复数,其幅度为:
$$M = \sqrt{Re(DFT(I))^2 + Im(DFT(I))^2}$$
翻译成 OpenCV 代码:
C++:
split(complexI, planes); // planes[0] = Re(DFT(I), planes[1] = Im(DFT(I))magnitude(planes[0], planes[1], planes[0]);// planes[0] = magnitudeMat magI = planes[0];
Java:
Core.split(complexI, planes); // planes.get(0) = Re(DFT(I)// planes.get(1) = Im(DFT(I))Core.magnitude(planes.get(0), planes.get(1), planes.get(0));// planes.get(0) = magnitudeMat magI = planes.get(0);
Python:
cv.split(complexI, planes)                    # planes[0] = Re(DFT(I)), planes[1] = Im(DFT(I))
cv.magnitude(planes[0], planes[1], planes[0]) # planes[0] = magnitude
magI = planes[0]
切换到对数刻度
事实证明,傅里叶系数的动态范围太大,无法直接显示在屏幕上。其中既有很小的值,也有很大的值,直接显示时无法同时观察到它们:大的值全部变成白点,而小的值则变成黑点。为了能用灰度值进行可视化,我们可以将线性刻度转换为对数刻度:
$$M_1 = \log(1 + M)$$
翻译成 OpenCV 代码:
C++:
magI += Scalar::all(1); // switch to logarithmic scalelog(magI, magI);
Java:
Mat matOfOnes = Mat.ones(magI.size(), magI.type());
Core.add(matOfOnes, magI, magI);         // switch to logarithmic scale: log(1 + magnitude)
Core.log(magI, magI);
Python :
matOfOnes = np.ones(magI.shape, dtype=magI.dtype)
cv.add(matOfOnes, magI, magI)       # switch to logarithmic scale: log(1 + magnitude)
cv.log(magI, magI)
裁剪和重新排列
还记得吗,在第一步中我们扩展了图像?现在是时候丢掉那些新引入的值了。出于可视化目的,我们还可以重新排列结果的象限,使原点(零,零)与图像中心相对应。
C++:
// crop the spectrum, if it has an odd number of rows or columnsmagI = magI(Rect(0, 0, magI.cols & -2, magI.rows & -2)); // rearrange the quadrants of Fourier image so that the origin is at the image centerint cx = magI.cols/2;int cy = magI.rows/2; Mat q0(magI, Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrantMat q1(magI, Rect(cx, 0, cx, cy)); // Top-RightMat q2(magI, Rect(0, cy, cx, cy)); // Bottom-LeftMat q3(magI, Rect(cx, cy, cx, cy)); // Bottom-Right Mat tmp; // swap quadrants (Top-Left with Bottom-Right)q0.copyTo(tmp);q3.copyTo(q0);tmp.copyTo(q3); q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)q2.copyTo(q1);tmp.copyTo(q2);
Java:
// crop the spectrum, if it has an odd number of rows or columnsmagI = magI.submat(new Rect(0, 0, magI.cols() & -2, magI.rows() & -2));// rearrange the quadrants of Fourier image so that the origin is at the image centerint cx = magI.cols()/2;int cy = magI.rows()/2; Mat q0 = new Mat(magI, new Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrantMat q1 = new Mat(magI, new Rect(cx, 0, cx, cy)); // Top-RightMat q2 = new Mat(magI, new Rect(0, cy, cx, cy)); // Bottom-LeftMat q3 = new Mat(magI, new Rect(cx, cy, cx, cy)); // Bottom-Right Mat tmp = new Mat(); // swap quadrants (Top-Left with Bottom-Right)q0.copyTo(tmp);q3.copyTo(q0);tmp.copyTo(q3); q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)q2.copyTo(q1);tmp.copyTo(q2);
Python:
magI_rows, magI_cols = magI.shape
# crop the spectrum, if it has an odd number of rows or columns
# (x & -2 clears the lowest bit, i.e. rounds down to an even number)
magI = magI[0:(magI_rows & -2), 0:(magI_cols & -2)]
# NOTE: here cx is half the number of ROWS and cy half the number of COLUMNS
cx = int(magI_rows/2)
cy = int(magI_cols/2)

# rearrange the quadrants so that the origin is at the image center
q0 = magI[0:cx, 0:cy]         # Top-Left - Create a ROI per quadrant
q1 = magI[cx:cx+cx, 0:cy]     # Bottom-Left
q2 = magI[0:cx, cy:cy+cy]     # Top-Right
q3 = magI[cx:cx+cx, cy:cy+cy] # Bottom-Right

tmp = np.copy(q0)               # swap quadrants (Top-Left with Bottom-Right)
magI[0:cx, 0:cy] = q3
magI[cx:cx + cx, cy:cy + cy] = tmp

tmp = np.copy(q1)               # swap quadrants (Bottom-Left with Top-Right)
magI[cx:cx + cx, 0:cy] = q2
magI[0:cx, cy:cy + cy] = tmp
归一化
这一步同样是出于可视化目的。我们现在有了幅度值,但它们仍然超出了图像显示范围 0 到 1。我们使用 cv::normalize() 函数将这些值归一化到这个范围。
C++:
normalize(magI, magI, 0, 1, NORM_MINMAX); // Transform the matrix with float values into a
                                          // viewable image form (float between values 0 and 1).
Java:
Core.normalize(magI, magI, 0, 255, Core.NORM_MINMAX, CvType.CV_8UC1); // Rescale the values into a
                                                                      // viewable 8-bit image
                                                                      // (values between 0 and 255).
python:
cv.normalize(magI, magI, 0, 1, cv.NORM_MINMAX) # Transform the matrix with float values into a
                                               # viewable image form (float between values 0 and 1).
结果
一个应用思路是确定图像中存在的几何方向。例如,让我们判断一段文本是否是水平的。观察一些文本,你会注意到文本行形成了水平线,而字母形成了垂直线。在傅里叶变换的结果中,也可以看到文本的这两个主要组成部分。下面我们分别用一幅水平文本图像和一幅旋转后的文本图像来说明。
如果是横向文本:
如果文本旋转:
您可以看到,频域中最有影响力的分量(幅度图像上最亮的点)遵循图像上物体的几何旋转。由此我们可以计算偏移量并执行图像旋转以校正最终的未对齐。
参考文献:
1、《Discrete Fourier Transform》-----Bernát Gábor