引言
在日常的开发工作中,一般是使用文件后缀去判断文件类型,这种方式不够严谨:只要修改文件名,就能改变被识别出的文件类型。如果将一个危险文件的后缀改成 png 或者 txt 后上传到文件服务器,后果难以估量。因此可以使用文件头魔数的方式去判断文件类型,这是一种比较安全的方式。
魔数介绍(Magic Number)
文件头魔数是指文件格式中用来标识文件类型的一段特定字节序列。它通常位于文件的开头部分,能够帮助操作系统或应用程序识别文件的格式和类型。
特点
- 唯一性:每种文件格式通常都有一个独特的魔数。
- 固定位置:魔数通常位于文件的开头,便于快速检查。
- 二进制格式:魔数通常以二进制形式存储。
文件类型魔数枚举
package com.demo.utils;/*** 文件类型魔数枚举* 使用场景:用于判断文件类型,获取文件类型** @author lyu* @description:* @create 2024-11-29 15:43*/
/**
 * Catalogue of file-header signatures ("magic numbers") paired with the
 * file extension each signature stands for.
 *
 * <p>{@link #getKey()} yields the signature as a hexadecimal string and
 * {@link #getValue()} yields the extension. The signatures are stored exactly
 * as written below, so their letter case is mixed (some lower-case, some
 * upper-case).
 *
 * <p>Declaration order is part of the data: longer, more specific signatures
 * (for example {@link #JAR}) are declared before shorter ones sharing the same
 * prefix (for example {@link #ZIP}).
 */
public enum MagicNumnerFileTypeEnum {

    /** JPG */
    JPG("ffd8ff", "jpg"),

    /** PNG */
    PNG("89504e47", "png"),

    /** GIF (signature ending in "87") */
    GIF_87("4749463837", "gif"),

    /** GIF (signature ending in "89") */
    GIF_89("4749463839", "gif"),

    /** TIF */
    TIF("49492a00227105008037", "tif"),

    /** Windows bitmap */
    BMP_16("424d228c010000000000", "bmp"),

    /** Windows bitmap */
    BMP_24("424d8240090000000000", "bmp"),

    /** Windows bitmap */
    BMP_256("424d8e1b030000000000", "bmp"),

    /** CAD */
    DWG("41433130313500000000", "dwg"),

    /** Rich Text Format */
    RTF("7b5c727466315c616e73", "rtf"),

    /** Adobe Photoshop */
    PSD("38425053000100000000", "psd"),

    /** eml */
    EML("46726f6d3a203d3f6762", "eml"),

    /** Microsoft Access */
    MDB("5374616E64617264204A", "mdb"),

    /** PostScript */
    PS("252150532D41646F6265", "ps"),

    /** Adobe Acrobat */
    PDF("255044462d312e", "pdf"),

    /** rmvb */
    RMVB("2e524d46000000120001", "rmvb"),

    /** flv */
    FLV("464c5601050000000900", "flv"),

    /**
     * MP4.
     * NOTE(review): this signature has 17 hex digits, i.e. it ends in the
     * middle of a byte - confirm whether that is intentional.
     */
    MP4("00000020667479706", "mp4"),

    /** MP4 (alternative signature) */
    MP4_ALT("00000018667479706d70", "mp4"),

    /** MP3 */
    MP3("49443303000000002176", "mp3"),

    /** mpg */
    MPG("000001ba210001000180", "mpg"),

    /** wmv */
    WMV("3026b2758e66cf11a6d9", "wmv"),

    /** wav */
    WAV("52494646e27807005741", "wav"),

    /** AVI */
    AVI("52494646d07d60074156", "avi"),

    /** MID */
    MID("4d546864000000060001", "mid"),

    /** RAR archive */
    RAR("526172211a0700cf9073", "rar"),

    /** INI */
    INI("235468697320636f6e66", "ini"),

    /** JAR */
    JAR("504B03040a0000000000", "jar"),

    /** JAR (alternative signature) */
    JAR_ALT("504B0304140008000800", "jar"),

    /**
     * Microsoft Word/Excel.
     * NOTE(review): this signature has 17 hex digits, i.e. it ends in the
     * middle of a byte - confirm whether that is intentional.
     */
    XLS("d0cf11e0a1b11ae10", "xls"),

    /** ZIP archive */
    ZIP("504B0304", "zip"),

    /** Windows exe */
    EXE("4d5a9000030000000400", "exe"),

    /** JSP */
    JSP("3c25402070616765206c", "jsp"),

    /** MF */
    MF("4d616e69666573742d56", "mf"),

    /** java */
    JAVA("7061636b616765207765", "java"),

    /** Windows script */
    BAT("406563686f206f66660d", "bat"),

    /** GZ */
    GZ("1f8b0800000000000000", "gz"),

    /** Java bytecode */
    CLASS("cafebabe0000002e0041", "class"),

    /** CHM */
    CHM("49545346030000006000", "chm"),

    /** mxp */
    MXP("04000000010000001300", "mxp"),

    /** torrent */
    TORRENT("6431303a637265617465", "torrent"),

    /** mov */
    MOV("6D6F6F76", "mov"),

    /** Word Perfect */
    WPD("FF575043", "wpd"),

    /** Outlook Express */
    DBX("CFAD12FEC5FD746F", "dbx"),

    /** Outlook */
    PST("2142444E", "pst"),

    /** qdf */
    QDF("AC9EBD8F", "qdf"),

    /** Windows password */
    PWL("E3828596", "pwl"),

    /** Real Audio */
    RAM("2E7261FD", "ram");

    /** Header signature written as hexadecimal digits. */
    private final String signatureHex;

    /** File extension associated with the signature, without a leading dot. */
    private final String extension;

    MagicNumnerFileTypeEnum(String signatureHex, String extension) {
        this.signatureHex = signatureHex;
        this.extension = extension;
    }

    /**
     * @return the file extension this entry stands for (for example {@code "png"})
     */
    public String getValue() {
        return this.extension;
    }

    /**
     * @return the header signature as a hexadecimal string, exactly as declared
     */
    public String getKey() {
        return this.signatureHex;
    }
}
文件类型判断工具类
package com.demo.utils;import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;/*** @author lyu* @version: 1.0* @create 2024-11-29 16:24*/
public class FileTypeUtils {private static final String EMPTY = "";private static final String DOT = ".";private static final int OFFSET = 28;private static final int INDEX_NOT_FOUND = -1;/*** 类Unix路径分隔符*/public static final char UNIX_SEPARATOR = '/';;/*** Windows路径分隔符*/public static final char WINDOWS_SEPARATOR = '\\';private FileTypeUtils() {throw new UnsupportedOperationException("Disabled use refection to create instance!!");}/*** 获取文件头** @param inputStream 输入流* @return 16 进制的文件投信息* @throws IOException io异常*/private static String getFileHeader(InputStream inputStream) throws IOException {byte[] b = new byte[28];inputStream.read(b, 0, 28);return bytes2hex(b);}/*** 将字节数组转换成16进制字符串** @param src 文件字节数组* @return 16进制字符串*/private static String bytes2hex(byte[] src) {StringBuilder stringBuilder = new StringBuilder();if (src == null || src.length == 0) {return null;}for (byte b : src) {int v = b & 0xFF;String hv = Integer.toHexString(v);if (hv.length() < 2) {stringBuilder.append(0);}stringBuilder.append(hv);}return stringBuilder.toString().toUpperCase();}private static byte[] getFileTypeByte(byte[] bytes) {byte[] fileTypeByte = new byte[OFFSET];int offset = Math.min(bytes.length, OFFSET);System.arraycopy(bytes, 0, fileTypeByte, 0, offset);return fileTypeByte;}public static String getFileType(byte[] bytes, String filename) {byte[] fileTypeByte = getFileTypeByte(bytes);String fileHeader = bytes2hex(fileTypeByte);String typeName = getType(fileHeader);if (null == typeName) {return extName(filename);}if ("zip".equalsIgnoreCase(typeName)) {// zip可能为docx、xlsx、pptx、jar、war等格式,扩展名辅助判断final String extName = extName(typeName);if ("docx".equalsIgnoreCase(extName)) {typeName = "docx";} else if ("xlsx".equalsIgnoreCase(extName)) {typeName = "xlsx";} else if ("pptx".equalsIgnoreCase(extName)) {typeName = "pptx";} else if ("jar".equalsIgnoreCase(extName)) {typeName = "jar";} else if ("war".equalsIgnoreCase(extName)) {typeName = "war";}}return typeName;}private static String 
extName(String filename) {if (filename == null) {return null;}int index = filename.lastIndexOf(DOT);if (index == -1) {return EMPTY;} else {String ext = filename.substring(index + 1);// 扩展名中不能包含路径相关的符号return containsAny(ext, UNIX_SEPARATOR, WINDOWS_SEPARATOR) ? EMPTY : ext;}}public static boolean containsAny(CharSequence str, char... testChars) {if (!isEmpty(str)) {int len = str.length();for (int i = 0; i < len; i++) {if (contains(testChars, str.charAt(i)) > INDEX_NOT_FOUND) {return true;}}}return false;}private static int contains(char[] array, char value) {if (null != array) {for (int i = 0; i < array.length; i++) {if (value == array[i]) {return i;}}}return INDEX_NOT_FOUND;}private static boolean isEmpty(CharSequence str) {return str == null || str.length() == 0;}private static String getType(String fileHeader) {for (MagicNumnerFileTypeEnum fileTypeEnum : MagicNumnerFileTypeEnum.values()) {if (fileHeader.startsWith(fileTypeEnum.getValue())) {return fileTypeEnum.getKey();}}return null;}/*** 判断指定输入流是否是指定文件格式** @param inputStream 文件流* @param fileTypeEnum 文件格式枚举* @return 结果* @throws IOException io异常*/public static boolean isFileType(InputStream inputStream, MagicNumnerFileTypeEnum fileTypeEnum) throws IOException {if (null == inputStream) {return false;}String fileHeader = getFileHeader(inputStream);return fileHeader.startsWith(fileTypeEnum.getValue());}public static void main(String[] args) throws IOException {String str = "E:\\a.docx";FileInputStream inputStream = new FileInputStream(str);byte[] b = new byte[inputStream.available()];inputStream.read(b, 0, inputStream.available());System.out.println(getFileType(b, str));}}