一.背景
供应商系统下载的csv文件不支持域控(主要是第三方wps服务不能对csv文件加密,但是可以对office系列产品进行权限访问的加密控制)。因此思路就改为现将csv文件转为excel文件,然后对excel文件进行加域控制。本文主要介绍如何将csv文件转为excel文件。
二.要求
- Csv文件可能比较大,达到40-60M,需要控制内存使用率;
- 考虑接口的并发,需要进行接口的限流
-
三.方案
- 采用alibaba的easyexcel,降低内存占用率,根据压测结果,设置合理的接口限流参数(限流
- 本文不再介绍,可以使用java注解+redis+lua, 或者nginx限流等)
-
四.代码
-
CsvController
-
package com.xxx.xxx.controller;import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future;import javax.annotation.Resource; import javax.servlet.http.HttpServletResponse;import com.xxx.xxx.common.utils.EasyExcelUtil; import com.xxx.xxx.common.utils.ObjectUtil; import com.xxx.xxx.service.ExcelAnalysisService;import lombok.extern.slf4j.Slf4j; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile;/*** description:** @author: lgq* @create: 2024-04-16 11:06*/ @Slf4j @RestController @RequestMapping("/csv") public class CsvController {@Resourceprivate ExcelAnalysisService excelAnalysisService;/*** 读取传入的csv 文本的内容可以存入数据库** @param file* @return*/@PostMapping("/uploadCsvAndImportExcel")public void uploadCsvAndImportExcel(@RequestParam("file") MultipartFile file, HttpServletResponse response) {String[] splitName = file.getOriginalFilename().split(".csv");if (ObjectUtil.isEmpty(splitName) || ObjectUtil.isEmpty(splitName[0])) {return;}EasyExcelUtil.setResponseParam(response, splitName[0]);long startTime = System.currentTimeMillis();log.info("导出开始时间:{}", startTime);try {// 输出流可以为本地文件 // OutputStream outputStream = new FileOutputStream("D:\\templateExcel\\filename.xlsx");OutputStream outputStream = response.getOutputStream();InputStream inputStream = file.getInputStream();Future<String> future = excelAnalysisService.csv2Excel(inputStream, outputStream);future.get();} catch (IOException ioException) {log.error("csv转为excel出错!", ioException.getMessage());ioException.printStackTrace();} catch (InterruptedException interruptedException) {log.error("csv转为excel出错!", interruptedException.getMessage());interruptedException.printStackTrace();} catch (ExecutionException executionException) {log.error("csv转为excel出错!", executionException.getMessage());executionException.printStackTrace();}// 导出时间结束long endTime = System.currentTimeMillis();log.info("导出结束时间:{}", endTime + "ms");log.info("导出所用时间:{}", (endTime - startTime) / 1000 + "秒");}}
EasyExcelGeneralCsvListener
-
package com.xxx.xxx.listener;import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map;import com.alibaba.excel.ExcelWriter; import com.alibaba.excel.context.AnalysisContext; import com.alibaba.excel.event.AnalysisEventListener; import com.alibaba.excel.write.metadata.WriteSheet; import com.xxx.xxx.constants.ExcelConstants;/*** description:** @author: lgq* @create: 2024-04-16 11:25*/ public class EasyExcelGeneralCsvListener extends AnalysisEventListener<Map<Integer, String>> {/*** 用于存储读取的数据*/private List<Map<Integer, String>> dataList = new ArrayList<>();private ExcelWriter excelWriter;private WriteSheet writeSheet;public EasyExcelGeneralCsvListener() {}public EasyExcelGeneralCsvListener(ExcelWriter excelWriter, WriteSheet writeSheet) {this.excelWriter = excelWriter;this.writeSheet = writeSheet;}@Overridepublic void invoke(Map<Integer, String> data, AnalysisContext context) {// 数据add进入集合dataList.add(data);// size是否为2000条:这里其实就是分批.当数据等于2k的时候执行一次写入excelif (dataList.size() >= ExcelConstants.PER_WRITE_EXCEL_ROW_COUNT) {save2Excel();// 清理集合便于GC回收dataList.clear();}}@Overridepublic void invokeHeadMap(Map<Integer, String> headers, AnalysisContext context) {List<List<String>> titles = new ArrayList<>();for (int i = 0; i < headers.size(); i++) {titles.add(Collections.singletonList(headers.get(i)));}this.writeSheet.setHead(titles);}/*** 保存数据到 excel*/private void save2Excel() {if (dataList.size() > 0) {List<List<String>> consumerDataList = new ArrayList<>();dataList.stream().forEach( e ->{List<String> objects = new ArrayList<>();for (int i = 0; i < e.size(); i++) {objects.add(e.get(i));}consumerDataList.add(objects);});this.excelWriter.write(consumerDataList, writeSheet);}}/*** Excel 中所有数据解析完毕会调用此方法*/@Overridepublic void doAfterAllAnalysed(AnalysisContext context) {save2Excel();dataList.clear();}}
VisiableThreadPoolTaskExecutor
-
package com.xxx.xxx.task;import java.util.concurrent.Callable; import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor;import lombok.extern.slf4j.Slf4j; import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; import org.springframework.util.concurrent.ListenableFuture;/*** description:VisiableThreadPoolTaskExecutor** @author: lgq* @create: 2024-04-17 10:52*/ @Slf4j public class VisiableThreadPoolTaskExecutor extends ThreadPoolTaskExecutor {private void showThreadPoolInfo(String prefix){ThreadPoolExecutor threadPoolExecutor = getThreadPoolExecutor();if(null==threadPoolExecutor){return;}log.info("{}, {},taskCount [{}], completedTaskCount [{}], activeCount [{}], queueSize [{}]",this.getThreadNamePrefix(),prefix,threadPoolExecutor.getTaskCount(),threadPoolExecutor.getCompletedTaskCount(),threadPoolExecutor.getActiveCount(),threadPoolExecutor.getQueue().size());}@Overridepublic void execute(Runnable task) {showThreadPoolInfo("1. do execute");super.execute(task);}@Overridepublic void execute(Runnable task, long startTimeout) {showThreadPoolInfo("2. do execute");super.execute(task, startTimeout);}@Overridepublic Future<?> submit(Runnable task) {showThreadPoolInfo("1. do submit");return super.submit(task);}@Overridepublic <T> Future<T> submit(Callable<T> task) {showThreadPoolInfo("2. do submit");return super.submit(task);}@Overridepublic ListenableFuture<?> submitListenable(Runnable task) {showThreadPoolInfo("1. do submitListenable");return super.submitListenable(task);}@Overridepublic <T> ListenableFuture<T> submitListenable(Callable<T> task) {showThreadPoolInfo("2. do submitListenable");return super.submitListenable(task);} }
ExcelAnalysisService
-
package com.xxx.xxx.service;import java.io.OutputStream; import java.io.InputStream; import java.util.concurrent.Future;/*** description:excel文档分析处理类** @author: lgq* @create: 2024-04-17 11:42*/ public interface ExcelAnalysisService {/*** csv文档转为excel文档*/Future<String> csv2Excel(InputStream inputStream, OutputStream outputStream); }
ExcelAnalysisServiceImpl
-
package com.xxx.xxx.service.impl;import java.io.OutputStream; import java.nio.charset.Charset;import com.alibaba.excel.EasyExcel; import com.alibaba.excel.ExcelWriter; import com.alibaba.excel.support.ExcelTypeEnum; import com.alibaba.excel.write.metadata.WriteSheet; import com.xxx.xxx.listener.EasyExcelGeneralCsvListener; import com.xxx.xxx.service.ExcelAnalysisService;import lombok.extern.slf4j.Slf4j; import java.io.InputStream; import java.util.concurrent.Future;import org.springframework.scheduling.annotation.Async; import org.springframework.scheduling.annotation.AsyncResult; import org.springframework.stereotype.Service;/*** description:ExcelAnalysisService实现类** @author: lgq* @create: 2024-04-17 14:53*/ @Service @Slf4j public class ExcelAnalysisServiceImpl implements ExcelAnalysisService {@Async("asyncExcelAnalysisServiceExecutor")@Overridepublic Future<String> csv2Excel(InputStream inputStream, OutputStream outputStream) {try {ExcelWriter writer = EasyExcel.write(outputStream).excelType(ExcelTypeEnum.XLSX).build();EasyExcel.read(inputStream, new EasyExcelGeneralCsvListener(writer, new WriteSheet())).excelType(ExcelTypeEnum.CSV).charset(Charset.forName("UTF-8")).sheet().doRead();writer.finish();outputStream.flush();} catch (Exception e) {log.error("csv转为excel出错!", e.getMessage());e.printStackTrace();} finally {if (outputStream != null) {try {outputStream.close();} catch (Exception e) {log.error("outputStream.close() -> csv转为excel出错!", e.getMessage());e.printStackTrace();}}if (inputStream != null) {try {inputStream.close();} catch (Exception e) {log.error("inputStream.close() -> csv转为excel出错!", e.getMessage());e.printStackTrace();}}}return new AsyncResult<>("task complete!");} }
ExecutorConfig
-
package com.xxx.xxx.config;import java.util.concurrent.Executor; import java.util.concurrent.ThreadPoolExecutor;import com.xxx.xxx.task.VisiableThreadPoolTaskExecutor;import lombok.extern.slf4j.Slf4j; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.scheduling.annotation.EnableAsync; import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;/*** description:线程池配置类** @author: lgq* @create: 2024-04-17 10:28*/ @Configuration @Slf4j @EnableAsync public class ExecutorConfig {private static int corePoolSize = Runtime.getRuntime().availableProcessors() + 1;private static int maxPoolSize = Runtime.getRuntime().availableProcessors() + 1;private static int queueCapacity = 100;private static final String namePrefix = "ExcelAnalysis";@Bean(name = "asyncExcelAnalysisServiceExecutor")public Executor asyncExcelServiceExecutor() {log.info("start asyncExcelAnalysisServiceExecutor----------------");//ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();//使用可视化运行状态的线程池ThreadPoolTaskExecutor executor = new VisiableThreadPoolTaskExecutor();//配置核心线程数executor.setCorePoolSize(corePoolSize);//配置最大线程数executor.setMaxPoolSize(maxPoolSize);//配置队列大小executor.setQueueCapacity(queueCapacity);//配置线程池中的线程的名称前缀executor.setThreadNamePrefix(namePrefix);// rejection-policy:当pool已经达到max size的时候,如何处理新任务// CALLER_RUNS:不在新线程中执行任务,而是有调用者所在的线程来执行executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());//执行初始化executor.initialize();log.info("end asyncExcelAnalysisServiceExecutor------------");return executor;}}
ExcelConstants
-
package com.xxx.xxx.constants;/*** description:线程池配置类** @author: lgq* @create: 2024-04-17 10:28*/ public class ExcelConstants {public static final Integer PER_SHEET_ROW_COUNT = 100*10000;public static final Integer PER_WRITE_ROW_COUNT = 20*10000;public static final Integer PER_WRITE_EXCEL_ROW_COUNT = 2 * 1000;public static final Integer GENERAL_ONCE_SAVE_TO_DB_ROWS_JDBC = 10*10000;public static final Integer GENERAL_ONCE_SAVE_TO_DB_ROWS_MYBATIS = 5*10000; }
配置文件
-
spring:servlet:multipart:enabled: truemax-file-size: 100MB # 单个文件的最大值max-request-size: 100MB # 上传文件总的最大值
pom依赖
-
<dependency><groupId>com.alibaba</groupId><artifactId>easyexcel</artifactId><version>3.3.2</version></dependency>
-
五.压测
- jvm参数(本地电脑,性能较差)
- -Xms2g -Xmx2g
- 导出日志
性能监控
压测结果