Exporting Large Data Volumes with easyExcel
Notes from optimizing a large-volume easyExcel export.
Background
We needed to export the system operation log, currently at 600K+ rows. The original export took several minutes, so we switched to easyExcel and optimized the process.
Code Examples
TIP
General approach:
Pre-load the data to export into a cache; on a cache miss, fall back to a MyBatis streaming (cursor) query;
then write the data across multiple sheets, with several batched writes per sheet.
Suitable when the data set is too large for a single sheet and too large to load in one read.
Controller endpoint
/**
 * Export the system log
 *
 * @param type    log type
 * @param psw     password to set on the generated file
 * @param request HTTP request
 * @return {@link AjaxResult}
 * @throws Exception on failure
 */
@GetMapping("/downLoadLogFile/{type}") // mapping annotation restored for completeness; the exact path is an assumption
public AjaxResult downLoadLogFile(@PathVariable("type") Integer type, String psw, HttpServletRequest request) throws Exception
{
    boolean flag = TokenTools.checkTokenIsEqual(request, "UserToken", "user_SToken");
    if (!flag)
    {
        throw new RRException("Token expired");
    }
    // Take the data from the cache if present, otherwise run the streaming query
    List<SysLogModel> data = SystemLoggerTimeCache.getSystemLoggerList(SystemLoggerTimeCache.KEY);
    if (ObjectUtil.isNull(data))
    {
        data = logService.getAllLoggerList(type);
    }
    String fileName = IdUtil.simpleUUID() + "AuditLog.xlsx";
    String filePath = properties.getServerResultPath() + fileName;
    BigExcelFileInit.bigExcelFileInit(data, psw, fileName, filePath, SysLogModel.class);
    return AjaxResult.success("Log file generated!", fileName);
}
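The controller and the handler below rely on SystemLoggerTimeCache, which is not shown in the original. A minimal sketch of what it might look like, assuming Hutool's TimedCache with a 30-minute expiry (both the backing cache and the timeout are assumptions, not the original implementation):

import cn.hutool.cache.CacheUtil;
import cn.hutool.cache.impl.TimedCache;

import java.util.List;

/**
 * Hypothetical time-based cache for the exported log list.
 */
public class SystemLoggerTimeCache
{
    public static final String KEY = "SYS_LOGGER_LIST";

    /** Entries expire 30 minutes after the last write (assumed timeout). */
    private static final TimedCache<String, List<SysLogModel>> CACHE =
            CacheUtil.newTimedCache(30 * 60 * 1000L);

    public static void putSystemLoggerList(String key, List<SysLogModel> list)
    {
        CACHE.put(key, list);
    }

    public static List<SysLogModel> getSystemLoggerList(String key)
    {
        // Returns null on a miss or after expiry; false = do not refresh the TTL on reads
        return CACHE.get(key, false);
    }
}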
Excel large-volume export utility
/**
 * @version 1.0.0
 * @className: BigExcelFileInit
 * @description: utility for exporting large data volumes to Excel
 * @author: LiJunYi
 * @create: 2022/7/6 8:45
 */
public class BigExcelFileInit
{
    /**
     * Rows written to each xlsx sheet
     */
    private static final int NUM_PER_SHEET = 300000;
    /**
     * Rows written per batch within a sheet (paged writes)
     */
    private static final int NUM_BY_TIMES = 50000;

    /**
     * Generic large-volume export: split across sheets, written in batches per sheet
     *
     * @param data        rows to export
     * @param passWord    open password (optional)
     * @param fileName    file name
     * @param filePath    file path
     * @param elementType row model class
     * @throws Exception on failure
     */
    public static void bigExcelFileInit(List<?> data, String passWord, String fileName, String filePath, Class<?> elementType) throws Exception
    {
        // Number of sheets
        int sheetNum = data.size() % NUM_PER_SHEET == 0 ? data.size() / NUM_PER_SHEET : data.size() / NUM_PER_SHEET + 1;
        // Number of writes per full sheet
        int writeNumPerSheet = NUM_PER_SHEET / NUM_BY_TIMES;
        // Rows written to the last sheet
        int writeNumLastSheet = data.size() - (sheetNum - 1) * NUM_PER_SHEET;
        // Number of writes for the last sheet
        int writeNumPerLastSheet = writeNumLastSheet % NUM_BY_TIMES == 0 ? writeNumLastSheet / NUM_BY_TIMES : writeNumLastSheet / NUM_BY_TIMES + 1;
        // Open the target file for writing
        try (ExcelWriter excelWriter = EasyExcel.write(filePath, elementType).build())
        {
            for (int i = 0; i < sheetNum; i++)
            {
                String sheetName = "sheet" + i;
                WriteSheet writeSheet = EasyExcel.writerSheet(i, sheetName).build();
                // Number of writes for this sheet
                int writeNum = i == sheetNum - 1 ? writeNumPerLastSheet : writeNumPerSheet;
                // End row (exclusive) of this sheet's final write
                int sheetEndNum = i == sheetNum - 1 ? data.size() : (i + 1) * NUM_PER_SHEET;
                for (int j = 0; j < writeNum; j++)
                {
                    int startNum = i * NUM_PER_SHEET + j * NUM_BY_TIMES;
                    int endNum = j == writeNum - 1 ? sheetEndNum : i * NUM_PER_SHEET + (j + 1) * NUM_BY_TIMES;
                    excelWriter.write(data.subList(startNum, endNum), writeSheet);
                }
            }
        }
        if (StrUtil.isNotEmpty(passWord))
        {
            // Set an open password on the workbook
            FileReadonlyProtectionUtil.enforceEncryptProtectionExcel(filePath, passWord);
        }
    }
}
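FileReadonlyProtectionUtil.enforceEncryptProtectionExcel is called above but not shown. A minimal sketch of such a utility using Apache POI's agile encryption (this body is an assumption, not the original implementation):

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.crypt.EncryptionMode;
import org.apache.poi.poifs.crypt.Encryptor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;

/**
 * Hypothetical sketch: password-protect an existing xlsx file with Apache POI.
 */
public class FileReadonlyProtectionUtil
{
    public static void enforceEncryptProtectionExcel(String filePath, String passWord) throws Exception
    {
        try (POIFSFileSystem fs = new POIFSFileSystem())
        {
            EncryptionInfo info = new EncryptionInfo(EncryptionMode.agile);
            Encryptor encryptor = info.getEncryptor();
            encryptor.confirmPassword(passWord);
            // Re-save the workbook into the encrypted OLE2 container
            try (OPCPackage opc = OPCPackage.open(new File(filePath), PackageAccess.READ_WRITE);
                 OutputStream os = encryptor.getDataStream(fs))
            {
                opc.save(os);
            }
            // Overwrite the original file with the encrypted container
            try (FileOutputStream fos = new FileOutputStream(filePath))
            {
                fs.writeFilesystem(fos);
            }
        }
    }
}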
Log handler
Two approaches are provided:
- fetch all log records with a thread pool + segmented queries
- a MyBatis streaming (cursor) query
/**
 * @version 1.0.0
 * @className: SystemBigLoggerHandle
 * @description: handler for large-volume system logs
 * @author: LiJunYi
 * @create: 2022/7/6 9:53
 */
@Component
@Slf4j
public class SystemBigLoggerHandle
{
    private static SysLogDao logDao;

    @Autowired
    public void setLogDao(SysLogDao logDao)
    {
        SystemBigLoggerHandle.logDao = logDao;
    }

    /**
     * Fetch all log records via a thread pool + segmented MySQL queries
     *
     * @param limit segment size
     * @param type  log type
     * @return {@link List}<{@link SysLogModel}>
     */
    public static List<SysLogModel> getAllLoggerList(int limit, Integer type)
    {
        // Total number of rows for this log type
        Integer count = logDao.getCountByType(type);
        // Thread pool
        ThreadPoolTaskExecutor poolTaskExecutor = SpringUtil.getBean("commonTaskSchedulingThreadPool");
        List<FutureTask<List<SysLogModel>>> resultList = new ArrayList<>();
        // Number of full segments
        long cycles = count / limit;
        for (long i = 0; i < cycles; i++)
        {
            // Start offset of this segment
            long offset = i * limit;
            log.info("Async system-log query, offset: {}", offset);
            // The actual query task
            FutureTask<List<SysLogModel>> futureTask = new FutureTask<>(() -> logDao.queryAllByLimit(offset, limit, type));
            poolTaskExecutor.execute(futureTask);
            resultList.add(futureTask);
        }
        List<SysLogModel> result = new ArrayList<>();
        Iterator<FutureTask<List<SysLogModel>>> iterator = resultList.iterator();
        while (iterator.hasNext())
        {
            try
            {
                result.addAll(iterator.next().get());
            } catch (InterruptedException | ExecutionException e)
            {
                log.error("[getAllLoggerList#SystemBigLoggerHandle] multithreaded query failed: {}", e.getMessage());
                throw new RuntimeException(e);
            }
            // Drop each task once its result has been collected
            iterator.remove();
        }
        // The last segment may be a partial one; fetch the remainder separately
        if (result.size() != count)
        {
            result.addAll(logDao.queryAllByLimit(result.size(), count - result.size(), type));
        }
        return result;
    }

    /**
     * Fetch all log records with a MyBatis cursor.
     * This could also be adapted to segmented queries as in the method above.
     *
     * @param type     log type
     * @param count    total row count
     * @param pageNum  page number
     * @param pageSize page size
     * @return {@link List}<{@link SysLogModel}>
     */
    public static List<SysLogModel> getAllLoggerListUseMybatisCursor(int type, Integer count, Integer pageNum, Integer pageSize)
    {
        // Paging offset
        Integer offset = (pageNum - 1) * pageSize;
        List<SysLogModel> resultList = new ArrayList<>(count);
        // Streaming query; the cursor must be consumed while the SqlSession is still open
        try (Cursor<SysLogModel> cursor = logDao.queryByCursor(type, offset, pageSize))
        {
            cursor.forEach(resultList::add);
        } catch (IOException e)
        {
            throw new RuntimeException(e);
        }
        // Cache the result
        SystemLoggerTimeCache.putSystemLoggerList(SystemLoggerTimeCache.KEY, resultList);
        return resultList;
    }
}
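One caveat with the cursor approach: a MyBatis Cursor streams rows from an open SqlSession, so it must be fully consumed before the session closes. In a Spring setup this usually means running the read inside a transaction; otherwise the session is closed as soon as the mapper call returns and iterating the cursor fails. A minimal sketch of a service-layer wrapper (the class name and its signature are assumptions; the actual logService is not shown in the original):

import java.util.List;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

/**
 * Hypothetical service wrapper: keeps the SqlSession open while the cursor is drained.
 */
@Service
public class SysLogQueryService
{
    // Read-only transaction keeps the underlying connection open for the cursor
    @Transactional(readOnly = true)
    public List<SysLogModel> getAllLoggerList(int type, Integer count, Integer pageNum, Integer pageSize)
    {
        return SystemBigLoggerHandle.getAllLoggerListUseMybatisCursor(type, count, pageNum, pageSize);
    }
}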
Mapper interface
public interface SysLogDao extends BaseMapper<SysLogModel>
{
    /**
     * Stream all matching rows with a cursor
     *
     * @param type     log type
     * @param offset   paging offset
     * @param pageSize page size
     * @return {@link Cursor}<{@link SysLogModel}>
     */
    Cursor<SysLogModel> queryByCursor(@Param("type") Integer type, @Param("offset") Integer offset, @Param("pageSize") Integer pageSize);
}
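The handler also calls getCountByType and queryAllByLimit on this DAO; those methods are not shown in the original. Plausible declarations inferred from the call sites (assumed signatures), which would sit alongside queryByCursor in the same interface:

/**
 * Count log rows of the given type (assumed signature, inferred from the call site).
 */
Integer getCountByType(@Param("type") Integer type);

/**
 * Fetch one segment of log rows starting at offset (assumed signature, inferred from the call site).
 */
List<SysLogModel> queryAllByLimit(@Param("offset") long offset, @Param("limit") int limit, @Param("type") Integer type);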
XML SQL
- Note the fetchSize value here: -2147483648 is Integer.MIN_VALUE, not the maximum. For the MySQL Connector/J driver this special value switches the statement to row-by-row streaming instead of buffering the entire result set in memory. (Alternatively, with useCursorFetch=true on the JDBC URL, a positive fetchSize streams via a server-side cursor.)
- The inner SELECT first seeks the boundary ID over the index, so a deep offset does not have to scan and discard full rows.
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.xsw.module.procurement.log.dao.SysLogDao">
    <!-- Stream all matching rows with a cursor; note the escaped comparison operator, required in XML -->
    <select id="queryByCursor" resultType="com.xsw.module.procurement.log.model.SysLogModel" fetchSize="-2147483648">
        select ID, OPER_TYPE, OPER_TIME, OPER_EVENT, OPER_RESULT, OPER_OBJECT, USER_KEY, OPER_NAME, SSR, ERRORMSG, CLIENT_IP
        from T_SYS_LOG
        where OPER_TYPE = #{type}
        and ID &lt;=
        (
            SELECT ID FROM T_SYS_LOG WHERE OPER_TYPE = #{type} order by ID desc LIMIT #{offset},1
        )
        order by ID desc
        limit #{pageSize}
    </select>
</mapper>
Testing
Production hasn't been upgraded yet, so I generated 1,000,000 rows locally and exported them. With the constants above that works out to 4 sheets (three full sheets of 300,000 rows plus a last sheet of 100,000) written in 20 batches of 50,000, which matches the log below:
12:42:03.346[nio-8080-exec-9]: wrote sheet=sheet0, rows=50000-0=50000, elapsed=1636
12:42:04.567[nio-8080-exec-9]: wrote sheet=sheet0, rows=100000-50000=50000, elapsed=1221
12:42:05.773[nio-8080-exec-9]: wrote sheet=sheet0, rows=150000-100000=50000, elapsed=1206
12:42:07.057[nio-8080-exec-9]: wrote sheet=sheet0, rows=200000-150000=50000, elapsed=1284
12:42:08.297[nio-8080-exec-9]: wrote sheet=sheet0, rows=250000-200000=50000, elapsed=1240
12:42:09.461[nio-8080-exec-9]: wrote sheet=sheet0, rows=300000-250000=50000, elapsed=1164
12:42:10.630[nio-8080-exec-9]: wrote sheet=sheet1, rows=350000-300000=50000, elapsed=1169
12:42:11.862[nio-8080-exec-9]: wrote sheet=sheet1, rows=400000-350000=50000, elapsed=1232
12:42:13.031[nio-8080-exec-9]: wrote sheet=sheet1, rows=450000-400000=50000, elapsed=1169
12:42:14.155[nio-8080-exec-9]: wrote sheet=sheet1, rows=500000-450000=50000, elapsed=1124
12:42:15.315[nio-8080-exec-9]: wrote sheet=sheet1, rows=550000-500000=50000, elapsed=1160
12:42:16.572[nio-8080-exec-9]: wrote sheet=sheet1, rows=600000-550000=50000, elapsed=1257
12:42:17.845[nio-8080-exec-9]: wrote sheet=sheet2, rows=650000-600000=50000, elapsed=1273
12:42:19.133[nio-8080-exec-9]: wrote sheet=sheet2, rows=700000-650000=50000, elapsed=1288
12:42:20.283[nio-8080-exec-9]: wrote sheet=sheet2, rows=750000-700000=50000, elapsed=1150
12:42:21.485[nio-8080-exec-9]: wrote sheet=sheet2, rows=800000-750000=50000, elapsed=1202
12:42:22.690[nio-8080-exec-9]: wrote sheet=sheet2, rows=850000-800000=50000, elapsed=1204
12:42:23.831[nio-8080-exec-9]: wrote sheet=sheet2, rows=900000-850000=50000, elapsed=1141
12:42:25.091[nio-8080-exec-9]: wrote sheet=sheet3, rows=950000-900000=50000, elapsed=1260
12:42:26.444[nio-8080-exec-9]: wrote sheet=sheet3, rows=1000000-950000=50000, elapsed=1353
12:42:33.836[nio-8080-exec-9]: excel export finished, total rows=1000000, elapsed=32402ms