Exporting Large Data Volumes with easyExcel

7/5/2022 excel

# Notes on Optimizing a Large-Volume easyExcel Export

# Background

We needed to export the system operation log, currently at 600K+ rows. The original export took several minutes, so I switched to easyExcel and optimized the process.

# Code Examples

The general approach:

Pre-load the data to be exported into a cache; on a cache miss, fetch it with a MyBatis streaming (cursor) query.

Then write the data across multiple sheets, with several write calls per sheet (a worked example of the split follows).

This suits cases where the data volume is too large for a single sheet (an .xlsx sheet holds at most 1,048,576 rows) and too large to pass to a single write.
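
To make the split concrete: with the constants used by the utility class below (300,000 rows per sheet, 50,000 rows per write call) and the 1,000,000-row test dataset from the end of the post, the arithmetic works out to 4 sheets, 6 writes each for the first three sheets and 2 for the last. A minimal sketch of that calculation:

```java
public class SheetSplitMath {
    private static final int NUM_PER_SHEET = 300_000; // rows per sheet
    private static final int NUM_BY_TIMES = 50_000;   // rows per write call

    public static void main(String[] args) {
        int total = 1_000_000;
        // 1,000,000 / 300,000 rounds up to 4 sheets
        int sheetNum = (total + NUM_PER_SHEET - 1) / NUM_PER_SHEET;
        // a full sheet takes 300,000 / 50,000 = 6 write calls
        int writesPerFullSheet = NUM_PER_SHEET / NUM_BY_TIMES;
        // the last sheet holds the remaining 100,000 rows -> 2 write calls
        int lastSheetRows = total - (sheetNum - 1) * NUM_PER_SHEET;
        int lastSheetWrites = (lastSheetRows + NUM_BY_TIMES - 1) / NUM_BY_TIMES;
        System.out.printf("sheets=%d, writesPerFullSheet=%d, lastSheetRows=%d, lastSheetWrites=%d%n",
                sheetNum, writesPerFullSheet, lastSheetRows, lastSheetWrites);
        // prints: sheets=4, writesPerFullSheet=6, lastSheetRows=100000, lastSheetWrites=2
    }
}
```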

# Controller Endpoint

/**
 * Export the system log.
 *
 * @param type    log type
 * @param psw     password for the exported file
 * @param request HTTP request
 * @return {@link AjaxResult}
 * @throws Exception on failure
 */
public AjaxResult downLoadLogFile(@PathVariable("type") Integer type, String psw, HttpServletRequest request) throws Exception
{
    boolean flag = TokenTools.checkTokenIsEqual(request, "UserToken", "user_SToken");
    if (!flag)
    {
        throw new RRException("Invalid token");
    }
    // take the data from the cache if present, otherwise run the streaming query
    List<SysLogModel> data = SystemLoggerTimeCache.getSystemLoggerList(SystemLoggerTimeCache.KEY);
    if (ObjectUtil.isNull(data))
    {
        data = logService.getAllLoggerList(type);
    }
    String fileName = IdUtil.simpleUUID() + "AuditLog.xlsx";
    String filePath = properties.getServerResultPath() + fileName;
    BigExcelFileInit.bigExcelFileInit(data, psw, fileName, filePath, SysLogModel.class);
    return AjaxResult.success("Log file generated!", fileName);
}
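
SystemLoggerTimeCache is not shown in the post. A minimal sketch of what it might look like, assuming Hutool's TimedCache (the controller already uses Hutool's ObjectUtil and IdUtil); the 10-minute timeout and prune interval are illustrative choices, not the author's values:

```java
import cn.hutool.cache.impl.TimedCache;
import java.util.List;

/**
 * Sketch of a time-bounded cache for the exported log list.
 * KEY matches the constant referenced by the controller.
 */
public class SystemLoggerTimeCache {
    public static final String KEY = "systemLoggerList";

    private static final long TIMEOUT_MS = 10 * 60 * 1000L;
    private static final TimedCache<String, List<SysLogModel>> CACHE = new TimedCache<>(TIMEOUT_MS);

    static {
        // evict expired entries once a minute
        CACHE.schedulePrune(60 * 1000L);
    }

    public static void putSystemLoggerList(String key, List<SysLogModel> list) {
        CACHE.put(key, list);
    }

    public static List<SysLogModel> getSystemLoggerList(String key) {
        // false: don't refresh the entry's last-access time on read
        return CACHE.get(key, false);
    }
}
```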

# Excel Large-Volume Export Utility Class

/**
 * @version 1.0.0
 * @className: BigExcelFileInit
 * @description: utility class for exporting large data volumes to Excel
 * @author: LiJunYi
 * @create: 2022/7/6 8:45
 */
public class BigExcelFileInit
{
    /**
     * Rows written to each xlsx sheet
     */
    private static final int NUM_PER_SHEET = 300000;

    /**
     * Rows written to a sheet per write call (paged writes)
     */
    private static final int NUM_BY_TIMES = 50000;

    /**
     * Generic large-volume Excel export: split across sheets, several writes per sheet.
     *
     * @param passWord    password
     * @param fileName    file name
     * @param data        data to export
     * @param filePath    file path
     * @param elementType element type
     * @throws Exception on failure
     */
    public static void bigExcelFileInit(List<?> data, String passWord, String fileName, String filePath, Class<?> elementType) throws Exception
    {
        // number of sheets, rounded up
        int sheetNum = data.size() % NUM_PER_SHEET == 0 ? data.size() / NUM_PER_SHEET : data.size() / NUM_PER_SHEET + 1;
        // write calls per full sheet
        int writeNumPerSheet = NUM_PER_SHEET / NUM_BY_TIMES;
        // rows that go into the last sheet
        int writeNumLastSheet = data.size() - (sheetNum - 1) * NUM_PER_SHEET;
        // write calls for the last sheet, rounded up
        int writeNumPerLastSheet = writeNumLastSheet % NUM_BY_TIMES == 0 ? writeNumLastSheet / NUM_BY_TIMES : writeNumLastSheet / NUM_BY_TIMES + 1;
        // target file; try-with-resources closes (and finishes) the writer
        try(ExcelWriter excelWriter = EasyExcel.write(filePath, elementType).build())
        {
            for (int i = 0; i < sheetNum; i++)
            {
                String sheetName = "sheet" + i;
                WriteSheet writeSheet = EasyExcel.writerSheet(i, sheetName).build();
                // write calls for this sheet
                int writeNum = i == sheetNum - 1 ? writeNumPerLastSheet : writeNumPerSheet;
                // end row index (exclusive) of this sheet's final write
                int sheetEndNum = i == sheetNum - 1 ? data.size() : (i + 1) * NUM_PER_SHEET;
                for (int j = 0; j < writeNum; j++)
                {
                    int startNum = i * NUM_PER_SHEET + j * NUM_BY_TIMES;
                    int endNum = j == writeNum - 1 ? sheetEndNum : i * NUM_PER_SHEET + (j + 1) * NUM_BY_TIMES;
                    excelWriter.write(data.subList(startNum, endNum), writeSheet);
                }
            }
        }
        if (StrUtil.isNotEmpty(passWord))
        {
            // set the password required to open the workbook
            FileReadonlyProtectionUtil.enforceEncryptProtectionExcel(filePath, passWord);
        }
    }
}
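
FileReadonlyProtectionUtil is not shown in the post either. For completeness, a sketch of what the open-password step can look like using Apache POI's agile encryption; the method name mirrors the call above, but the body is an assumption, not the author's implementation:

```java
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.crypt.EncryptionMode;
import org.apache.poi.poifs.crypt.Encryptor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

public class FileReadonlyProtectionUtil
{
    /**
     * Re-wraps the finished .xlsx in an encrypted OLE2 container so that
     * Excel prompts for a password on open.
     */
    public static void enforceEncryptProtectionExcel(String filePath, String password) throws Exception
    {
        try (POIFSFileSystem fs = new POIFSFileSystem())
        {
            EncryptionInfo info = new EncryptionInfo(EncryptionMode.agile);
            Encryptor encryptor = info.getEncryptor();
            encryptor.confirmPassword(password);
            // copy the plain workbook into the encrypted stream
            try (OPCPackage opc = OPCPackage.open(new File(filePath), PackageAccess.READ_WRITE);
                 OutputStream os = encryptor.getDataStream(fs))
            {
                opc.save(os);
            }
            // overwrite the original file with the encrypted container
            try (FileOutputStream fos = new FileOutputStream(filePath))
            {
                fs.writeFilesystem(fos);
            }
        }
    }
}
```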


# Log Handler Class

Two approaches are provided here:

  • Fetch all log records in batches through a thread pool
  • Stream them with a MyBatis cursor query (see the transaction note after the class)
/**
 * @version 1.0.0
 * @className: SystemBigLoggerHandle
 * @description: handler for large-volume system log queries
 * @author: LiJunYi
 * @create: 2022/7/6 9:53
 */
@Component
@Slf4j
public class SystemBigLoggerHandle
{
    private static SysLogDao logDao;

    @Autowired
    public void setLogDao(SysLogDao logDao)
    {
        SystemBigLoggerHandle.logDao = logDao;
    }

    /**
     * Fetch all log records from MySQL in segments, via a thread pool.
     *
     * @param limit segment size
     * @param type  log type
     * @return {@link List}<{@link SysLogModel}>
     */
    public static List<SysLogModel> getAllLoggerList(int limit, Integer type)
    {
        // total number of records for this log type
        Integer count = logDao.getCountByType(type);
        // thread pool
        ThreadPoolTaskExecutor poolTaskExecutor = SpringUtil.getBean("commonTaskSchedulingThreadPool");
        List<FutureTask<List<SysLogModel>>> resultList = new ArrayList<>();
        // number of full segments
        long cycles = count / limit;
        for (long i = 0; i < cycles; i++)
        {
            // start offset of this segment
            long offset = i * limit;
            log.info("Async system-log query, offset: {}", offset);
            // the actual query task
            FutureTask<List<SysLogModel>> futureTask = new FutureTask<>(() -> logDao.queryAllByLimit(offset, limit, type));
            poolTaskExecutor.execute(futureTask);
            resultList.add(futureTask);
        }
        List<SysLogModel> result = new ArrayList<>();
        Iterator<FutureTask<List<SysLogModel>>> iterator = resultList.iterator();
        while (iterator.hasNext()) {
            try {
                result.addAll(iterator.next().get());
            } catch (InterruptedException | ExecutionException e)
            {
                log.error("[getAllLoggerList#SysLogServiceImpl] multi-threaded query failed: {}", e.getMessage());
                throw new RuntimeException(e);
            }
            // drop each task once its result has been collected
            iterator.remove();
        }
        // the last segment may be partial and needs one extra query
        if (result.size() != count)
        {
            result.addAll(logDao.queryAllByLimit(result.size(), count - result.size(), type));
        }
        return result;
    }

    /**
     * Fetch all log records with a MyBatis cursor (streaming query).
     * This could also be split into segments like the method above.
     *
     * @param type     log type
     * @param count    total count
     * @param pageNum  page number
     * @param pageSize page size
     * @return {@link List}<{@link SysLogModel}>
     */
    public static List<SysLogModel> getAllLoggerListUseMybatisCursor(int type, Integer count, Integer pageNum, Integer pageSize)
    {
        // page offset
        Integer offset = (pageNum - 1) * pageSize;
        List<SysLogModel> resultList = new ArrayList<>(count);
        // streaming query; the cursor must be consumed while the SqlSession is open
        try(Cursor<SysLogModel> cursor = logDao.queryByCursor(type, offset, pageSize))
        {
            cursor.forEach(resultList::add);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        // cache the result for subsequent exports
        SystemLoggerTimeCache.putSystemLoggerList(SystemLoggerTimeCache.KEY, resultList);
        return resultList;
    }
}
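
One caveat the post leaves implicit: a MyBatis Cursor fetches rows lazily, so it must be consumed while the owning SqlSession is still open; in a Spring setup that usually means iterating inside a transaction. A minimal sketch of the logService.getAllLoggerList(type) wrapper the controller calls, assuming Spring transaction management (the class name and the fetch-everything parameters are illustrative):

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.ibatis.cursor.Cursor;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

@Service
public class SysLogServiceImpl
{
    @Autowired
    private SysLogDao logDao;

    /**
     * The read-only transaction keeps the SqlSession (and its JDBC
     * connection) open for the whole iteration; without it the cursor
     * would be read against an already-closed session.
     */
    @Transactional(readOnly = true)
    public List<SysLogModel> getAllLoggerList(Integer type)
    {
        List<SysLogModel> result = new ArrayList<>();
        // offset 0 plus a very large page size = stream the whole table in one pass
        try (Cursor<SysLogModel> cursor = logDao.queryByCursor(type, 0, Integer.MAX_VALUE))
        {
            cursor.forEach(result::add);
        }
        catch (IOException e)
        {
            throw new RuntimeException(e);
        }
        return result;
    }
}
```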


# Mapper Interface

public interface SysLogDao extends BaseMapper<SysLogModel>
{
    /**
     * Query all data as a stream (cursor).
     *
     * @param type     log type
     * @param offset   page offset
     * @param pageSize page size
     * @return {@link Cursor}<{@link SysLogModel}>
     */
    Cursor<SysLogModel> queryByCursor(@Param("type") Integer type, @Param("offset") Integer offset, @Param("pageSize") Integer pageSize);
}

# Mapper XML

  • Note the fetchSize value here: it is set to Integer.MIN_VALUE (-2147483648), which is the value MySQL Connector/J expects when rows should be streamed one at a time instead of the whole result set being buffered in memory (see the JDBC sketch after the mapper XML).
  • The ID &lt;= (subquery) condition is seek-style pagination: the subquery locates the boundary ID via the index, so the outer query never scans and discards #{offset} rows the way a plain LIMIT #{offset},#{pageSize} would.
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.xsw.module.procurement.log.dao.SysLogDao">

<!-- Query all data as a stream (cursor) -->
    <select id="queryByCursor" resultType="com.xsw.module.procurement.log.model.SysLogModel" fetchSize="-2147483648">
        select ID, OPER_TYPE, OPER_TIME, OPER_EVENT, OPER_RESULT, OPER_OBJECT, USER_KEY, OPER_NAME, SSR, ERRORMSG, CLIENT_IP
        from T_SYS_LOG
        where OPER_TYPE = #{type}
        and ID &lt;=
        (
            SELECT ID FROM T_SYS_LOG WHERE OPER_TYPE = #{type} order by ID desc LIMIT #{offset},1
        )
        order by id desc
        limit #{pageSize}
    </select>
</mapper>
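
For reference, this is roughly what that fetchSize asks of MySQL Connector/J at the JDBC level; a sketch with placeholder dataSource and sql values:

```java
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import javax.sql.DataSource;

public class StreamingQuerySketch
{
    /**
     * MySQL Connector/J only streams rows when the statement is
     * forward-only, read-only, and fetchSize is Integer.MIN_VALUE;
     * any other fetchSize makes the driver buffer the full result set.
     */
    public static void streamRows(DataSource dataSource, String sql) throws Exception
    {
        try (Connection conn = dataSource.getConnection();
             PreparedStatement ps = conn.prepareStatement(sql,
                     ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY))
        {
            ps.setFetchSize(Integer.MIN_VALUE);
            try (ResultSet rs = ps.executeQuery())
            {
                while (rs.next())
                {
                    // handle one row at a time without holding the whole result set
                }
            }
        }
    }
}
```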

# Testing

Production hadn't been upgraded yet, so I generated 1,000,000 rows locally and ran the export; the timings were as follows:

12:42:03.346[nio-8080-exec-9]: wrote sheet=sheet0, rows=50000-0=50000, elapsed=1636
12:42:04.567[nio-8080-exec-9]: wrote sheet=sheet0, rows=100000-50000=50000, elapsed=1221
12:42:05.773[nio-8080-exec-9]: wrote sheet=sheet0, rows=150000-100000=50000, elapsed=1206
12:42:07.057[nio-8080-exec-9]: wrote sheet=sheet0, rows=200000-150000=50000, elapsed=1284
12:42:08.297[nio-8080-exec-9]: wrote sheet=sheet0, rows=250000-200000=50000, elapsed=1240
12:42:09.461[nio-8080-exec-9]: wrote sheet=sheet0, rows=300000-250000=50000, elapsed=1164
12:42:10.630[nio-8080-exec-9]: wrote sheet=sheet1, rows=350000-300000=50000, elapsed=1169
12:42:11.862[nio-8080-exec-9]: wrote sheet=sheet1, rows=400000-350000=50000, elapsed=1232
12:42:13.031[nio-8080-exec-9]: wrote sheet=sheet1, rows=450000-400000=50000, elapsed=1169
12:42:14.155[nio-8080-exec-9]: wrote sheet=sheet1, rows=500000-450000=50000, elapsed=1124
12:42:15.315[nio-8080-exec-9]: wrote sheet=sheet1, rows=550000-500000=50000, elapsed=1160
12:42:16.572[nio-8080-exec-9]: wrote sheet=sheet1, rows=600000-550000=50000, elapsed=1257
12:42:17.845[nio-8080-exec-9]: wrote sheet=sheet2, rows=650000-600000=50000, elapsed=1273
12:42:19.133[nio-8080-exec-9]: wrote sheet=sheet2, rows=700000-650000=50000, elapsed=1288
12:42:20.283[nio-8080-exec-9]: wrote sheet=sheet2, rows=750000-700000=50000, elapsed=1150
12:42:21.485[nio-8080-exec-9]: wrote sheet=sheet2, rows=800000-750000=50000, elapsed=1202
12:42:22.690[nio-8080-exec-9]: wrote sheet=sheet2, rows=850000-800000=50000, elapsed=1204
12:42:23.831[nio-8080-exec-9]: wrote sheet=sheet2, rows=900000-850000=50000, elapsed=1141
12:42:25.091[nio-8080-exec-9]: wrote sheet=sheet3, rows=950000-900000=50000, elapsed=1260
12:42:26.444[nio-8080-exec-9]: wrote sheet=sheet3, rows=1000000-950000=50000, elapsed=1353
12:42:33.836[nio-8080-exec-9]: Excel export finished, total rows=1000000, elapsed=32402ms