dataease-dm/backend/src/main/java/io/dataease/commons/utils/ExcelXlsReader.java

388 lines
14 KiB
Java
Raw Normal View History

2021-07-21 15:07:51 +08:00
package io.dataease.commons.utils;
import io.dataease.datasource.dto.TableFiled;
import io.dataease.dto.dataset.ExcelSheetData;
import io.dataease.i18n.Translator;
import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
/**
* @author y
* @create 2018-01-19 14:18
* @desc 用于解决.xls2003版本大数据量问题
**/
public class ExcelXlsReader implements HSSFListener {
public ExcelReaderUtil excelReaderUtil = new ExcelReaderUtil();
private int minColums = -1;
private POIFSFileSystem fs;
/**
* 总行数
*/
private int totalRows = 0;
/**
* 上一行row的序号
*/
private int lastRowNumber;
/**
* 上一单元格的序号
*/
private int lastColumnNumber;
/**
* 是否输出formula还是它对应的值
*/
private boolean outputFormulaValues = true;
/**
* 用于转换formulas
*/
private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;
//excel2003工作簿
private HSSFWorkbook stubWorkbook;
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
private final HSSFDataFormatter formatter = new HSSFDataFormatter();
/**
* 文件的绝对路径
*/
private String filePath = "";
//表索引
private int sheetIndex = 0;
private BoundSheetRecord[] orderedBSRs;
@SuppressWarnings("unchecked")
private ArrayList boundSheetRecords = new ArrayList();
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
//当前行
private int curRow = 0;
//存储一行记录所有单元格的容器
private List<String> cellList = new ArrayList<String>();
/**
* 判断整行是否为空行的标记
*/
private boolean flag = false;
@SuppressWarnings("unused")
private String sheetName;
public List<TableFiled> fields = new ArrayList<>();
public List<List<String>> data = new ArrayList<>();
public List<ExcelSheetData> totalSheets = new ArrayList<>();
/**
* 是否为日期
*/
private boolean isDateFormat = false;
public List<TableFiled> getFields() {
return fields;
}
public void setFields(List<TableFiled> fields) {
this.fields = fields;
}
public List<List<String>> getData() {
return data;
}
public void setData(List<List<String>> data) {
this.data = data;
}
/**
* 遍历excel下所有的sheet
*
* @param inputStream
* @throws Exception
*/
public int process(InputStream inputStream) throws Exception {
this.fs = new POIFSFileSystem(inputStream);
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
return totalRows; //返回该excel文件的总行数不包括首列和空行
}
/**
* HSSFListener 监听方法处理Record
* 处理每个单元格
*
* @param record
*/
@SuppressWarnings("unchecked")
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String value = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
case BOFRecord.sid: //开始处理每个sheet
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
//如果有需要,则建立子工作簿
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
sheetName = orderedBSRs[sheetIndex].getSheetname();
sheetIndex++;
}
break;
case MergeCellsRecord.sid:
throw new RuntimeException(Translator.get("i18n_excel_have_merge_region"));
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid: //单元格为空白
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
cellList.add(thisColumn, thisStr);
break;
case BoolErrRecord.sid: //单元格为布尔类型
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = berec.getBooleanValue() + "";
cellList.add(thisColumn, thisStr);
checkRowIsNull(thisStr); //如果里面某个单元格含有值,则标识该行不为空行
break;
case FormulaRecord.sid://单元格为公式类型
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
thisStr = String.valueOf(frec.getValue());
String feildType = checkType(thisStr, thisColumn);
if(feildType.equalsIgnoreCase("LONG") && thisStr.endsWith(".0")){
thisStr = thisStr.substring(0, thisStr.length() -2);
}
cellList.add(thisColumn, thisStr);
checkRowIsNull(thisStr); //如果里面某个单元格含有值,则标识该行不为空行
break;
case StringRecord.sid: //单元格中公式的字符串
if (outputNextStringRecord) {
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
curRow = thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
value = lrec.getValue().trim();
value = value.equals("") ? "" : value;
cellList.add(thisColumn, value);
checkRowIsNull(value); //如果里面某个单元格含有值,则标识该行不为空行
break;
case LabelSSTRecord.sid: //单元格为字符串类型
LabelSSTRecord lsrec = (LabelSSTRecord) record;
curRow = thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
cellList.add(thisColumn, "");
} else {
value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
value = value.equals("") ? "" : value;
cellList.add(thisColumn, value);
checkRowIsNull(value); //如果里面某个单元格含有值,则标识该行不为空行
}
break;
case NumberRecord.sid: //单元格为数字类型
NumberRecord numrec = (NumberRecord) record;
curRow = thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
//第一种方式
//value = formatListener.formatNumberDateCell(numrec).trim();//这个被写死采用的m/d/yy h:mm格式不符合要求
//第二种方式参照formatNumberDateCell里面的实现方法编写
Double valueDouble = ((NumberRecord) numrec).getValue();
String formatString = formatListener.getFormatString(numrec);
if (formatString.contains("m/d/yy")) {
formatString = "yyyy-MM-dd hh:mm:ss";
}
int formatIndex = formatListener.getFormatIndex(numrec);
value = formatter.formatRawCellContents(valueDouble, formatIndex, formatString).trim();
value = value.equals("") ? "" : value;
//向容器加入列值
cellList.add(thisColumn, value);
if(formatIndex == 59){
totalSheets.get(totalSheets.size() -1).getFields().get(thisColumn).setFieldType("DATETIME");
}else {
checkType(value, thisColumn);
}
checkRowIsNull(value); //如果里面某个单元格含有值,则标识该行不为空行
break;
default:
break;
}
//遇到新行的操作
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
//空值的操作
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
curRow = thisRow = mc.getRow();
thisColumn = mc.getColumn();
cellList.add(thisColumn, "");
}
//更新行和列的值
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;
//行结束时的操作
if (record instanceof LastCellOfRowDummyRecord) {
if (minColums > 0) {
//列值重新置空
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
}
lastColumnNumber = -1;
if(!totalSheets.stream().map(ExcelSheetData::getSheetName).collect(Collectors.toList()).contains(sheetName)){
ExcelSheetData excelSheetData = new ExcelSheetData();
excelSheetData.setSheetName(sheetName);
excelSheetData.setData(new ArrayList<>());
excelSheetData.setFields(new ArrayList<>());
totalSheets.add(excelSheetData);
}
2021-07-28 14:40:22 +08:00
2021-07-21 15:07:51 +08:00
if(curRow == 0){
for (String s : cellList) {
TableFiled tableFiled = new TableFiled();
tableFiled.setFieldType("TEXT");
tableFiled.setFieldSize(65533);
tableFiled.setFieldName(s);
tableFiled.setRemarks(s);
this.fields.add(tableFiled);
totalSheets.get(totalSheets.size() -1).getFields().add(tableFiled);
}
}
if (flag && curRow != 0) { //该行不为空行且该行不是第一行,发送(第一行为列名,不需要)
if(!totalSheets.stream().map(ExcelSheetData::getSheetName).collect(Collectors.toList()).contains(sheetName)){
ExcelSheetData excelSheetData = new ExcelSheetData();
excelSheetData.setData(new ArrayList<>(data));
excelSheetData.setSheetName(sheetName);
excelSheetData.setFields(new ArrayList<>(fields));
List<String> tmp = new ArrayList<>(cellList);
excelSheetData.getData().add(tmp);
totalRows++;
totalSheets.add(excelSheetData);
}else {
List<String> tmp = new ArrayList<>(cellList);
totalSheets.stream().filter(s->s.getSheetName().equalsIgnoreCase(sheetName)).collect(Collectors.toList()).get(0).getData().add(tmp);
totalRows++;
}
}
//清空容器
cellList.clear();
flag = false;
}
}
/**
* 如果里面某个单元格含有值则标识该行不为空行
*
* @param value
*/
public void checkRowIsNull(String value) {
if (value != null && !"".equals(value)) {
flag = true;
}
}
private String checkType(String str, int thisColumn){
String type = null;
try {
double d = Double.valueOf(str);
try {
Double value = new Double(d);
double eps = 1e-10;
if (value - Math.floor(value) < eps) {
type = "LONG";
} else {
type = "DOUBLE";
}
} catch (Exception e) {
type = "TEXT";
}
}catch (Exception e){
type = "TEXT";
}
String oldType = totalSheets.get(totalSheets.size() -1).getFields().get(thisColumn).getFieldType();
if(type.equalsIgnoreCase("LONG") && oldType.equalsIgnoreCase("TEXT")){
totalSheets.get(totalSheets.size() -1).getFields().get(thisColumn).setFieldType(type);
}
if(type.equalsIgnoreCase("DOUBLE")){
totalSheets.get(totalSheets.size() -1).getFields().get(thisColumn).setFieldType(type);
}
return type;
}
2021-07-28 14:40:22 +08:00
}