Merge pull request #3110 from dataease/pr@dev@csv

feat: 支持 csv 文件
This commit is contained in:
xuwei-fit2cloud 2022-09-20 10:00:29 +08:00 committed by GitHub
commit f4e1a28b40
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 133 additions and 202 deletions

View File

@ -60,14 +60,8 @@ import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
@ -77,10 +71,9 @@ import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.Resource;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.*;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.text.MessageFormat;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
@ -2184,7 +2177,7 @@ public class DataSetTableService {
public ExcelFileData excelSaveAndParse(MultipartFile file, String tableId, Integer editType) throws Exception {
String filename = file.getOriginalFilename();
// parse file
List<ExcelSheetData> excelSheetDataList = parseExcel2(filename, file.getInputStream(), true);
List<ExcelSheetData> excelSheetDataList = parseExcel(filename, file.getInputStream(), true);
List<ExcelSheetData> retrunSheetDataList = new ArrayList<>();
if (StringUtils.isNotEmpty(tableId)) {
@ -2283,7 +2276,7 @@ public class DataSetTableService {
return excelFileData;
}
private List<ExcelSheetData> parseExcel2(String filename, InputStream inputStream, boolean isPreview)
private List<ExcelSheetData> parseExcel(String filename, InputStream inputStream, boolean isPreview)
throws Exception {
List<ExcelSheetData> excelSheetDataList = new ArrayList<>();
String suffix = filename.substring(filename.lastIndexOf(".") + 1);
@ -2299,6 +2292,38 @@ public class DataSetTableService {
excelXlsxReader.process(inputStream);
excelSheetDataList = excelXlsxReader.totalSheets;
}
if (StringUtils.equalsIgnoreCase(suffix, "csv")) {
    // CSV branch: the first line is the header (one TEXT field per comma-separated
    // name); up to 100 following lines are collected as data rows.
    // Note: this is a plain comma split, so quoted cells containing commas are not supported.
    BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
    String headerLine = reader.readLine();
    if (headerLine == null) {
        // Empty upload: there is no header row to derive fields from. Fail with the
        // same message key the Excel header check uses instead of a bare NPE.
        throw new RuntimeException(Translator.get("i18n_excel_header_empty"));
    }
    // Drop a leading UTF-8 byte-order mark so it does not become part of the
    // first field name (and of the fields MD5 computed below).
    if (headerLine.startsWith("\uFEFF")) {
        headerLine = headerLine.substring(1);
    }
    List<TableField> fields = new ArrayList<>();
    for (String columnName : headerLine.split(",")) {
        TableField tableFiled = new TableField();
        tableFiled.setFieldName(columnName);
        tableFiled.setRemarks(columnName);
        tableFiled.setFieldType("TEXT");
        fields.add(tableFiled);
    }
    List<List<String>> data = new ArrayList<>();
    String line;
    while (data.size() < 100 && (line = reader.readLine()) != null) {
        // String.split drops trailing empty cells ("1,2," -> ["1", "2"]); pad short
        // rows with "" so every row has at least one cell per header field.
        List<String> row = new ArrayList<>(Arrays.asList(line.split(",")));
        while (row.size() < fields.size()) {
            row.add("");
        }
        data.add(row);
    }
    ExcelSheetData excelSheetData = new ExcelSheetData();
    String[] fieldArray = fields.stream().map(TableField::getFieldName).toArray(String[]::new);
    excelSheetData.setFields(fields);
    excelSheetData.setData(data);
    // A CSV file has no sheets; the file name is used as the sheet label.
    excelSheetData.setExcelLable(filename);
    excelSheetData.setFieldsMd5(Md5Utils.md5(StringUtils.join(fieldArray, ",")));
    excelSheetDataList.add(excelSheetData);
}
inputStream.close();
excelSheetDataList.forEach(excelSheetData -> {
List<List<String>> data = excelSheetData.getData();
@ -2320,137 +2345,6 @@ public class DataSetTableService {
return excelSheetDataList;
}
/**
 * Parses the first sheet of an .xls or .xlsx workbook into field definitions and row data.
 *
 * The first row is treated as the header: each header cell becomes a TEXT field of
 * size 1024, and an empty header cell is named "NONE_" + column index. Every following
 * row is read into a String array and then converted to a map keyed by field name.
 * For any other file suffix no workbook is opened and the result lists stay empty.
 *
 * @param filename    file name; only the text after the last '.' is used, to pick the workbook type
 * @param inputStream workbook content; closed by this method on the normal return path
 * @param isPreview   when true, at most 100 physical rows (header included) are read
 * @return a map with keys "fields" (List of TableField), "data" (List of row maps)
 *         and "sheets" (names of all sheets in the workbook)
 * @throws RuntimeException if the first sheet has merged regions or its header row is missing
 * @throws Exception        on workbook read failures, or when field names repeat
 *                          (raised through DataEaseException.throwException)
 */
private Map<String, Object> parseExcel(String filename, InputStream inputStream, boolean isPreview)
throws Exception {
String suffix = filename.substring(filename.lastIndexOf(".") + 1);
List<TableField> fields = new ArrayList<>();
List<String[]> data = new ArrayList<>();
List<Map<String, Object>> jsonArray = new ArrayList<>();
List<String> sheets = new ArrayList<>();
if (StringUtils.equalsIgnoreCase(suffix, "xls")) {
HSSFWorkbook workbook = new HSSFWorkbook(inputStream);
// Only the first sheet is parsed; the names of all sheets are still collected.
HSSFSheet sheet0 = workbook.getSheetAt(0);
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
sheets.add(workbook.getSheetAt(i).getSheetName());
}
// Sheets with merged regions are rejected.
if (sheet0.getNumMergedRegions() > 0) {
throw new RuntimeException(Translator.get("i18n_excel_have_merge_region"));
}
int rows;
if (isPreview) {
rows = Math.min(sheet0.getPhysicalNumberOfRows(), 100);
} else {
rows = sheet0.getPhysicalNumberOfRows();
}
int columnNum = 0;
for (int i = 0; i < rows; i++) {
HSSFRow row = sheet0.getRow(i);
if (i == 0) {
if (row == null) {
throw new RuntimeException(Translator.get("i18n_excel_header_empty"));
}
// The column count comes from the header row (physical cell count here;
// the xlsx branch below uses getLastCellNum instead).
columnNum = row.getPhysicalNumberOfCells();
}
String[] r = new String[columnNum];
for (int j = 0; j < columnNum; j++) {
if (i == 0) {
// Header row: create one TEXT field per column.
TableField tableField = new TableField();
tableField.setFieldType("TEXT");
tableField.setFieldSize(1024);
String columnName = readCell(row.getCell(j), false, null);
if (StringUtils.isEmpty(columnName)) {
columnName = "NONE_" + String.valueOf(j);
}
tableField.setFieldName(columnName);
tableField.setRemarks(columnName);
fields.add(tableField);
} else {
// A missing data row leaves its cells null; the array is still added below.
if (row == null) {
break;
}
r[j] = readCell(row.getCell(j), true, fields.get(j));
}
}
// The header row itself is not part of the data.
if (i > 0) {
data.add(r);
}
}
} else if (StringUtils.equalsIgnoreCase(suffix, "xlsx")) {
// Same procedure as the xls branch, using the XSSF workbook classes.
XSSFWorkbook xssfWorkbook = new XSSFWorkbook(inputStream);
XSSFSheet sheet0 = xssfWorkbook.getSheetAt(0);
for (int i = 0; i < xssfWorkbook.getNumberOfSheets(); i++) {
sheets.add(xssfWorkbook.getSheetAt(i).getSheetName());
}
if (sheet0.getNumMergedRegions() > 0) {
throw new RuntimeException(Translator.get("i18n_excel_have_merge_region"));
}
int rows;
if (isPreview) {
rows = Math.min(sheet0.getPhysicalNumberOfRows(), 100);
} else {
rows = sheet0.getPhysicalNumberOfRows();
}
int columnNum = 0;
for (int i = 0; i < rows; i++) {
XSSFRow row = sheet0.getRow(i);
if (i == 0) {
if (row == null) {
throw new RuntimeException(Translator.get("i18n_excel_header_empty"));
}
// Column count taken from the header row's last cell number.
columnNum = row.getLastCellNum();
}
String[] r = new String[columnNum];
for (int j = 0; j < columnNum; j++) {
if (i == 0) {
// Header row: create one TEXT field per column.
TableField tableField = new TableField();
tableField.setFieldType("TEXT");
tableField.setFieldSize(1024);
String columnName = readCell(row.getCell(j), false, null);
if (StringUtils.isEmpty(columnName)) {
columnName = "NONE_" + String.valueOf(j);
}
tableField.setFieldName(columnName);
tableField.setRemarks(columnName);
fields.add(tableField);
} else {
// A missing data row leaves its cells null; the array is still added below.
if (row == null) {
break;
}
r[j] = readCell(row.getCell(j), true, fields.get(j));
}
}
// The header row itself is not part of the data.
if (i > 0) {
data.add(r);
}
}
}
String[] fieldArray = fields.stream().map(TableField::getFieldName).toArray(String[]::new);
// Check whether the Excel field names contain duplicates
if (checkIsRepeat(fieldArray)) {
DataEaseException.throwException(Translator.get("i18n_excel_field_repeat"));
}
// Convert each row array into a map keyed by field name.
if (CollectionUtils.isNotEmpty(data)) {
jsonArray = data.stream().map(ele -> {
Map<String, Object> map = new HashMap<>();
for (int i = 0; i < ele.length; i++) {
map.put(fieldArray[i], ele[i]);
}
return map;
}).collect(Collectors.toList());
}
inputStream.close();
Map<String, Object> map = new HashMap<>();
map.put("fields", fields);
map.put("data", jsonArray);
map.put("sheets", sheets);
return map;
}
private String readCell(Cell cell, boolean cellType, TableField tableField) {
if (cell == null) {
return "";

View File

@ -34,6 +34,7 @@ import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.pentaho.di.cluster.SlaveServer;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.row.ValueMeta;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobExecutionConfiguration;
@ -50,10 +51,12 @@ import org.pentaho.di.trans.TransExecutionConfiguration;
import org.pentaho.di.trans.TransHopMeta;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.steps.csvinput.CsvInputMeta;
import org.pentaho.di.trans.steps.excelinput.ExcelInputField;
import org.pentaho.di.trans.steps.excelinput.ExcelInputMeta;
import org.pentaho.di.trans.steps.excelinput.SpreadSheetType;
import org.pentaho.di.trans.steps.tableinput.TableInputMeta;
import org.pentaho.di.trans.steps.textfileinput.TextFileInputField;
import org.pentaho.di.trans.steps.textfileoutput.TextFileField;
import org.pentaho.di.trans.steps.textfileoutput.TextFileOutputMeta;
import org.pentaho.di.trans.steps.userdefinedjavaclass.UserDefinedJavaClassDef;
@ -898,10 +901,10 @@ public class ExtractDataService {
String outFile = null;
DatasourceTypes datasourceType = DatasourceTypes.valueOf(datasource.getType());
DatabaseMeta dataMeta;
StepMeta inputStep = null;
List<StepMeta> inputSteps = new ArrayList<>();
StepMeta outputStep;
StepMeta udjcStep = null;
TransHopMeta hi1;
TransHopMeta hi2;
String transName = null;
@ -921,7 +924,7 @@ public class ExtractDataService {
}
transMeta.addDatabase(dataMeta);
selectSQL = getSelectSQL(extractType, datasetTable, datasource, datasetTableFields, selectSQL);
inputStep = inputStep(transMeta, selectSQL, mysqlConfiguration);
inputSteps = inputStep(transMeta, selectSQL, mysqlConfiguration);
udjcStep = udjc(datasetTableFields, DatasourceTypes.mysql, mysqlConfiguration);
break;
case sqlServer:
@ -929,7 +932,7 @@ public class ExtractDataService {
dataMeta = new DatabaseMeta("db", "MSSQLNATIVE", "Native", sqlServerConfiguration.getHost().trim(), sqlServerConfiguration.getDataBase(), sqlServerConfiguration.getPort().toString(), sqlServerConfiguration.getUsername(), sqlServerConfiguration.getPassword());
transMeta.addDatabase(dataMeta);
selectSQL = getSelectSQL(extractType, datasetTable, datasource, datasetTableFields, selectSQL);
inputStep = inputStep(transMeta, selectSQL, sqlServerConfiguration);
inputSteps = inputStep(transMeta, selectSQL, sqlServerConfiguration);
udjcStep = udjc(datasetTableFields, DatasourceTypes.sqlServer, sqlServerConfiguration);
break;
case pg:
@ -937,7 +940,7 @@ public class ExtractDataService {
dataMeta = new DatabaseMeta("db", "POSTGRESQL", "Native", pgConfiguration.getHost().trim(), pgConfiguration.getDataBase(), pgConfiguration.getPort().toString(), pgConfiguration.getUsername(), pgConfiguration.getPassword());
transMeta.addDatabase(dataMeta);
selectSQL = getSelectSQL(extractType, datasetTable, datasource, datasetTableFields, selectSQL);
inputStep = inputStep(transMeta, selectSQL, pgConfiguration);
inputSteps = inputStep(transMeta, selectSQL, pgConfiguration);
udjcStep = udjc(datasetTableFields, DatasourceTypes.pg, pgConfiguration);
break;
case oracle:
@ -950,7 +953,7 @@ public class ExtractDataService {
}
transMeta.addDatabase(dataMeta);
selectSQL = getSelectSQL(extractType, datasetTable, datasource, datasetTableFields, selectSQL);
inputStep = inputStep(transMeta, selectSQL, oracleConfiguration);
inputSteps = inputStep(transMeta, selectSQL, oracleConfiguration);
udjcStep = udjc(datasetTableFields, DatasourceTypes.oracle, oracleConfiguration);
break;
case ck:
@ -959,7 +962,7 @@ public class ExtractDataService {
dataMeta.setDatabaseType("Clickhouse");
transMeta.addDatabase(dataMeta);
selectSQL = getSelectSQL(extractType, datasetTable, datasource, datasetTableFields, selectSQL);
inputStep = inputStep(transMeta, selectSQL, chConfiguration);
inputSteps = inputStep(transMeta, selectSQL, chConfiguration);
udjcStep = udjc(datasetTableFields, DatasourceTypes.ck, chConfiguration);
break;
case db2:
@ -968,11 +971,11 @@ public class ExtractDataService {
dataMeta.setDatabaseType("DB2");
transMeta.addDatabase(dataMeta);
selectSQL = getSelectSQL(extractType, datasetTable, datasource, datasetTableFields, selectSQL);
inputStep = inputStep(transMeta, selectSQL, db2Configuration);
inputSteps = inputStep(transMeta, selectSQL, db2Configuration);
udjcStep = udjc(datasetTableFields, DatasourceTypes.db2, db2Configuration);
break;
case excel:
inputStep = excelInputStep(datasetTable.getInfo(), datasetTableFields);
inputSteps = excelInputStep(datasetTable.getInfo(), datasetTableFields);
udjcStep = udjc(datasetTableFields, DatasourceTypes.excel, null);
default:
break;
@ -1000,11 +1003,14 @@ public class ExtractDataService {
outputStep = outputStep(outFile, datasetTableFields, datasource);
hi1 = new TransHopMeta(inputStep, udjcStep);
for (StepMeta inputStep : inputSteps) {
TransHopMeta hi1 = new TransHopMeta(inputStep, udjcStep);
transMeta.addTransHop(hi1);
transMeta.addStep(inputStep);
}
hi2 = new TransHopMeta(udjcStep, outputStep);
transMeta.addTransHop(hi1);
transMeta.addTransHop(hi2);
transMeta.addStep(inputStep);
transMeta.addStep(udjcStep);
transMeta.addStep(outputStep);
@ -1036,7 +1042,7 @@ public class ExtractDataService {
return selectSQL;
}
private StepMeta inputStep(TransMeta transMeta, String selectSQL, JdbcConfiguration jdbcConfiguration) {
private List<StepMeta> inputStep(TransMeta transMeta, String selectSQL, JdbcConfiguration jdbcConfiguration) {
TableInputMeta tableInput = new TableInputMeta();
DatabaseMeta database = transMeta.findDatabase(DatasetType.DB.name());
tableInput.setDatabaseMeta(database);
@ -1044,58 +1050,89 @@ public class ExtractDataService {
StepMeta fromStep = new StepMeta("TableInput", "Data Input", tableInput);
fromStep.setDraw(true);
fromStep.setLocation(100, 100);
return fromStep;
List<StepMeta> inputSteps = new ArrayList<>();
inputSteps.add(fromStep);
return inputSteps;
}
private StepMeta excelInputStep(String Info, List<DatasetTableField> datasetTableFields) {
private List<StepMeta> excelInputStep(String Info, List<DatasetTableField> datasetTableFields) {
List<StepMeta>inputSteps = new ArrayList<>();
DataTableInfoDTO dataTableInfoDTO = new Gson().fromJson(Info, DataTableInfoDTO.class);
List<ExcelSheetData> excelSheetDataList = dataTableInfoDTO.getExcelSheetDataList();
String suffix = excelSheetDataList.get(0).getPath().substring(excelSheetDataList.get(0).getPath().lastIndexOf(".") + 1);
ExcelInputMeta excelInputMeta = new ExcelInputMeta();
List<String> sheetNames = new ArrayList<>();
List<String> files = new ArrayList<>();
List<String> filesRequired = new ArrayList<>();
for (ExcelSheetData excelSheetData : excelSheetDataList) {
if (!sheetNames.contains(excelSheetData.getExcelLable())) {
sheetNames.add(excelSheetData.getExcelLable());
}
if (!files.contains(excelSheetData.getPath())) {
files.add(excelSheetData.getPath());
filesRequired.add("Y");
}
}
if (StringUtils.equalsIgnoreCase(suffix, "xlsx")) {
excelInputMeta.setSpreadSheetType(SpreadSheetType.SAX_POI);
excelInputMeta.setSheetName(sheetNames.toArray(new String[sheetNames.size()]));
}
if (StringUtils.equalsIgnoreCase(suffix, "xls")) {
excelInputMeta.setSpreadSheetType(SpreadSheetType.JXL);
excelInputMeta.setSheetName(sheetNames.toArray(new String[sheetNames.size()]));
}
excelInputMeta.setPassword("Encrypted");
excelInputMeta.setFileName(files.toArray(new String[files.size()]));
excelInputMeta.setFileRequired(filesRequired.toArray(new String[filesRequired.size()]));
excelInputMeta.setStartsWithHeader(true);
excelInputMeta.setIgnoreEmptyRows(true);
ExcelInputField[] fields = new ExcelInputField[datasetTableFields.size()];
for (int i = 0; i < datasetTableFields.size(); i++) {
ExcelInputField field = new ExcelInputField();
field.setName(datasetTableFields.get(i).getDataeaseName());
if (datasetTableFields.get(i).getDeExtractType() == 1) {
field.setType("String");
field.setFormat("yyyy-MM-dd HH:mm:ss");
} else {
field.setType("String");
}
fields[i] = field;
}
excelInputMeta.setField(fields);
StepMeta fromStep = new StepMeta("ExcelInput", "Data Input", excelInputMeta);
fromStep.setDraw(true);
fromStep.setLocation(100, 100);
return fromStep;
// Build one input step per uploaded sheet/file entry. `size` is the 1-based ordinal
// used to give each step a distinct name and vertical position.
int size = 1;
for (ExcelSheetData excelSheetData : excelSheetDataList) {
    StepMeta fromStep = null;
    // Take the extension from this entry's own path. Locating the dot in the first
    // entry's path and applying that index to the current path yields a wrong suffix
    // whenever the two paths differ in length.
    String path = excelSheetData.getPath();
    String suffix = path.substring(path.lastIndexOf(".") + 1);
    if (StringUtils.equalsIgnoreCase(suffix, "csv")) {
        CsvInputMeta csvInputMeta = new CsvInputMeta();
        csvInputMeta.setFilename(path);
        csvInputMeta.setHeaderPresent(true);
        csvInputMeta.setBufferSize("10000");
        csvInputMeta.setDelimiter(",");
        // The upload preview decodes CSV files as UTF-8; pin the same charset here
        // so extraction does not fall back to the platform default encoding.
        csvInputMeta.setEncoding("UTF-8");
        TextFileInputField[] fields = new TextFileInputField[datasetTableFields.size()];
        for (int i = 0; i < datasetTableFields.size(); i++) {
            TextFileInputField field = new TextFileInputField();
            field.setName(datasetTableFields.get(i).getDataeaseName());
            // Every column is read as a String; fields whose extract type is 1
            // additionally carry a date-time format.
            field.setType(ValueMeta.getType("String"));
            if (datasetTableFields.get(i).getDeExtractType() == 1) {
                field.setFormat("yyyy-MM-dd HH:mm:ss");
            }
            fields[i] = field;
        }
        csvInputMeta.setInputFields(fields);
        fromStep = new StepMeta("CsvInput", "Data Input " + size, csvInputMeta);
        fromStep.setDraw(true);
        fromStep.setLocation(100, 100 * size);
        inputSteps.add(fromStep);
    } else {
        // Each Excel step reads exactly one required file.
        List<String> files = new ArrayList<>();
        files.add(path);
        List<String> filesRequired = new ArrayList<>();
        filesRequired.add("Y");
        ExcelInputMeta excelInputMeta = new ExcelInputMeta();
        // NOTE(review): sheetNames is shared across iterations, so each later step is
        // configured with the labels of all earlier entries as well as its own.
        // Confirm this is intended; for several sheets of the same file it looks
        // like earlier sheets could be read again by later steps.
        sheetNames.add(excelSheetData.getExcelLable());
        if (StringUtils.equalsIgnoreCase(suffix, "xlsx")) {
            excelInputMeta.setSpreadSheetType(SpreadSheetType.SAX_POI);
            excelInputMeta.setSheetName(sheetNames.toArray(new String[sheetNames.size()]));
        }
        if (StringUtils.equalsIgnoreCase(suffix, "xls")) {
            excelInputMeta.setSpreadSheetType(SpreadSheetType.JXL);
            excelInputMeta.setSheetName(sheetNames.toArray(new String[sheetNames.size()]));
        }
        excelInputMeta.setPassword("Encrypted");
        excelInputMeta.setFileName(files.toArray(new String[files.size()]));
        excelInputMeta.setFileRequired(filesRequired.toArray(new String[filesRequired.size()]));
        excelInputMeta.setStartsWithHeader(true);
        excelInputMeta.setIgnoreEmptyRows(true);
        ExcelInputField[] fields = new ExcelInputField[datasetTableFields.size()];
        for (int i = 0; i < datasetTableFields.size(); i++) {
            ExcelInputField field = new ExcelInputField();
            field.setName(datasetTableFields.get(i).getDataeaseName());
            // Same typing rule as the CSV branch: String, plus a date-time format
            // for extract type 1.
            field.setType("String");
            if (datasetTableFields.get(i).getDeExtractType() == 1) {
                field.setFormat("yyyy-MM-dd HH:mm:ss");
            }
            fields[i] = field;
        }
        excelInputMeta.setField(fields);
        fromStep = new StepMeta("ExcelInput", "Data Input " + size, excelInputMeta);
        fromStep.setDraw(true);
        fromStep.setLocation(100, 100 * size);
        inputSteps.add(fromStep);
    }
    size++;
}
return inputSteps;
}
private StepMeta outputStep(String dorisOutputTable, List<DatasetTableField> datasetTableFields, Datasource datasource) {

View File

@ -40,7 +40,7 @@
:show-file-list="false"
:file-list="fileList"
:data="param"
accept=".xls,.xlsx,"
accept=".xls,.xlsx,.csv"
:before-upload="beforeUpload"
:on-success="uploadSuccess"
:on-error="uploadFail"
@ -374,7 +374,7 @@ export default {
if (this.param.editType === 0 && this.param.tableId && (effectExtField || changeFiled)) {
var msg = effectExtField ? i18n.t('dataset.task.effect_ext_field') + ', ' + i18n.t('dataset.task.excel_replace_msg') : i18n.t('dataset.task.excel_replace_msg')
var msg = effectExtField ? i18n.t('dataset.effect_ext_field') + ', ' + i18n.t('dataset.excel_replace_msg') : i18n.t('dataset.excel_replace_msg')
$confirm(msg, () => {
this.saveExcelData(sheetFileMd5, table)
})