forked from github/dataease
feat(backend): data extraction, switch to Doris
parent 84f68f1e4a
commit 9302d0f092
@@ -2,7 +2,6 @@ package io.dataease.config;
import com.fit2cloud.autoconfigure.QuartzAutoConfiguration;
|
||||
import io.dataease.commons.utils.CommonThreadPool;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.pentaho.di.core.KettleEnvironment;
|
||||
import org.pentaho.di.repository.filerep.KettleFileRepository;
|
||||
import org.pentaho.di.repository.filerep.KettleFileRepositoryMeta;
|
||||
@@ -32,31 +31,31 @@ public class CommonConfig {
// return configuration;
|
||||
// }
|
||||
|
||||
@Bean
|
||||
@ConditionalOnMissingBean
|
||||
public SparkSession javaSparkSession() {
|
||||
SparkSession spark = SparkSession.builder()
|
||||
.appName(env.getProperty("spark.appName", "DataeaseJob"))
|
||||
.master(env.getProperty("spark.master", "local[*]"))
|
||||
.config("spark.scheduler.mode", env.getProperty("spark.scheduler.mode", "FAIR"))
|
||||
.config("spark.serializer", env.getProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer"))
|
||||
.config("spark.executor.cores", env.getProperty("spark.executor.cores", "8"))
|
||||
.config("spark.executor.memory", env.getProperty("spark.executor.memory", "6442450944b"))
|
||||
.config("spark.locality.wait", env.getProperty("spark.locality.wait", "600000"))
|
||||
.config("spark.maxRemoteBlockSizeFetchToMem", env.getProperty("spark.maxRemoteBlockSizeFetchToMem", "2000m"))
|
||||
.config("spark.shuffle.detectCorrupt", env.getProperty("spark.shuffle.detectCorrupt", "false"))
|
||||
.config("spark.shuffle.service.enabled", env.getProperty("spark.shuffle.service.enabled", "true"))
|
||||
.config("spark.sql.adaptive.enabled", env.getProperty("spark.sql.adaptive.enabled", "true"))
|
||||
.config("spark.sql.adaptive.shuffle.targetPostShuffleInputSize", env.getProperty("spark.sql.adaptive.shuffle.targetPostShuffleInputSize", "200M"))
|
||||
.config("spark.sql.broadcastTimeout", env.getProperty("spark.sql.broadcastTimeout", "12000"))
|
||||
.config("spark.sql.retainGroupColumns", env.getProperty("spark.sql.retainGroupColumns", "false"))
|
||||
.config("spark.sql.sortMergeJoinExec.buffer.in.memory.threshold", env.getProperty("spark.sql.sortMergeJoinExec.buffer.in.memory.threshold", "100000"))
|
||||
.config("spark.sql.sortMergeJoinExec.buffer.spill.threshold", env.getProperty("spark.sql.sortMergeJoinExec.buffer.spill.threshold", "100000"))
|
||||
.config("spark.sql.variable.substitute", env.getProperty("spark.sql.variable.substitute", "false"))
|
||||
.config("spark.temp.expired.time", env.getProperty("spark.temp.expired.time", "3600"))
|
||||
.getOrCreate();
|
||||
return spark;
|
||||
}
|
||||
// @Bean
|
||||
// @ConditionalOnMissingBean
|
||||
// public SparkSession javaSparkSession() {
|
||||
// SparkSession spark = SparkSession.builder()
|
||||
// .appName(env.getProperty("spark.appName", "DataeaseJob"))
|
||||
// .master(env.getProperty("spark.master", "local[*]"))
|
||||
// .config("spark.scheduler.mode", env.getProperty("spark.scheduler.mode", "FAIR"))
|
||||
//// .config("spark.serializer", env.getProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer"))
|
||||
//// .config("spark.executor.cores", env.getProperty("spark.executor.cores", "8"))
|
||||
//// .config("spark.executor.memory", env.getProperty("spark.executor.memory", "6442450944b"))
|
||||
//// .config("spark.locality.wait", env.getProperty("spark.locality.wait", "600000"))
|
||||
//// .config("spark.maxRemoteBlockSizeFetchToMem", env.getProperty("spark.maxRemoteBlockSizeFetchToMem", "2000m"))
|
||||
//// .config("spark.shuffle.detectCorrupt", env.getProperty("spark.shuffle.detectCorrupt", "false"))
|
||||
//// .config("spark.shuffle.service.enabled", env.getProperty("spark.shuffle.service.enabled", "true"))
|
||||
//// .config("spark.sql.adaptive.enabled", env.getProperty("spark.sql.adaptive.enabled", "true"))
|
||||
//// .config("spark.sql.adaptive.shuffle.targetPostShuffleInputSize", env.getProperty("spark.sql.adaptive.shuffle.targetPostShuffleInputSize", "200M"))
|
||||
//// .config("spark.sql.broadcastTimeout", env.getProperty("spark.sql.broadcastTimeout", "12000"))
|
||||
//// .config("spark.sql.retainGroupColumns", env.getProperty("spark.sql.retainGroupColumns", "false"))
|
||||
//// .config("spark.sql.sortMergeJoinExec.buffer.in.memory.threshold", env.getProperty("spark.sql.sortMergeJoinExec.buffer.in.memory.threshold", "100000"))
|
||||
//// .config("spark.sql.sortMergeJoinExec.buffer.spill.threshold", env.getProperty("spark.sql.sortMergeJoinExec.buffer.spill.threshold", "100000"))
|
||||
//// .config("spark.sql.variable.substitute", env.getProperty("spark.sql.variable.substitute", "false"))
|
||||
//// .config("spark.temp.expired.time", env.getProperty("spark.temp.expired.time", "3600"))
|
||||
// .getOrCreate();
|
||||
// return spark;
|
||||
// }
|
||||
|
||||
@Bean
|
||||
@ConditionalOnMissingBean
|
||||
|
@@ -1,21 +1,12 @@
package io.dataease.listener;
|
||||
|
||||
import io.dataease.base.domain.DatasetTable;
|
||||
import io.dataease.base.domain.DatasetTableExample;
|
||||
import io.dataease.base.domain.DatasetTableField;
|
||||
import io.dataease.base.mapper.DatasetTableMapper;
|
||||
import io.dataease.commons.utils.CommonThreadPool;
|
||||
import io.dataease.datasource.service.DatasourceService;
|
||||
import io.dataease.service.dataset.DataSetTableFieldsService;
|
||||
import io.dataease.service.spark.SparkCalc;
|
||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
import org.springframework.context.ApplicationListener;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.core.env.Environment;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
@Order(value = 2)
|
||||
|
@@ -1,52 +1,47 @@
package io.dataease.listener;
|
||||
|
||||
import io.dataease.base.domain.DatasetTable;
|
||||
import io.dataease.base.domain.DatasetTableExample;
|
||||
import io.dataease.base.domain.DatasetTableField;
|
||||
import io.dataease.base.mapper.DatasetTableMapper;
|
||||
import io.dataease.commons.utils.CommonThreadPool;
|
||||
import io.dataease.service.dataset.DataSetTableFieldsService;
|
||||
import io.dataease.service.spark.SparkCalc;
|
||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
import org.springframework.context.ApplicationListener;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.core.env.Environment;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
@Order(value = 2)
|
||||
public class AppStartReadHBaseListener implements ApplicationListener<ApplicationReadyEvent> {
|
||||
@Resource
|
||||
private CommonThreadPool commonThreadPool;
|
||||
@Resource
|
||||
private SparkCalc sparkCalc;
|
||||
@Resource
|
||||
private Environment env; // holds the application configuration properties
|
||||
@Resource
|
||||
private DatasetTableMapper datasetTableMapper;
|
||||
@Resource
|
||||
private DataSetTableFieldsService dataSetTableFieldsService;
|
||||
|
||||
@Override
|
||||
public void onApplicationEvent(ApplicationReadyEvent applicationReadyEvent) {
|
||||
// System.out.println("================= Read HBase start =================");
|
||||
// // On startup, find the datasets scheduled for extraction and preload them from HBase into the cache
// DatasetTableExample datasetTableExample = new DatasetTableExample();
|
||||
// datasetTableExample.createCriteria().andModeEqualTo(1);
|
||||
// List<DatasetTable> datasetTables = datasetTableMapper.selectByExampleWithBLOBs(datasetTableExample);
|
||||
// for (DatasetTable table : datasetTables) {
|
||||
//// commonThreadPool.addTask(() -> {
|
||||
// try {
|
||||
// List<DatasetTableField> fields = dataSetTableFieldsService.getFieldsByTableId(table.getId());
|
||||
// sparkCalc.getHBaseDataAndCache(table.getId(), fields);
|
||||
// } catch (Exception e) {
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
//// });
|
||||
// }
|
||||
}
|
||||
}
|
||||
//package io.dataease.listener;
|
||||
//
|
||||
//import io.dataease.base.mapper.DatasetTableMapper;
|
||||
//import io.dataease.commons.utils.CommonThreadPool;
|
||||
//import io.dataease.service.dataset.DataSetTableFieldsService;
|
||||
//import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
//import org.springframework.context.ApplicationListener;
|
||||
//import org.springframework.core.annotation.Order;
|
||||
//import org.springframework.core.env.Environment;
|
||||
//import org.springframework.stereotype.Component;
|
||||
//
|
||||
//import javax.annotation.Resource;
|
||||
//
|
||||
//@Component
|
||||
//@Order(value = 2)
|
||||
//public class AppStartReadHBaseListener implements ApplicationListener<ApplicationReadyEvent> {
|
||||
// @Resource
|
||||
// private CommonThreadPool commonThreadPool;
|
||||
//// @Resource
|
||||
//// private SparkCalc sparkCalc;
|
||||
// @Resource
|
||||
// private Environment env; // holds the application configuration properties
//
|
||||
// @Resource
|
||||
// private DatasetTableMapper datasetTableMapper;
|
||||
// @Resource
|
||||
// private DataSetTableFieldsService dataSetTableFieldsService;
|
||||
//
|
||||
// @Override
|
||||
// public void onApplicationEvent(ApplicationReadyEvent applicationReadyEvent) {
|
||||
//// System.out.println("================= Read HBase start =================");
|
||||
//// // On startup, find the datasets scheduled for extraction and preload them from HBase into the cache
//// DatasetTableExample datasetTableExample = new DatasetTableExample();
|
||||
//// datasetTableExample.createCriteria().andModeEqualTo(1);
|
||||
//// List<DatasetTable> datasetTables = datasetTableMapper.selectByExampleWithBLOBs(datasetTableExample);
|
||||
//// for (DatasetTable table : datasetTables) {
|
||||
////// commonThreadPool.addTask(() -> {
|
||||
//// try {
|
||||
//// List<DatasetTableField> fields = dataSetTableFieldsService.getFieldsByTableId(table.getId());
|
||||
//// sparkCalc.getHBaseDataAndCache(table.getId(), fields);
|
||||
//// } catch (Exception e) {
|
||||
//// e.printStackTrace();
|
||||
//// }
|
||||
////// });
|
||||
//// }
|
||||
// }
|
||||
//}
|
||||
|
@@ -1,11 +1,13 @@
package io.dataease.service.chart;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.reflect.TypeToken;
|
||||
import io.dataease.base.domain.*;
|
||||
import io.dataease.base.mapper.ChartViewMapper;
|
||||
import io.dataease.commons.utils.AuthUtils;
|
||||
import io.dataease.commons.utils.BeanUtils;
|
||||
import io.dataease.commons.utils.CommonBeanFactory;
|
||||
import io.dataease.controller.request.chart.ChartExtFilterRequest;
|
||||
import io.dataease.controller.request.chart.ChartExtRequest;
|
||||
import io.dataease.controller.request.chart.ChartViewRequest;
|
||||
@@ -20,7 +22,6 @@ import io.dataease.dto.dataset.DataTableInfoDTO;
import io.dataease.dto.dataset.DataTableInfoDTO;
|
||||
import io.dataease.service.dataset.DataSetTableFieldsService;
|
||||
import io.dataease.service.dataset.DataSetTableService;
|
||||
import io.dataease.service.spark.SparkCalc;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.ObjectUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
@@ -43,8 +44,8 @@ public class ChartViewService {
private DataSetTableService dataSetTableService;
|
||||
@Resource
|
||||
private DatasourceService datasourceService;
|
||||
@Resource
|
||||
private SparkCalc sparkCalc;
|
||||
// @Resource
|
||||
// private SparkCalc sparkCalc;
|
||||
@Resource
|
||||
private DataSetTableFieldsService dataSetTableFieldsService;
|
||||
|
||||
@@ -146,8 +147,18 @@ public class ChartViewService {
data = datasourceProvider.getData(datasourceRequest);
|
||||
} else if (table.getMode() == 1) { // extraction mode
// get the dataset fields
List<DatasetTableField> fields = dataSetTableFieldsService.getFieldsByTableId(table.getId());
|
||||
data = sparkCalc.getData(table.getId(), fields, xAxis, yAxis, "tmp_" + view.getId().split("-")[0], extFilterList);
|
||||
// List<DatasetTableField> fields = dataSetTableFieldsService.getFieldsByTableId(table.getId());
|
||||
// data = sparkCalc.getData(table.getId(), fields, xAxis, yAxis, "tmp_" + view.getId().split("-")[0], extFilterList);
|
||||
|
||||
// connect to Doris and build the Doris datasource query
Datasource ds = dorisDatasource();
|
||||
DatasourceProvider datasourceProvider = ProviderFactory.getProvider(ds.getType());
|
||||
DatasourceRequest datasourceRequest = new DatasourceRequest();
|
||||
datasourceRequest.setDatasource(ds);
|
||||
String tableName = "ds_" + table.getId().replaceAll("-", "_");
|
||||
datasourceRequest.setTable(tableName);
|
||||
datasourceRequest.setQuery(getSQL(ds.getType(), tableName, xAxis, yAxis, extFilterList));
|
||||
data = datasourceProvider.getData(datasourceRequest);
|
||||
}
|
||||
|
||||
// chart components can be extended further
@@ -214,6 +225,24 @@ public class ChartViewService {
return filter.toString();
|
||||
}
|
||||
|
||||
public Datasource dorisDatasource() {
|
||||
JSONObject jsonObject = new JSONObject();
|
||||
jsonObject.put("dataSourceType", "jdbc");
|
||||
jsonObject.put("dataBase", "example_db");
|
||||
jsonObject.put("username", "root");
|
||||
jsonObject.put("password", "dataease");
|
||||
jsonObject.put("host", "59.110.64.159");
|
||||
jsonObject.put("port", "9030");
|
||||
|
||||
Datasource datasource = new Datasource();
|
||||
datasource.setId("doris");
|
||||
datasource.setName("doris");
|
||||
datasource.setDesc("doris");
|
||||
datasource.setType("mysql");
|
||||
datasource.setConfiguration(jsonObject.toJSONString());
|
||||
return datasource;
|
||||
}
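Note: dorisDatasource() above works because Apache Doris exposes the MySQL wire protocol on its FE query port (9030 here), so the existing MySQL provider can be reused unchanged, and extracted datasets are then read from tables named ds_<datasetTableId> with dashes replaced by underscores. The following is only an illustrative connectivity sketch, not part of this commit; the class name is made up, and the host, database, and credentials are simply the values hard-coded above:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class DorisConnectivityCheck {
        public static void main(String[] args) throws Exception {
            // Doris FE speaks the MySQL protocol, so the stock MySQL JDBC driver is enough here.
            String url = "jdbc:mysql://59.110.64.159:9030/example_db";
            try (Connection conn = DriverManager.getConnection(url, "root", "dataease");
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery("SHOW TABLES")) {
                while (rs.next()) {
                    // extracted datasets show up as ds_<datasetTableId with '-' replaced by '_'>
                    System.out.println(rs.getString(1));
                }
            }
        }
    }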
|
||||
|
||||
public String getSQL(String type, String table, List<ChartViewFieldDTO> xAxis, List<ChartViewFieldDTO> yAxis, List<ChartExtFilterRequest> extFilterRequestList) {
|
||||
DatasourceTypes datasourceType = DatasourceTypes.valueOf(type);
|
||||
switch (datasourceType) {
|
||||
@@ -321,7 +350,7 @@ public class ChartViewService {
return map;
|
||||
}
|
||||
|
||||
public List<ChartView> viewsByIds(List<String> viewIds){
|
||||
public List<ChartView> viewsByIds(List<String> viewIds) {
|
||||
ChartViewExample example = new ChartViewExample();
|
||||
example.createCriteria().andIdIn(viewIds);
|
||||
return chartViewMapper.selectByExample(example);
|
||||
|
@@ -637,11 +637,12 @@ public class DataSetTableService {
|
||||
private String saveFile(MultipartFile file) throws Exception {
|
||||
String filename = file.getOriginalFilename();
|
||||
File p = new File(path);
|
||||
String dirPath = path + AuthUtils.getUser().getUsername() + "/";
|
||||
File p = new File(dirPath);
|
||||
if (!p.exists()) {
|
||||
p.mkdirs();
|
||||
}
|
||||
String filePath = path + AuthUtils.getUser().getUsername() + "/" + filename;
|
||||
String filePath = dirPath + filename;
|
||||
File f = new File(filePath);
|
||||
FileOutputStream fileOutputStream = new FileOutputStream(f);
|
||||
fileOutputStream.write(file.getBytes());
|
||||
|
@@ -1,7 +1,6 @@
package io.dataease.service.dataset;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.sun.org.apache.bcel.internal.generic.SWITCH;
|
||||
import io.dataease.base.domain.*;
|
||||
import io.dataease.base.mapper.DatasourceMapper;
|
||||
import io.dataease.commons.constants.JobStatus;
|
||||
@@ -13,31 +12,15 @@ import io.dataease.datasource.constants.DatasourceTypes;
import io.dataease.datasource.dto.MysqlConfigrationDTO;
|
||||
import io.dataease.dto.dataset.DataSetTaskLogDTO;
|
||||
import io.dataease.dto.dataset.DataTableInfoDTO;
|
||||
import io.dataease.service.spark.SparkCalc;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.*;
|
||||
import org.pentaho.big.data.api.cluster.NamedCluster;
|
||||
import org.pentaho.big.data.api.cluster.NamedClusterService;
|
||||
import org.pentaho.big.data.api.cluster.service.locator.NamedClusterServiceLocator;
|
||||
import org.pentaho.big.data.api.cluster.service.locator.impl.NamedClusterServiceLocatorImpl;
|
||||
import org.pentaho.big.data.api.initializer.ClusterInitializer;
|
||||
import org.pentaho.big.data.api.initializer.ClusterInitializerProvider;
|
||||
import org.pentaho.big.data.api.initializer.impl.ClusterInitializerImpl;
|
||||
import org.pentaho.big.data.impl.cluster.NamedClusterImpl;
|
||||
import org.pentaho.big.data.impl.cluster.NamedClusterManager;
|
||||
import org.pentaho.big.data.kettle.plugins.hbase.MappingDefinition;
|
||||
import org.pentaho.big.data.kettle.plugins.hbase.output.HBaseOutputMeta;
|
||||
import org.apache.hadoop.hbase.client.Connection;
|
||||
import org.pentaho.di.cluster.SlaveServer;
|
||||
import org.pentaho.di.core.KettleEnvironment;
|
||||
import org.pentaho.di.core.database.DatabaseMeta;
|
||||
import org.pentaho.di.core.plugins.PluginRegistry;
|
||||
import org.pentaho.di.core.plugins.StepPluginType;
|
||||
import org.pentaho.di.core.util.EnvUtil;
|
||||
import org.pentaho.di.engine.configuration.impl.pentaho.DefaultRunConfiguration;
|
||||
import org.pentaho.di.job.Job;
|
||||
import org.pentaho.di.job.JobExecutionConfiguration;
|
||||
import org.pentaho.di.job.JobHopMeta;
|
||||
@@ -45,49 +28,25 @@ import org.pentaho.di.job.JobMeta;
import org.pentaho.di.job.entries.special.JobEntrySpecial;
|
||||
import org.pentaho.di.job.entries.success.JobEntrySuccess;
|
||||
import org.pentaho.di.job.entries.trans.JobEntryTrans;
|
||||
import org.pentaho.di.job.entries.writetolog.JobEntryWriteToLog;
|
||||
import org.pentaho.di.job.entry.JobEntryCopy;
|
||||
import org.pentaho.di.repository.RepositoryDirectoryInterface;
|
||||
import org.pentaho.di.repository.filerep.KettleFileRepository;
|
||||
import org.pentaho.di.repository.filerep.KettleFileRepositoryMeta;
|
||||
import org.pentaho.di.trans.TransConfiguration;
|
||||
import org.pentaho.di.trans.TransExecutionConfiguration;
|
||||
import org.pentaho.di.trans.TransHopMeta;
|
||||
import org.pentaho.di.trans.TransMeta;
|
||||
import org.pentaho.di.trans.step.StepMeta;
|
||||
import org.pentaho.di.trans.steps.tableinput.TableInputMeta;
|
||||
import org.pentaho.di.trans.steps.textfileoutput.TextFileField;
|
||||
import org.pentaho.di.trans.steps.textfileoutput.TextFileOutput;
|
||||
import org.pentaho.di.trans.steps.textfileoutput.TextFileOutputMeta;
|
||||
import org.pentaho.di.trans.steps.userdefinedjavaclass.InfoStepDefinition;
|
||||
import org.pentaho.di.trans.steps.userdefinedjavaclass.UserDefinedJavaClassDef;
|
||||
import org.pentaho.di.trans.steps.userdefinedjavaclass.UserDefinedJavaClassMeta;
|
||||
import org.pentaho.di.www.SlaveServerJobStatus;
|
||||
import org.pentaho.runtime.test.RuntimeTest;
|
||||
import org.pentaho.runtime.test.RuntimeTester;
|
||||
import org.pentaho.runtime.test.action.RuntimeTestActionHandler;
|
||||
import org.pentaho.runtime.test.action.RuntimeTestActionService;
|
||||
import org.pentaho.runtime.test.action.impl.RuntimeTestActionServiceImpl;
|
||||
import org.pentaho.runtime.test.impl.RuntimeTesterImpl;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.pentaho.di.core.row.ValueMetaInterface;
|
||||
import scala.annotation.meta.field;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import javax.sound.sampled.Line;
|
||||
import java.io.File;
|
||||
import java.security.MessageDigest;
|
||||
import java.sql.ResultSet;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
@Service
|
||||
public class ExtractDataService {
|
||||
|
||||
@@ -125,8 +84,8 @@ public class ExtractDataService {
@Value("${hbase.zookeeper.property.clientPort:2181}")
|
||||
private String zkPort;
|
||||
|
||||
@Resource
|
||||
private SparkCalc sparkCalc;
|
||||
// @Resource
|
||||
// private SparkCalc sparkCalc;
|
||||
|
||||
|
||||
public void extractData(String datasetTableId, String taskId, String type) {
|
||||
|
@@ -1,308 +1,407 @@
package io.dataease.service.spark;
|
||||
|
||||
import io.dataease.base.domain.DatasetTableField;
|
||||
import io.dataease.commons.utils.CommonBeanFactory;
|
||||
import io.dataease.controller.request.chart.ChartExtFilterRequest;
|
||||
import io.dataease.dto.chart.ChartViewFieldDTO;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.ObjectUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
|
||||
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
|
||||
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.apache.spark.sql.types.DataTypes;
|
||||
import org.apache.spark.sql.types.StructField;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
import org.apache.spark.storage.StorageLevel;
|
||||
import org.springframework.core.env.Environment;
|
||||
import org.springframework.stereotype.Service;
|
||||
import scala.Tuple2;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.text.MessageFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Base64;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @Author gin
|
||||
* @Date 2021/3/26 3:49 PM
*/
|
||||
@Service
|
||||
public class SparkCalc {
|
||||
private static String column_family = "dataease";
|
||||
private static String data_path = "/opt/dataease/data/db/";
|
||||
@Resource
|
||||
private Environment env; // holds the application configuration properties
|
||||
public List<String[]> getData(String hTable, List<DatasetTableField> fields, List<ChartViewFieldDTO> xAxis, List<ChartViewFieldDTO> yAxis, String tmpTable, List<ChartExtFilterRequest> requestList) throws Exception {
|
||||
// Spark Context
|
||||
SparkSession spark = CommonBeanFactory.getBean(SparkSession.class);
|
||||
JavaSparkContext sparkContext = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
// Spark SQL Context
|
||||
SQLContext sqlContext = new SQLContext(sparkContext);
|
||||
sqlContext.setConf("spark.sql.shuffle.partitions", env.getProperty("spark.sql.shuffle.partitions", "1"));
|
||||
sqlContext.setConf("spark.default.parallelism", env.getProperty("spark.default.parallelism", "1"));
|
||||
|
||||
Dataset<Row> dataFrame = getData(sparkContext, sqlContext, hTable, fields);
|
||||
//package io.dataease.service.spark;
|
||||
//
|
||||
//import io.dataease.base.domain.DatasetTableField;
|
||||
//import io.dataease.commons.utils.CommonBeanFactory;
|
||||
//import io.dataease.controller.request.chart.ChartExtFilterRequest;
|
||||
//import io.dataease.dto.chart.ChartViewFieldDTO;
|
||||
//import org.antlr.analysis.MachineProbe;
|
||||
//import org.apache.commons.collections4.CollectionUtils;
|
||||
//import org.apache.commons.lang3.ObjectUtils;
|
||||
//import org.apache.commons.lang3.StringUtils;
|
||||
//import org.apache.hadoop.hbase.client.Result;
|
||||
//import org.apache.hadoop.hbase.client.Scan;
|
||||
//import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
//import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
|
||||
//import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
|
||||
//import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
|
||||
//import org.apache.hadoop.hbase.util.Bytes;
|
||||
//import org.apache.spark.api.java.JavaPairRDD;
|
||||
//import org.apache.spark.api.java.JavaRDD;
|
||||
//import org.apache.spark.api.java.JavaSparkContext;
|
||||
//import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
//import org.apache.spark.api.java.function.Function;
|
||||
//import org.apache.spark.sql.*;
|
||||
//import org.apache.spark.sql.types.DataTypes;
|
||||
//import org.apache.spark.sql.types.StructField;
|
||||
//import org.apache.spark.sql.types.StructType;
|
||||
//import org.apache.spark.storage.StorageLevel;
|
||||
//import org.springframework.core.env.Environment;
|
||||
//import org.springframework.stereotype.Service;
|
||||
//import scala.Tuple2;
|
||||
//
|
||||
//import javax.annotation.Resource;
|
||||
//import java.math.BigDecimal;
|
||||
//import java.text.MessageFormat;
|
||||
//import java.util.*;
|
||||
//
|
||||
///**
|
||||
// * @Author gin
|
||||
// * @Date 2021/3/26 3:49 PM
// */
|
||||
//@Service
|
||||
//public class SparkCalc {
|
||||
// private static String column_family = "dataease";
|
||||
// private static String data_path = "/opt/dataease/data/db/";
|
||||
// @Resource
|
||||
// private Environment env; // holds the application configuration properties
//
|
||||
// public List<String[]> getData(String hTable, List<DatasetTableField> fields, List<ChartViewFieldDTO> xAxis, List<ChartViewFieldDTO> yAxis, String tmpTable, List<ChartExtFilterRequest> requestList) throws Exception {
|
||||
// // Spark Context
|
||||
// SparkSession spark = CommonBeanFactory.getBean(SparkSession.class);
|
||||
// JavaSparkContext sparkContext = new JavaSparkContext(spark.sparkContext());
|
||||
//
|
||||
// // Spark SQL Context
|
||||
// SQLContext sqlContext = new SQLContext(sparkContext);
|
||||
// sqlContext.setConf("spark.sql.shuffle.partitions", env.getProperty("spark.sql.shuffle.partitions", "1"));
|
||||
// sqlContext.setConf("spark.default.parallelism", env.getProperty("spark.default.parallelism", "1"));
|
||||
//
|
||||
// /*Map<String, BigDecimal> dataFrame = getData(sparkContext, sqlContext, hTable, fields);
|
||||
// List<String[]> data = new ArrayList<>();
|
||||
// Iterator<Map.Entry<String, BigDecimal>> iterator = dataFrame.entrySet().iterator();
|
||||
// while (iterator.hasNext()) {
|
||||
// String[] r = new String[2];
|
||||
// Map.Entry<String, BigDecimal> next = iterator.next();
|
||||
// String key = next.getKey();
|
||||
// BigDecimal value = next.getValue();
|
||||
// r[0] = key;
|
||||
// r[1] = value.toString();
|
||||
// data.add(r);
|
||||
// }*/
|
||||
//
|
||||
//// Dataset<Row> dataFrame = getData(sparkContext, sqlContext, hTable, fields);
|
||||
// Dataset<Row> dataFrame = CacheUtil.getInstance().getCacheData(hTable);
|
||||
// if (ObjectUtils.isEmpty(dataFrame)) {
|
||||
// dataFrame = getData(sparkContext, sqlContext, hTable, fields);
|
||||
// dataFrame = getHBaseDataAndCache(sparkContext, sqlContext, hTable, fields);
|
||||
// }
|
||||
|
||||
dataFrame.createOrReplaceTempView(tmpTable);
Dataset<Row> sql = sqlContext.sql(getSQL(xAxis, yAxis, tmpTable, requestList));
|
||||
// transform
|
||||
List<String[]> data = new ArrayList<>();
|
||||
List<Row> list = sql.collectAsList();
|
||||
for (Row row : list) {
|
||||
String[] r = new String[row.length()];
|
||||
for (int i = 0; i < row.length(); i++) {
|
||||
r[i] = row.get(i) == null ? "null" : row.get(i).toString();
|
||||
}
|
||||
data.add(r);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
public Dataset<Row> getHBaseDataAndCache(String hTable, List<DatasetTableField> fields) throws Exception {
|
||||
// Spark Context
|
||||
SparkSession spark = CommonBeanFactory.getBean(SparkSession.class);
|
||||
JavaSparkContext sparkContext = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
// Spark SQL Context
|
||||
SQLContext sqlContext = new SQLContext(sparkContext);
|
||||
sqlContext.setConf("spark.sql.shuffle.partitions", env.getProperty("spark.sql.shuffle.partitions", "1"));
|
||||
sqlContext.setConf("spark.default.parallelism", env.getProperty("spark.default.parallelism", "1"));
|
||||
return getHBaseDataAndCache(sparkContext, sqlContext, hTable, fields);
|
||||
}
|
||||
|
||||
public Dataset<Row> getData(JavaSparkContext sparkContext, SQLContext sqlContext, String tableId, List<DatasetTableField> fields) throws Exception {
|
||||
fields.sort((o1, o2) -> {
|
||||
if (o1.getOriginName() == null) {
|
||||
return -1;
|
||||
}
|
||||
if (o2.getOriginName() == null) {
|
||||
return 1;
|
||||
}
|
||||
return o1.getOriginName().compareTo(o2.getOriginName());
|
||||
});
|
||||
|
||||
JavaRDD<String> pairRDD = sparkContext.textFile(data_path + tableId + ".txt");
|
||||
|
||||
JavaRDD<Row> rdd = pairRDD.mapPartitions( (FlatMapFunction<java.util.Iterator<String>, Row>) tuple2Iterator -> {
|
||||
List<Row> iterator = new ArrayList<>();
|
||||
while (tuple2Iterator.hasNext()) {
|
||||
String[] items = tuple2Iterator.next().split(";");
|
||||
List<Object> list = new ArrayList<>();
|
||||
for(int i=0; i<items.length; i++){
|
||||
String l = items[i];
|
||||
DatasetTableField x = fields.get(i);
|
||||
if (x.getDeType() == 0 || x.getDeType() == 1) {
|
||||
list.add(l);
|
||||
} else if (x.getDeType() == 2) {
|
||||
if (StringUtils.isEmpty(l)) {
|
||||
l = "0";
|
||||
}
|
||||
if (StringUtils.equalsIgnoreCase(l,"Y")) {
|
||||
l = "1";
|
||||
}
|
||||
if (StringUtils.equalsIgnoreCase(l,"N")) {
|
||||
l = "0";
|
||||
}
|
||||
list.add(Long.valueOf(l));
|
||||
} else if (x.getDeType() == 3) {
|
||||
if (StringUtils.isEmpty(l)) {
|
||||
l = "0.0";
|
||||
}
|
||||
list.add(Double.valueOf(l));
|
||||
}
|
||||
}
|
||||
iterator.add(RowFactory.create(list.toArray()));
|
||||
}
|
||||
return iterator.iterator();
|
||||
});
|
||||
|
||||
List<StructField> structFields = new ArrayList<>();
|
||||
// the struct field order must match the RDD column order
fields.forEach(x -> {
|
||||
if (x.getDeType() == 0 || x.getDeType() == 1) {
|
||||
structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.StringType, true));
|
||||
} else if (x.getDeType() == 2) {
|
||||
structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.LongType, true));
|
||||
} else if (x.getDeType() == 3) {
|
||||
structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.DoubleType, true));
|
||||
}
|
||||
});
|
||||
StructType structType = DataTypes.createStructType(structFields);
|
||||
|
||||
Dataset<Row> dataFrame = sqlContext.createDataFrame(rdd, structType);
|
||||
return dataFrame;
|
||||
}
|
||||
|
||||
public Dataset<Row> getHBaseDataAndCache(JavaSparkContext sparkContext, SQLContext sqlContext, String hTable, List<DatasetTableField> fields) throws Exception {
|
||||
Scan scan = new Scan();
|
||||
scan.addFamily(Bytes.toBytes(column_family));
|
||||
for (DatasetTableField field : fields) {
|
||||
scan.addColumn(Bytes.toBytes(column_family), Bytes.toBytes(field.getOriginName()));
|
||||
}
|
||||
ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
|
||||
String scanToString = new String(Base64.getEncoder().encode(proto.toByteArray()));
|
||||
|
||||
// HBase config
|
||||
org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
|
||||
conf.set("hbase.zookeeper.quorum", env.getProperty("hbase.zookeeper.quorum"));
|
||||
conf.set("hbase.zookeeper.property.clientPort", env.getProperty("hbase.zookeeper.property.clientPort"));
|
||||
conf.set("hbase.client.retries.number", env.getProperty("hbase.client.retries.number", "1"));
|
||||
conf.set(TableInputFormat.INPUT_TABLE, hTable);
|
||||
conf.set(TableInputFormat.SCAN, scanToString);
|
||||
|
||||
JavaPairRDD<ImmutableBytesWritable, Result> pairRDD = sparkContext.newAPIHadoopRDD(conf, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);
|
||||
|
||||
JavaRDD<Row> rdd = pairRDD.mapPartitions((FlatMapFunction<Iterator<Tuple2<ImmutableBytesWritable, Result>>, Row>) tuple2Iterator -> {
|
||||
List<Row> iterator = new ArrayList<>();
|
||||
while (tuple2Iterator.hasNext()) {
|
||||
Result result = tuple2Iterator.next()._2;
|
||||
List<Object> list = new ArrayList<>();
|
||||
fields.forEach(x -> {
|
||||
String l = Bytes.toString(result.getValue(column_family.getBytes(), x.getOriginName().getBytes()));
|
||||
if (x.getDeType() == 0 || x.getDeType() == 1) {
|
||||
list.add(l);
|
||||
} else if (x.getDeType() == 2) {
|
||||
if (StringUtils.isEmpty(l)) {
|
||||
l = "0";
|
||||
}
|
||||
list.add(Long.valueOf(l));
|
||||
} else if (x.getDeType() == 3) {
|
||||
if (StringUtils.isEmpty(l)) {
|
||||
l = "0.0";
|
||||
}
|
||||
list.add(Double.valueOf(l));
|
||||
}
|
||||
});
|
||||
iterator.add(RowFactory.create(list.toArray()));
|
||||
}
|
||||
return iterator.iterator();
|
||||
});
|
||||
|
||||
List<StructField> structFields = new ArrayList<>();
|
||||
// the struct field order must match the RDD column order
fields.forEach(x -> {
|
||||
if (x.getDeType() == 0 || x.getDeType() == 1) {
|
||||
structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.StringType, true));
|
||||
} else if (x.getDeType() == 2) {
|
||||
structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.LongType, true));
|
||||
} else if (x.getDeType() == 3) {
|
||||
structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.DoubleType, true));
|
||||
}
|
||||
});
|
||||
StructType structType = DataTypes.createStructType(structFields);
|
||||
|
||||
Dataset<Row> dataFrame = sqlContext.createDataFrame(rdd, structType).persist(StorageLevel.MEMORY_AND_DISK_SER());
|
||||
//
|
||||
// dataFrame.createOrReplaceTempView(tmpTable);
|
||||
// Dataset<Row> sql = sqlContext.sql(getSQL(xAxis, yAxis, tmpTable, requestList));
|
||||
// // transform
|
||||
// List<String[]> data = new ArrayList<>();
|
||||
// List<Row> list = sql.collectAsList();
|
||||
// for (Row row : list) {
|
||||
// String[] r = new String[row.length()];
|
||||
// for (int i = 0; i < row.length(); i++) {
|
||||
// r[i] = row.get(i) == null ? "null" : row.get(i).toString();
|
||||
// }
|
||||
// data.add(r);
|
||||
// }
|
||||
// return data;
|
||||
// }
|
||||
//
|
||||
// public Dataset<Row> getHBaseDataAndCache(String hTable, List<DatasetTableField> fields) throws Exception {
|
||||
// // Spark Context
|
||||
// SparkSession spark = CommonBeanFactory.getBean(SparkSession.class);
|
||||
// JavaSparkContext sparkContext = new JavaSparkContext(spark.sparkContext());
|
||||
//
|
||||
// // Spark SQL Context
|
||||
// SQLContext sqlContext = new SQLContext(sparkContext);
|
||||
// sqlContext.setConf("spark.sql.shuffle.partitions", env.getProperty("spark.sql.shuffle.partitions", "1"));
|
||||
// sqlContext.setConf("spark.default.parallelism", env.getProperty("spark.default.parallelism", "1"));
|
||||
// return getHBaseDataAndCache(sparkContext, sqlContext, hTable, fields);
|
||||
// }
|
||||
//
|
||||
// public Map<String, BigDecimal> getData(JavaSparkContext sparkContext, SQLContext sqlContext, String tableId, List<DatasetTableField> fields) throws Exception {
|
||||
// fields.sort((o1, o2) -> {
|
||||
// if (o1.getOriginName() == null) {
|
||||
// return -1;
|
||||
// }
|
||||
// if (o2.getOriginName() == null) {
|
||||
// return 1;
|
||||
// }
|
||||
// return o1.getOriginName().compareTo(o2.getOriginName());
|
||||
// });
|
||||
//
|
||||
// JavaRDD<String> pairRDD = sparkContext.textFile(data_path + tableId + ".txt");
|
||||
//// System.out.println(pairRDD.count());
|
||||
//
|
||||
//// JavaRDD<Map.Entry<String, BigDecimal>> rdd = pairRDD.map((Function<String, Map.Entry<String, BigDecimal>>) v1 -> {
|
||||
//// Map<String, BigDecimal> map = new HashMap<>();
|
||||
//// String[] items = v1.split(";");
|
||||
//// String day = null;
|
||||
//// BigDecimal res = new BigDecimal(0);
|
||||
//// for (int i = 0; i < items.length; i++) {
|
||||
//// String l = items[i];
|
||||
//// DatasetTableField x = fields.get(i);
|
||||
//// if (x.getOriginName().equalsIgnoreCase("sync_day")) {
|
||||
//// day = l;
|
||||
//// }
|
||||
//// if (x.getOriginName().equalsIgnoreCase("usage_cost")) {
|
||||
//// res = new BigDecimal(l);
|
||||
//// }
|
||||
//// }
|
||||
//// BigDecimal bigDecimal = map.get(day);
|
||||
//// if (bigDecimal == null) {
|
||||
//// map.put(day, res);
|
||||
//// } else {
|
||||
//// map.put(day, bigDecimal.add(res));
|
||||
//// }
|
||||
//// return map.entrySet().iterator().next();
|
||||
//// });
|
||||
//
|
||||
// JavaRDD<Map.Entry<String, BigDecimal>> rdd = pairRDD.mapPartitions((FlatMapFunction<java.util.Iterator<String>, Map.Entry<String, BigDecimal>>) tuple2Iterator -> {
|
||||
// Map<String, BigDecimal> map = new HashMap<>();
|
||||
// while (tuple2Iterator.hasNext()) {
|
||||
// String[] items = tuple2Iterator.next().split(";");
|
||||
// String day = null;
|
||||
// BigDecimal res = new BigDecimal(0);
|
||||
// for (int i = 0; i < items.length; i++) {
|
||||
// String l = items[i];
|
||||
// DatasetTableField x = fields.get(i);
|
||||
// if (x.getOriginName().equalsIgnoreCase("sync_day")) {
|
||||
// day = l;
|
||||
// }
|
||||
// if (x.getOriginName().equalsIgnoreCase("usage_cost")) {
|
||||
// res = new BigDecimal(l);
|
||||
// }
|
||||
// }
|
||||
// BigDecimal bigDecimal = map.get(day);
|
||||
// if (bigDecimal == null) {
|
||||
// map.put(day, res);
|
||||
// } else {
|
||||
// map.put(day, bigDecimal.add(res));
|
||||
// }
|
||||
// }
|
||||
// return map.entrySet().iterator();
|
||||
// });
|
||||
//
|
||||
//
|
||||
//// System.out.println(rdd.count());
|
||||
//
|
||||
// Map<String, BigDecimal> map = new HashMap<>();
|
||||
// List<Map.Entry<String, BigDecimal>> collect = rdd.collect();
|
||||
//// System.out.println(collect.size());
|
||||
//
|
||||
// collect.forEach(stringBigDecimalEntry -> {
|
||||
// String key = stringBigDecimalEntry.getKey();
|
||||
// BigDecimal value = stringBigDecimalEntry.getValue();
|
||||
//
|
||||
// BigDecimal bigDecimal = map.get(key);
|
||||
// if (bigDecimal == null) {
|
||||
// map.put(key, value);
|
||||
// } else {
|
||||
// map.put(key, bigDecimal.add(value));
|
||||
// }
|
||||
// });
|
||||
//
|
||||
// return map;
|
||||
// }
|
||||
//
|
||||
//// public Dataset<Row> getData(JavaSparkContext sparkContext, SQLContext sqlContext, String tableId, List<DatasetTableField> fields) throws Exception {
|
||||
//// fields.sort((o1, o2) -> {
|
||||
//// if (o1.getOriginName() == null) {
|
||||
//// return -1;
|
||||
//// }
|
||||
//// if (o2.getOriginName() == null) {
|
||||
//// return 1;
|
||||
//// }
|
||||
//// return o1.getOriginName().compareTo(o2.getOriginName());
|
||||
//// });
|
||||
////
|
||||
//// JavaRDD<String> pairRDD = sparkContext.textFile(data_path + tableId + ".txt");
|
||||
////
|
||||
//// JavaRDD<Row> rdd = pairRDD.mapPartitions((FlatMapFunction<java.util.Iterator<String>, Row>) tuple2Iterator -> {
|
||||
//// List<Row> iterator = new ArrayList<>();
|
||||
//// while (tuple2Iterator.hasNext()) {
|
||||
//// String[] items = tuple2Iterator.next().split(";");
|
||||
//// List<Object> list = new ArrayList<>();
|
||||
//// for (int i = 0; i < items.length; i++) {
|
||||
//// String l = items[i];
|
||||
//// DatasetTableField x = fields.get(i);
|
||||
//// if (x.getDeType() == 0 || x.getDeType() == 1) {
|
||||
//// list.add(l);
|
||||
//// } else if (x.getDeType() == 2) {
|
||||
//// if (StringUtils.isEmpty(l)) {
|
||||
//// l = "0";
|
||||
//// }
|
||||
//// if (StringUtils.equalsIgnoreCase(l, "Y")) {
|
||||
//// l = "1";
|
||||
//// }
|
||||
//// if (StringUtils.equalsIgnoreCase(l, "N")) {
|
||||
//// l = "0";
|
||||
//// }
|
||||
//// list.add(Long.valueOf(l));
|
||||
//// } else if (x.getDeType() == 3) {
|
||||
//// if (StringUtils.isEmpty(l)) {
|
||||
//// l = "0.0";
|
||||
//// }
|
||||
//// list.add(Double.valueOf(l));
|
||||
//// }
|
||||
//// }
|
||||
//// iterator.add(RowFactory.create(list.toArray()));
|
||||
//// }
|
||||
//// return iterator.iterator();
|
||||
//// });
|
||||
////
|
||||
//// List<StructField> structFields = new ArrayList<>();
|
||||
//// // the struct field order must match the RDD column order
//// fields.forEach(x -> {
|
||||
//// if (x.getDeType() == 0 || x.getDeType() == 1) {
|
||||
//// structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.StringType, true));
|
||||
//// } else if (x.getDeType() == 2) {
|
||||
//// structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.LongType, true));
|
||||
//// } else if (x.getDeType() == 3) {
|
||||
//// structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.DoubleType, true));
|
||||
//// }
|
||||
//// });
|
||||
//// StructType structType = DataTypes.createStructType(structFields);
|
||||
////
|
||||
//// Dataset<Row> dataFrame = sqlContext.createDataFrame(rdd, structType);
|
||||
//// return dataFrame;
|
||||
//// }
|
||||
//
|
||||
// public Dataset<Row> getHBaseDataAndCache(JavaSparkContext sparkContext, SQLContext sqlContext, String hTable, List<DatasetTableField> fields) throws Exception {
|
||||
// Scan scan = new Scan();
|
||||
// scan.addFamily(Bytes.toBytes(column_family));
|
||||
// for (DatasetTableField field : fields) {
|
||||
// scan.addColumn(Bytes.toBytes(column_family), Bytes.toBytes(field.getOriginName()));
|
||||
// }
|
||||
// ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
|
||||
// String scanToString = new String(Base64.getEncoder().encode(proto.toByteArray()));
|
||||
//
|
||||
// // HBase config
|
||||
// org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
|
||||
// conf.set("hbase.zookeeper.quorum", env.getProperty("hbase.zookeeper.quorum"));
|
||||
// conf.set("hbase.zookeeper.property.clientPort", env.getProperty("hbase.zookeeper.property.clientPort"));
|
||||
// conf.set("hbase.client.retries.number", env.getProperty("hbase.client.retries.number", "1"));
|
||||
// conf.set(TableInputFormat.INPUT_TABLE, hTable);
|
||||
// conf.set(TableInputFormat.SCAN, scanToString);
|
||||
//
|
||||
// JavaPairRDD<ImmutableBytesWritable, Result> pairRDD = sparkContext.newAPIHadoopRDD(conf, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);
|
||||
//
|
||||
// JavaRDD<Row> rdd = pairRDD.mapPartitions((FlatMapFunction<Iterator<Tuple2<ImmutableBytesWritable, Result>>, Row>) tuple2Iterator -> {
|
||||
// List<Row> iterator = new ArrayList<>();
|
||||
// while (tuple2Iterator.hasNext()) {
|
||||
// Result result = tuple2Iterator.next()._2;
|
||||
// List<Object> list = new ArrayList<>();
|
||||
// fields.forEach(x -> {
|
||||
// String l = Bytes.toString(result.getValue(column_family.getBytes(), x.getOriginName().getBytes()));
|
||||
// if (x.getDeType() == 0 || x.getDeType() == 1) {
|
||||
// list.add(l);
|
||||
// } else if (x.getDeType() == 2) {
|
||||
// if (StringUtils.isEmpty(l)) {
|
||||
// l = "0";
|
||||
// }
|
||||
// list.add(Long.valueOf(l));
|
||||
// } else if (x.getDeType() == 3) {
|
||||
// if (StringUtils.isEmpty(l)) {
|
||||
// l = "0.0";
|
||||
// }
|
||||
// list.add(Double.valueOf(l));
|
||||
// }
|
||||
// });
|
||||
// iterator.add(RowFactory.create(list.toArray()));
|
||||
// }
|
||||
// return iterator.iterator();
|
||||
// });
|
||||
//
|
||||
// List<StructField> structFields = new ArrayList<>();
|
||||
// // the struct field order must match the RDD column order
// fields.forEach(x -> {
|
||||
// if (x.getDeType() == 0 || x.getDeType() == 1) {
|
||||
// structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.StringType, true));
|
||||
// } else if (x.getDeType() == 2) {
|
||||
// structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.LongType, true));
|
||||
// } else if (x.getDeType() == 3) {
|
||||
// structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.DoubleType, true));
|
||||
// }
|
||||
// });
|
||||
// StructType structType = DataTypes.createStructType(structFields);
|
||||
//
|
||||
// Dataset<Row> dataFrame = sqlContext.createDataFrame(rdd, structType).persist(StorageLevel.MEMORY_AND_DISK_SER());
|
||||
// CacheUtil.getInstance().addCacheData(hTable, dataFrame);
|
||||
dataFrame.count();
|
||||
return dataFrame;
|
||||
}
|
||||
|
||||
public String getSQL(List<ChartViewFieldDTO> xAxis, List<ChartViewFieldDTO> yAxis, String table, List<ChartExtFilterRequest> extFilterRequestList) {
|
||||
// field aggregation, sorting, etc.
String[] field = yAxis.stream().map(y -> "CAST(" + y.getSummary() + "(" + y.getOriginName() + ") AS DECIMAL(20,2)) AS _" + y.getSummary() + "_" + y.getOriginName()).toArray(String[]::new);
|
||||
String[] group = xAxis.stream().map(ChartViewFieldDTO::getOriginName).toArray(String[]::new);
|
||||
String[] order = yAxis.stream().filter(y -> StringUtils.isNotEmpty(y.getSort()) && !StringUtils.equalsIgnoreCase(y.getSort(), "none"))
|
||||
.map(y -> "_" + y.getSummary() + "_" + y.getOriginName() + " " + y.getSort()).toArray(String[]::new);
|
||||
|
||||
String sql = MessageFormat.format("SELECT {0},{1} FROM {2} WHERE 1=1 {3} GROUP BY {4} ORDER BY null,{5}",
|
||||
StringUtils.join(group, ","),
|
||||
StringUtils.join(field, ","),
|
||||
table,
|
||||
transExtFilter(extFilterRequestList),// origin field filter and panel field filter,
|
||||
StringUtils.join(group, ","),
|
||||
StringUtils.join(order, ","));
|
||||
if (sql.endsWith(",")) {
|
||||
sql = sql.substring(0, sql.length() - 1);
|
||||
}
|
||||
// if filtering on aggregated result fields, wrap the SQL in an outer query
String[] resultFilter = yAxis.stream().filter(y -> CollectionUtils.isNotEmpty(y.getFilter()) && y.getFilter().size() > 0)
|
||||
.map(y -> {
|
||||
String[] s = y.getFilter().stream().map(f -> "AND _" + y.getSummary() + "_" + y.getOriginName() + transFilterTerm(f.getTerm()) + f.getValue()).toArray(String[]::new);
|
||||
return StringUtils.join(s, " ");
|
||||
}).toArray(String[]::new);
|
||||
if (resultFilter.length == 0) {
|
||||
return sql;
|
||||
} else {
|
||||
String filterSql = MessageFormat.format("SELECT * FROM {0} WHERE 1=1 {1}",
|
||||
"(" + sql + ") AS tmp",
|
||||
StringUtils.join(resultFilter, " "));
|
||||
return filterSql;
|
||||
}
|
||||
}
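For reference, the template used by getSQL above always produces SQL of the shape SELECT <dims>,<aggregates> FROM <table> WHERE 1=1 <filters> GROUP BY <dims> ORDER BY null,<orders>, trimming the trailing comma when no sort fields are set (the Doris path in ChartViewService builds its query with its own getSQL, shown only in part above). A minimal standalone sketch of that template with hypothetical columns city and amount, not part of this commit:

    import java.text.MessageFormat;

    public class GetSqlTemplateDemo {
        public static void main(String[] args) {
            String group = "city";                                              // hypothetical dimension
            String field = "CAST(sum(amount) AS DECIMAL(20,2)) AS _sum_amount"; // hypothetical measure
            String sql = MessageFormat.format(
                    "SELECT {0},{1} FROM {2} WHERE 1=1 {3} GROUP BY {4} ORDER BY null,{5}",
                    group, field, "tmp_view1", "", group, "");
            if (sql.endsWith(",")) {
                sql = sql.substring(0, sql.length() - 1); // same trailing-comma trim as above
            }
            // Prints (note the extra space where the empty filter slot was):
            // SELECT city,CAST(sum(amount) AS DECIMAL(20,2)) AS _sum_amount FROM tmp_view1 WHERE 1=1  GROUP BY city ORDER BY null
            System.out.println(sql);
        }
    }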
|
||||
|
||||
public String transFilterTerm(String term) {
|
||||
switch (term) {
|
||||
case "eq":
|
||||
return " = ";
|
||||
case "not_eq":
|
||||
return " <> ";
|
||||
case "lt":
|
||||
return " < ";
|
||||
case "le":
|
||||
return " <= ";
|
||||
case "gt":
|
||||
return " > ";
|
||||
case "ge":
|
||||
return " >= ";
|
||||
case "in":
|
||||
return " IN ";
|
||||
case "not in":
|
||||
return " NOT IN ";
|
||||
case "like":
|
||||
return " LIKE ";
|
||||
case "not like":
|
||||
return " NOT LIKE ";
|
||||
case "null":
|
||||
return " IS NULL ";
|
||||
case "not_null":
|
||||
return " IS NOT NULL ";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public String transExtFilter(List<ChartExtFilterRequest> requestList) {
|
||||
if (CollectionUtils.isEmpty(requestList)) {
|
||||
return "";
|
||||
}
|
||||
StringBuilder filter = new StringBuilder();
|
||||
for (ChartExtFilterRequest request : requestList) {
|
||||
List<String> value = request.getValue();
|
||||
if (CollectionUtils.isEmpty(value)) {
|
||||
continue;
|
||||
}
|
||||
DatasetTableField field = request.getDatasetTableField();
|
||||
filter.append(" AND ")
|
||||
.append(field.getOriginName())
|
||||
.append(" ")
|
||||
.append(transFilterTerm(request.getOperator()))
|
||||
.append(" ");
|
||||
if (StringUtils.containsIgnoreCase(request.getOperator(), "in")) {
|
||||
filter.append("('").append(StringUtils.join(value, "','")).append("')");
|
||||
} else if (StringUtils.containsIgnoreCase(request.getOperator(), "like")) {
|
||||
filter.append("'%").append(value.get(0)).append("%'");
|
||||
} else {
|
||||
filter.append("'").append(value.get(0)).append("'");
|
||||
}
|
||||
}
|
||||
return filter.toString();
|
||||
}
|
||||
}
|
||||
// dataFrame.count();
|
||||
// return dataFrame;
|
||||
// }
|
||||
//
|
||||
// public String getSQL(List<ChartViewFieldDTO> xAxis, List<ChartViewFieldDTO> yAxis, String table, List<ChartExtFilterRequest> extFilterRequestList) {
|
||||
// // field aggregation, sorting, etc.
// String[] field = yAxis.stream().map(y -> "CAST(" + y.getSummary() + "(" + y.getOriginName() + ") AS DECIMAL(20,2)) AS _" + y.getSummary() + "_" + y.getOriginName()).toArray(String[]::new);
|
||||
// String[] group = xAxis.stream().map(ChartViewFieldDTO::getOriginName).toArray(String[]::new);
|
||||
// String[] order = yAxis.stream().filter(y -> StringUtils.isNotEmpty(y.getSort()) && !StringUtils.equalsIgnoreCase(y.getSort(), "none"))
|
||||
// .map(y -> "_" + y.getSummary() + "_" + y.getOriginName() + " " + y.getSort()).toArray(String[]::new);
|
||||
//
|
||||
// String sql = MessageFormat.format("SELECT {0},{1} FROM {2} WHERE 1=1 {3} GROUP BY {4} ORDER BY null,{5}",
|
||||
// StringUtils.join(group, ","),
|
||||
// StringUtils.join(field, ","),
|
||||
// table,
|
||||
// transExtFilter(extFilterRequestList),// origin field filter and panel field filter,
|
||||
// StringUtils.join(group, ","),
|
||||
// StringUtils.join(order, ","));
|
||||
// if (sql.endsWith(",")) {
|
||||
// sql = sql.substring(0, sql.length() - 1);
|
||||
// }
|
||||
// // if filtering on aggregated result fields, wrap the SQL in an outer query
// String[] resultFilter = yAxis.stream().filter(y -> CollectionUtils.isNotEmpty(y.getFilter()) && y.getFilter().size() > 0)
|
||||
// .map(y -> {
|
||||
// String[] s = y.getFilter().stream().map(f -> "AND _" + y.getSummary() + "_" + y.getOriginName() + transFilterTerm(f.getTerm()) + f.getValue()).toArray(String[]::new);
|
||||
// return StringUtils.join(s, " ");
|
||||
// }).toArray(String[]::new);
|
||||
// if (resultFilter.length == 0) {
|
||||
// return sql;
|
||||
// } else {
|
||||
// String filterSql = MessageFormat.format("SELECT * FROM {0} WHERE 1=1 {1}",
|
||||
// "(" + sql + ") AS tmp",
|
||||
// StringUtils.join(resultFilter, " "));
|
||||
// return filterSql;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public String transFilterTerm(String term) {
|
||||
// switch (term) {
|
||||
// case "eq":
|
||||
// return " = ";
|
||||
// case "not_eq":
|
||||
// return " <> ";
|
||||
// case "lt":
|
||||
// return " < ";
|
||||
// case "le":
|
||||
// return " <= ";
|
||||
// case "gt":
|
||||
// return " > ";
|
||||
// case "ge":
|
||||
// return " >= ";
|
||||
// case "in":
|
||||
// return " IN ";
|
||||
// case "not in":
|
||||
// return " NOT IN ";
|
||||
// case "like":
|
||||
// return " LIKE ";
|
||||
// case "not like":
|
||||
// return " NOT LIKE ";
|
||||
// case "null":
|
||||
// return " IS NULL ";
|
||||
// case "not_null":
|
||||
// return " IS NOT NULL ";
|
||||
// default:
|
||||
// return "";
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public String transExtFilter(List<ChartExtFilterRequest> requestList) {
|
||||
// if (CollectionUtils.isEmpty(requestList)) {
|
||||
// return "";
|
||||
// }
|
||||
// StringBuilder filter = new StringBuilder();
|
||||
// for (ChartExtFilterRequest request : requestList) {
|
||||
// List<String> value = request.getValue();
|
||||
// if (CollectionUtils.isEmpty(value)) {
|
||||
// continue;
|
||||
// }
|
||||
// DatasetTableField field = request.getDatasetTableField();
|
||||
// filter.append(" AND ")
|
||||
// .append(field.getOriginName())
|
||||
// .append(" ")
|
||||
// .append(transFilterTerm(request.getOperator()))
|
||||
// .append(" ");
|
||||
// if (StringUtils.containsIgnoreCase(request.getOperator(), "in")) {
|
||||
// filter.append("('").append(StringUtils.join(value, "','")).append("')");
|
||||
// } else if (StringUtils.containsIgnoreCase(request.getOperator(), "like")) {
|
||||
// filter.append("'%").append(value.get(0)).append("%'");
|
||||
// } else {
|
||||
// filter.append("'").append(value.get(0)).append("'");
|
||||
// }
|
||||
// }
|
||||
// return filter.toString();
|
||||
// }
|
||||
//}
|
||||
|
@@ -600,8 +600,8 @@ export default {
avg: '平均',
|
||||
max: '最大值',
|
||||
min: '最小值',
|
||||
std: '标准差',
|
||||
var_samp: '方差',
|
||||
stddev_pop: '标准差',
|
||||
var_pop: '方差',
|
||||
quick_calc: '快速计算',
|
||||
show_name_set: '显示名设置',
|
||||
color: '颜色',
|
||||
|
@@ -22,8 +22,8 @@
<el-dropdown-item :command="beforeSummary('avg')">{{ $t('chart.avg') }}</el-dropdown-item>
|
||||
<el-dropdown-item :command="beforeSummary('max')">{{ $t('chart.max') }}</el-dropdown-item>
|
||||
<el-dropdown-item :command="beforeSummary('min')">{{ $t('chart.min') }}</el-dropdown-item>
|
||||
<el-dropdown-item :command="beforeSummary('std')">{{ $t('chart.std') }}</el-dropdown-item>
|
||||
<el-dropdown-item :command="beforeSummary('var_samp')">{{ $t('chart.var_samp') }}</el-dropdown-item>
|
||||
<el-dropdown-item :command="beforeSummary('stddev_pop')">{{ $t('chart.stddev_pop') }}</el-dropdown-item>
|
||||
<el-dropdown-item :command="beforeSummary('var_pop')">{{ $t('chart.var_pop') }}</el-dropdown-item>
|
||||
</el-dropdown-menu>
|
||||
</el-dropdown>
|
||||
</el-dropdown-item>
|
||||
|