From 971b35c6c27831fa074d2fe9470da72fcbeae473 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Fri, 11 Aug 2023 17:50:18 +0800 Subject: [PATCH] [Improvement](statistics)Improve show analyze performance. (#22484) (#22882) --- .../apache/doris/statistics/AnalysisInfo.java | 17 ++++++++++++++++- .../doris/statistics/AnalysisInfoBuilder.java | 11 ++++++++++- .../doris/statistics/AnalysisManager.java | 18 +++++++++++++++++- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 592fb4a99babd6..ee39582aac4df0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -37,6 +37,7 @@ import java.lang.reflect.Type; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.StringJoiner; @@ -77,9 +78,14 @@ public enum ScheduleType { @SerializedName("jobId") public final long jobId; + // When this AnalysisInfo represent a task, this is the task id for it. @SerializedName("taskId") public final long taskId; + // When this AnalysisInfo represent a job, this is the list of task ids belong to this job. + @SerializedName("taskIds") + public final List taskIds; + @SerializedName("catalogName") public final String catalogName; @@ -153,7 +159,11 @@ public enum ScheduleType { @SerializedName("samplingPartition") public boolean samplingPartition; - public AnalysisInfo(long jobId, long taskId, String catalogName, String dbName, String tblName, + // For serialize + @SerializedName("cronExpr") + public String cronExprStr; + + public AnalysisInfo(long jobId, long taskId, List taskIds, String catalogName, String dbName, String tblName, Map> colToPartitions, Set partitionNames, String colName, Long indexId, JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType, int samplePercent, int sampleRows, int maxBucketNum, long periodTimeInMs, String message, @@ -161,6 +171,7 @@ public AnalysisInfo(long jobId, long taskId, String catalogName, String dbName, boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition) { this.jobId = jobId; this.taskId = taskId; + this.taskIds = taskIds; this.catalogName = catalogName; this.dbName = dbName; this.tblName = tblName; @@ -231,6 +242,10 @@ public boolean isJob() { return taskId == -1; } + public void addTaskId(long taskId) { + taskIds.add(taskId); + } + // TODO: use thrift public static AnalysisInfo fromResultRow(ResultRow resultRow) { try { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 7d8b99502ccc98..2fd0e25d727cc1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -23,12 +23,14 @@ import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.AnalysisInfo.ScheduleType; +import java.util.List; import java.util.Map; import java.util.Set; public class AnalysisInfoBuilder { private long jobId; private long taskId; + private List taskIds; private String catalogName; private String dbName; private String tblName; @@ -59,6 +61,7 @@ public AnalysisInfoBuilder() { public AnalysisInfoBuilder(AnalysisInfo info) { jobId = info.jobId; taskId = info.taskId; + taskIds = info.taskIds; catalogName = info.catalogName; dbName = info.dbName; tblName = info.tblName; @@ -94,6 +97,11 @@ public AnalysisInfoBuilder setTaskId(long taskId) { return this; } + public AnalysisInfoBuilder setTaskIds(List taskIds) { + this.taskIds = taskIds; + return this; + } + public AnalysisInfoBuilder setCatalogName(String catalogName) { this.catalogName = catalogName; return this; @@ -210,7 +218,7 @@ public AnalysisInfoBuilder setSamplingPartition(boolean samplingPartition) { } public AnalysisInfo build() { - return new AnalysisInfo(jobId, taskId, catalogName, dbName, tblName, colToPartitions, partitionNames, + return new AnalysisInfo(jobId, taskId, taskIds, catalogName, dbName, tblName, colToPartitions, partitionNames, colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent, sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType, externalTableLevelTask, partitionOnly, samplingPartition); @@ -220,6 +228,7 @@ public AnalysisInfoBuilder copy() { return new AnalysisInfoBuilder() .setJobId(jobId) .setTaskId(taskId) + .setTaskIds(taskIds) .setCatalogName(catalogName) .setDbName(dbName) .setTblName(tblName) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index a7e545f98cc65f..e549bd6b0bda5a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -58,6 +58,7 @@ import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.logging.log4j.LogManager; @@ -439,6 +440,7 @@ private AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlExcepti Map> colToPartitions = validateAndGetPartitions(table, columnNames, partitionNames, analysisType, analysisMode); taskInfoBuilder.setColToPartitions(colToPartitions); + taskInfoBuilder.setTaskIds(Lists.newArrayList()); return taskInfoBuilder.build(); } @@ -511,6 +513,7 @@ private void createTaskForMVIdx(AnalysisInfo jobInfo, Map showAnalysisJob(ShowAnalyzeStmt stmt) { } public String getJobProgress(long jobId) { - List tasks = findTasks(jobId); + List tasks = findTasksByTaskIds(jobId); + if (tasks == null) { + return "N/A"; + } int finished = 0; int failed = 0; int inProgress = 0; @@ -921,6 +929,14 @@ public List findTasks(long jobId) { } } + public List findTasksByTaskIds(long jobId) { + AnalysisInfo jobInfo = analysisJobInfoMap.get(jobId); + if (jobInfo != null && jobInfo.taskIds != null) { + return jobInfo.taskIds.stream().map(id -> analysisTaskInfoMap.get(id)).collect(Collectors.toList()); + } + return null; + } + public void removeAll(List analysisInfos) { for (AnalysisInfo analysisInfo : analysisInfos) { analysisTaskInfoMap.remove(analysisInfo.taskId);