-
Notifications
You must be signed in to change notification settings - Fork 4.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Improvement][MasterServer] event response handle parallel (#7560)
* [Feature][dolphinscheduler-api] parse traceId in http header for Cross system delivery to #7237 (#7238) * to #7237 * rerun test Co-authored-by: honghuo.zw <[email protected]> * chery-pick 05aef27 and handle conflicts * to #7065: fix ExecutorService and schedulerService (#7072) Co-authored-by: honghuo.zw <[email protected]> * [Feature][dolphinscheduler-api] access control of taskDefinition and taskInstance in project to #7081 (#7082) * to #7081 * fix #7081 * to #7081 Co-authored-by: honghuo.zw <[email protected]> * chery-pick 8ebe060 and handle conflicts * cherry-pick 1f18444 and handle conflicts * fix #6807: dolphinscheduler.zookeeper.env_vars - > dolphinscheduler.registry.env_vars (#6808) Co-authored-by: honghuo.zw <[email protected]> Co-authored-by: Kirs <[email protected]> * add default constructor (#6780) Co-authored-by: honghuo.zw <[email protected]> * to #7108 (#7109) * to #7450 * to #7450: fix parallel bug * add index * expose config to user * fix bug * fix bug * add delay delete * fix bug * add License * fix ut * fix ut * fix name Co-authored-by: honghuo.zw <[email protected]> Co-authored-by: Kirs <[email protected]>
- Loading branch information
1 parent
8fa61cf
commit f8942bf
Showing
12 changed files
with
383 additions
and
107 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
189 changes: 189 additions & 0 deletions
189
.../org/apache/dolphinscheduler/server/master/processor/queue/TaskResponsePersistThread.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.dolphinscheduler.server.master.processor.queue; | ||
|
||
import org.apache.dolphinscheduler.common.enums.Event; | ||
import org.apache.dolphinscheduler.common.enums.ExecutionStatus; | ||
import org.apache.dolphinscheduler.common.enums.StateEvent; | ||
import org.apache.dolphinscheduler.common.enums.StateEventType; | ||
import org.apache.dolphinscheduler.dao.entity.TaskInstance; | ||
import org.apache.dolphinscheduler.remote.command.DBTaskAckCommand; | ||
import org.apache.dolphinscheduler.remote.command.DBTaskResponseCommand; | ||
import org.apache.dolphinscheduler.server.master.runner.WorkflowExecuteThread; | ||
import org.apache.dolphinscheduler.service.process.ProcessService; | ||
|
||
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.concurrent.ConcurrentLinkedQueue; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import io.netty.channel.Channel; | ||
|
||
public class TaskResponsePersistThread implements Runnable { | ||
|
||
/** | ||
* logger of TaskResponsePersistThread | ||
*/ | ||
private static final Logger logger = LoggerFactory.getLogger(TaskResponsePersistThread.class); | ||
|
||
private final ConcurrentLinkedQueue<TaskResponseEvent> events = new ConcurrentLinkedQueue<>(); | ||
|
||
private final Integer processInstanceId; | ||
|
||
/** | ||
* process service | ||
*/ | ||
private ProcessService processService; | ||
|
||
private ConcurrentHashMap<Integer, WorkflowExecuteThread> processInstanceMapper; | ||
|
||
public TaskResponsePersistThread(ProcessService processService, | ||
ConcurrentHashMap<Integer, WorkflowExecuteThread> processInstanceMapper, | ||
Integer processInstanceId) { | ||
this.processService = processService; | ||
this.processInstanceMapper = processInstanceMapper; | ||
this.processInstanceId = processInstanceId; | ||
} | ||
|
||
@Override | ||
public void run() { | ||
while (!this.events.isEmpty()) { | ||
TaskResponseEvent event = this.events.peek(); | ||
try { | ||
boolean result = persist(event); | ||
if (!result) { | ||
logger.error("persist meta error, task id:{}, instance id:{}", event.getTaskInstanceId(), event.getProcessInstanceId()); | ||
} | ||
} catch (Exception e) { | ||
logger.error("persist error, task id:{}, instance id:{}", event.getTaskInstanceId(), event.getProcessInstanceId(), e); | ||
} finally { | ||
this.events.remove(event); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* persist taskResponseEvent | ||
* | ||
* @param taskResponseEvent taskResponseEvent | ||
*/ | ||
private boolean persist(TaskResponseEvent taskResponseEvent) { | ||
Event event = taskResponseEvent.getEvent(); | ||
Channel channel = taskResponseEvent.getChannel(); | ||
|
||
TaskInstance taskInstance = processService.findTaskInstanceById(taskResponseEvent.getTaskInstanceId()); | ||
|
||
boolean result = true; | ||
|
||
switch (event) { | ||
case ACK: | ||
try { | ||
if (taskInstance != null) { | ||
ExecutionStatus status = taskInstance.getState().typeIsFinished() ? taskInstance.getState() : taskResponseEvent.getState(); | ||
processService.changeTaskState(taskInstance, status, | ||
taskResponseEvent.getStartTime(), | ||
taskResponseEvent.getWorkerAddress(), | ||
taskResponseEvent.getExecutePath(), | ||
taskResponseEvent.getLogPath(), | ||
taskResponseEvent.getTaskInstanceId()); | ||
logger.debug("changeTaskState in ACK , changed in meta:{} ,task instance state:{}, task response event state:{}, taskInstance id:{},taskInstance host:{}", | ||
result, taskInstance.getState(), taskResponseEvent.getState(), taskInstance.getId(), taskInstance.getHost()); | ||
} | ||
// if taskInstance is null (maybe deleted) . retry will be meaningless . so ack success | ||
DBTaskAckCommand taskAckCommand = new DBTaskAckCommand(ExecutionStatus.SUCCESS.getCode(), taskResponseEvent.getTaskInstanceId()); | ||
channel.writeAndFlush(taskAckCommand.convert2Command()); | ||
logger.debug("worker ack master success, taskInstance id:{},taskInstance host:{}", taskInstance.getId(), taskInstance.getHost()); | ||
} catch (Exception e) { | ||
result = false; | ||
logger.error("worker ack master error", e); | ||
DBTaskAckCommand taskAckCommand = new DBTaskAckCommand(ExecutionStatus.FAILURE.getCode(), taskInstance == null ? -1 : taskInstance.getId()); | ||
channel.writeAndFlush(taskAckCommand.convert2Command()); | ||
} | ||
break; | ||
case RESULT: | ||
try { | ||
if (taskInstance != null) { | ||
result = processService.changeTaskState(taskInstance, taskResponseEvent.getState(), | ||
taskResponseEvent.getEndTime(), | ||
taskResponseEvent.getProcessId(), | ||
taskResponseEvent.getAppIds(), | ||
taskResponseEvent.getTaskInstanceId(), | ||
taskResponseEvent.getVarPool() | ||
); | ||
logger.debug("changeTaskState in RESULT , changed in meta:{} task instance state:{}, task response event state:{}, taskInstance id:{},taskInstance host:{}", | ||
result, taskInstance.getState(), taskResponseEvent.getState(), taskInstance.getId(), taskInstance.getHost()); | ||
} | ||
if (!result) { | ||
DBTaskResponseCommand taskResponseCommand = new DBTaskResponseCommand(ExecutionStatus.FAILURE.getCode(), taskResponseEvent.getTaskInstanceId()); | ||
channel.writeAndFlush(taskResponseCommand.convert2Command()); | ||
logger.debug("worker response master failure, taskInstance id:{},taskInstance host:{}", taskInstance.getId(), taskInstance.getHost()); | ||
} else { | ||
// if taskInstance is null (maybe deleted) . retry will be meaningless . so response success | ||
DBTaskResponseCommand taskResponseCommand = new DBTaskResponseCommand(ExecutionStatus.SUCCESS.getCode(), taskResponseEvent.getTaskInstanceId()); | ||
channel.writeAndFlush(taskResponseCommand.convert2Command()); | ||
logger.debug("worker response master success, taskInstance id:{},taskInstance host:{}", taskInstance.getId(), taskInstance.getHost()); | ||
} | ||
} catch (Exception e) { | ||
result = false; | ||
logger.error("worker response master error", e); | ||
DBTaskResponseCommand taskResponseCommand = new DBTaskResponseCommand(ExecutionStatus.FAILURE.getCode(), -1); | ||
channel.writeAndFlush(taskResponseCommand.convert2Command()); | ||
} | ||
break; | ||
default: | ||
throw new IllegalArgumentException("invalid event type : " + event); | ||
} | ||
|
||
WorkflowExecuteThread workflowExecuteThread = this.processInstanceMapper.get(taskResponseEvent.getProcessInstanceId()); | ||
if (workflowExecuteThread != null) { | ||
StateEvent stateEvent = new StateEvent(); | ||
stateEvent.setProcessInstanceId(taskResponseEvent.getProcessInstanceId()); | ||
stateEvent.setTaskInstanceId(taskResponseEvent.getTaskInstanceId()); | ||
stateEvent.setExecutionStatus(taskResponseEvent.getState()); | ||
stateEvent.setType(StateEventType.TASK_STATE_CHANGE); | ||
workflowExecuteThread.addStateEvent(stateEvent); | ||
} | ||
return result; | ||
} | ||
|
||
public boolean addEvent(TaskResponseEvent event) { | ||
if (event.getProcessInstanceId() != this.processInstanceId) { | ||
logger.info("event would be abounded, task instance id:{}, process instance id:{}, this.processInstanceId:{}", | ||
event.getTaskInstanceId(), event.getProcessInstanceId(), this.processInstanceId); | ||
return false; | ||
} | ||
return this.events.add(event); | ||
} | ||
|
||
public int eventSize() { | ||
return this.events.size(); | ||
} | ||
|
||
public boolean isEmpty() { | ||
return this.events.isEmpty(); | ||
} | ||
|
||
public Integer getProcessInstanceId() { | ||
return processInstanceId; | ||
} | ||
|
||
public String getKey() { | ||
return String.valueOf(processInstanceId); | ||
} | ||
} |
Oops, something went wrong.