Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add canary jitter workflow debugging log #6278

Merged
merged 14 commits into from
Sep 11, 2024
15 changes: 14 additions & 1 deletion service/frontend/api/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -1889,13 +1889,26 @@ func (wh *WorkflowHandler) StartWorkflowExecution(
if err != nil {
return nil, err
}
wh.GetLogger().Debug("Start workflow execution request domainID", tag.WorkflowDomainID(domainID))
historyRequest, err := common.CreateHistoryStartWorkflowRequest(
domainID, startRequest, time.Now(), wh.getPartitionConfig(ctx, domainName))
if err != nil {
return nil, err
}

// for debugging jitter workflow
// will be removed later
jitterStartSeconds := startRequest.GetJitterStartSeconds()
if startRequest.GetDomain() == "cadence-canary" && jitterStartSeconds > 0 {
wh.GetLogger().Debug("Start workflow execution request domainID",
tag.WorkflowDomainID(domainID),
tag.WorkflowID(startRequest.WorkflowID),
tag.Dynamic("JitterStartSeconds", jitterStartSeconds),
tag.Dynamic("firstDecisionTaskBackoffSeconds", historyRequest.GetFirstDecisionTaskBackoffSeconds()),
)
Comment on lines +1902 to +1907
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to cover these in tests?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bowenxia I wonder if you need to have this permanently or just for debugging.
If it's purely for debugging (I guess so since tags are too specific) then you can temporarily deploy your own branch.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I mentioned in the Cadence all-hands, this kind of error happens randomly. It has happened in Prod07, Prod04, Prod12, etc. I can't predict which environment will hit the error next, so I'll have to merge this to main and turn on debug mode once it happens again in the future. :(

} else {
wh.GetLogger().Debug("Start workflow execution request domainID", tag.WorkflowDomainID(domainID))
}

resp, err = wh.GetHistoryClient().StartWorkflowExecution(ctx, historyRequest)
if err != nil {
return nil, err
Expand Down
34 changes: 34 additions & 0 deletions service/frontend/api/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ import (
const (
numHistoryShards = 10
testDomain = "test-domain"
canaryDomain = "cadence-canary"
testDomainID = "e4f90ec0-1313-45be-9877-8aa41f72a45a"
testWorkflowID = "test-workflow-id"
testRunID = "2c8b555f-1f55-4955-9d1c-b980194555c9"
Expand Down Expand Up @@ -596,6 +597,39 @@ func (s *workflowHandlerSuite) TestStartWorkflowExecution_IsolationGroupDrained(
s.IsType(err, &types.BadRequestError{})
}

// TestStartWorkflowExecution_LogJitterTime starts a workflow in the canary
// domain with a positive JitterStartSeconds, which is the combination that
// selects the jitter debugging log branch in StartWorkflowExecution, and
// asserts that the call completes without error.
func (s *workflowHandlerSuite) TestStartWorkflowExecution_LogJitterTime() {
	cfg := s.newConfig(dc.NewInMemoryClient())
	cfg.UserRPS = dc.GetIntPropertyFn(10)
	handler := s.getWorkflowHandler(cfg)

	// Must be > 0 so the request counts as a jittered start.
	jitterSeconds := int32(10)

	retryPolicy := &types.RetryPolicy{
		InitialIntervalInSeconds:    1,
		BackoffCoefficient:          2,
		MaximumIntervalInSeconds:    2,
		MaximumAttempts:             1,
		ExpirationIntervalInSeconds: 1,
	}
	request := &types.StartWorkflowExecutionRequest{
		Domain:                              canaryDomain,
		WorkflowID:                          "workflow-id",
		WorkflowType:                        &types.WorkflowType{Name: "workflow-type"},
		TaskList:                            &types.TaskList{Name: "task-list"},
		JitterStartSeconds:                  &jitterSeconds,
		ExecutionStartToCloseTimeoutSeconds: common.Int32Ptr(1),
		TaskStartToCloseTimeoutSeconds:      common.Int32Ptr(1),
		RetryPolicy:                         retryPolicy,
		RequestID:                           uuid.New(),
	}

	// The domain ID lookup is expected exactly twice for this request.
	s.mockDomainCache.EXPECT().
		GetDomainID(canaryDomain).
		Return(s.testDomainID, nil).
		Times(2)

	// The history service is stubbed to accept the start request.
	stubResponse := &types.StartWorkflowExecutionResponse{RunID: "test-rid"}
	s.mockHistoryClient.EXPECT().
		StartWorkflowExecution(gomock.Any(), gomock.Any()).
		Return(stubResponse, nil)

	_, err := handler.StartWorkflowExecution(context.Background(), request)
	s.NoError(err)
}

func (s *workflowHandlerSuite) TestDiagnoseWorkflowExecution_Success() {
wh := s.getWorkflowHandler(s.newConfig(dc.NewInMemoryClient()))

Expand Down
Loading