WorkflowTask failure because of an invalid state transition in LocalActivity state machine

I’ve got a workflow that is stuck after hitting the following workflow task failure:

	LocalActivity: invalid REQUEST_PREPARED->NON_REPLAY_WORKFLOW_TASK_STARTED, transition history is [CREATED->CHECK_EXECUTION_STATE, EXECUTING->SCHEDULE]

I haven’t been able to determine the cause. It appears to be happening when trying to run a local activity in a detached cancellation scope after the Workflow was cancelled. As far as I can tell the same JVM processed the whole workflow execution, so I don’t think non-deterministic code changes are a factor (I could be proven wrong, though!).

I attached the relevant snippet of the Workflow history here. If you need the whole thing I can provide it; it’d just take me a lot longer to redact the full history, so I started small :slight_smile:

{
  "events": [
    {
      "eventId": "118",
      "eventTime": "2024-07-18T09:50:30.289040195Z",
      "eventType": "WorkflowExecutionCancelRequested",
      "version": "0",
      "taskId": "103874247",
      "workerMayIgnore": false,
      "workflowExecutionCancelRequestedEventAttributes": {
        "cause": "",
        "externalInitiatedEventId": "0",
        "externalWorkflowExecution": null,
        "identity": "1@another-ms-77d4787df9-wgfvn"
      }
    },
    {
      "eventId": "119",
      "eventTime": "2024-07-18T09:50:30.289044943Z",
      "eventType": "WorkflowTaskScheduled",
      "version": "0",
      "taskId": "103874248",
      "workerMayIgnore": false,
      "workflowTaskScheduledEventAttributes": {
        "taskQueue": {
          "name": "1@my-ms-77b9c5496b-hfdrj:5f8ed300-2a8b-4d03-8460-5573727d4e4e",
          "kind": "Sticky",
          "normalName": "MY_WORKFLOW_QUEUE"
        },
        "startToCloseTimeout": "10s",
        "attempt": 1
      }
    },
    {
      "eventId": "120",
      "eventTime": "2024-07-18T09:50:30.302768987Z",
      "eventType": "WorkflowTaskStarted",
      "version": "0",
      "taskId": "103874252",
      "workerMayIgnore": false,
      "workflowTaskStartedEventAttributes": {
        "scheduledEventId": "119",
        "identity": "1@my-ms-77b9c5496b-hfdrj",
        "requestId": "d0ede366-9480-4ac7-a064-b6001d389a8d",
        "suggestContinueAsNew": false,
        "historySizeBytes": "785518"
      }
    },
    {
      "eventId": "121",
      "eventTime": "2024-07-18T09:50:30.335662031Z",
      "eventType": "WorkflowTaskCompleted",
      "version": "0",
      "taskId": "103874256",
      "workerMayIgnore": false,
      "workflowTaskCompletedEventAttributes": {
        "scheduledEventId": "119",
        "startedEventId": "120",
        "identity": "1@my-ms-77b9c5496b-hfdrj",
        "binaryChecksum": "",
        "workerVersion": {
          "buildId": "MY-Worker-SVC-2.62.0",
          "bundleId": "",
          "useVersioning": true
        },
        "sdkMetadata": null,
        "meteringMetadata": {
          "nonfirstLocalActivityExecutionAttempts": 0
        }
      }
    },
    {
      "eventId": "122",
      "eventTime": "2024-07-18T09:50:30.335747730Z",
      "eventType": "RequestCancelExternalWorkflowExecutionInitiated",
      "version": "0",
      "taskId": "103874257",
      "workerMayIgnore": false,
      "requestCancelExternalWorkflowExecutionInitiatedEventAttributes": {
        "workflowTaskCompletedEventId": "121",
        "namespace": "my-ns",
        "namespaceId": "ec8de175-f1ca-4553-8987-c13534087ab3",
        "workflowExecution": {
          "workflowId": "5ad0d90a-bf2b-4997-b245-55317728f66b-child1",
          "runId": ""
        },
        "control": "",
        "childWorkflowOnly": true,
        "reason": ""
      }
    },
    {
      "eventId": "123",
      "eventTime": "2024-07-18T09:50:30.379125983Z",
      "eventType": "ExternalWorkflowExecutionCancelRequested",
      "version": "0",
      "taskId": "103874260",
      "workerMayIgnore": false,
      "externalWorkflowExecutionCancelRequestedEventAttributes": {
        "initiatedEventId": "122",
        "namespace": "my-ns",
        "namespaceId": "ec8de175-f1ca-4553-8987-c13534087ab3",
        "workflowExecution": {
          "workflowId": "5ad0d90a-bf2b-4997-b245-55317728f66b-child1",
          "runId": ""
        }
      }
    },
    {
      "eventId": "124",
      "eventTime": "2024-07-18T09:50:30.379137379Z",
      "eventType": "WorkflowTaskScheduled",
      "version": "0",
      "taskId": "103874261",
      "workerMayIgnore": false,
      "workflowTaskScheduledEventAttributes": {
        "taskQueue": {
          "name": "1@my-ms-77b9c5496b-hfdrj:5f8ed300-2a8b-4d03-8460-5573727d4e4e",
          "kind": "Sticky",
          "normalName": "MY_WORKFLOW_QUEUE"
        },
        "startToCloseTimeout": "10s",
        "attempt": 1
      }
    },
    {
      "eventId": "125",
      "eventTime": "2024-07-18T09:50:30.391839560Z",
      "eventType": "WorkflowTaskStarted",
      "version": "0",
      "taskId": "103874265",
      "workerMayIgnore": false,
      "workflowTaskStartedEventAttributes": {
        "scheduledEventId": "124",
        "identity": "1@my-ms-77b9c5496b-hfdrj",
        "requestId": "0000a071-ab01-4be4-b96f-407067fb06e5",
        "suggestContinueAsNew": false,
        "historySizeBytes": "786199"
      }
    },
    {
      "eventId": "126",
      "eventTime": "2024-07-18T09:50:30.431177839Z",
      "eventType": "WorkflowTaskFailed",
      "version": "0",
      "taskId": "103874269",
      "workerMayIgnore": false,
      "workflowTaskFailedEventAttributes": {
        "scheduledEventId": "124",
        "startedEventId": "125",
        "cause": "Unspecified",
        "failure": {
          "message": "Failure handling event 125 of type 'EVENT_TYPE_WORKFLOW_TASK_STARTED' during execution. {WorkflowTaskStartedEventId=125, CurrentStartedEventId=120}",
          "source": "JavaSDK",
          "stackTrace": "io.temporal.internal.statemachines.WorkflowStateMachines.createEventProcessingException(WorkflowStateMachines.java:423)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEventsBatch(WorkflowStateMachines.java:333)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEvent(WorkflowStateMachines.java:292)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.applyServerHistory(ReplayWorkflowRunTaskHandler.java:249)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTaskImpl(ReplayWorkflowRunTaskHandler.java:231)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTask(ReplayWorkflowRunTaskHandler.java:165)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTaskWithQuery(ReplayWorkflowTaskHandler.java:135)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTask(ReplayWorkflowTaskHandler.java:100)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handleTask(WorkflowWorker.java:446)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:337)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:278)\nio.temporal.internal.worker.PollTaskExecutor.lambda$process$0(PollTaskExecutor.java:105)\njava.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)\njava.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)\njava.base/java.lang.Thread.run(Thread.java:840)\n",
          "encodedAttributes": null,
          "cause": {
            "message": "WorkflowTask: failure executing SCHEDULED->WORKFLOW_TASK_STARTED, transition history is [CREATED->WORKFLOW_TASK_SCHEDULED]",
            "source": "JavaSDK",
            "stackTrace": "io.temporal.internal.statemachines.StateMachine.executeTransition(StateMachine.java:163)\nio.temporal.internal.statemachines.StateMachine.handleHistoryEvent(StateMachine.java:103)\nio.temporal.internal.statemachines.EntityStateMachineBase.handleEvent(EntityStateMachineBase.java:84)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleSingleEvent(WorkflowStateMachines.java:474)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEventsBatch(WorkflowStateMachines.java:331)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEvent(WorkflowStateMachines.java:292)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.applyServerHistory(ReplayWorkflowRunTaskHandler.java:249)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTaskImpl(ReplayWorkflowRunTaskHandler.java:231)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTask(ReplayWorkflowRunTaskHandler.java:165)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTaskWithQuery(ReplayWorkflowTaskHandler.java:135)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTask(ReplayWorkflowTaskHandler.java:100)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handleTask(WorkflowWorker.java:446)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:337)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:278)\nio.temporal.internal.worker.PollTaskExecutor.lambda$process$0(PollTaskExecutor.java:105)\njava.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)\njava.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)\njava.base/java.lang.Thread.run(Thread.java:840)\n",
            "encodedAttributes": null,
            "cause": {
              "message": "LocalActivity: invalid REQUEST_PREPARED->NON_REPLAY_WORKFLOW_TASK_STARTED, transition history is [CREATED->CHECK_EXECUTION_STATE, EXECUTING->SCHEDULE]",
              "source": "JavaSDK",
              "stackTrace": "io.temporal.internal.statemachines.StateMachine.executeTransition(StateMachine.java:152)\nio.temporal.internal.statemachines.StateMachine.handleExplicitEvent(StateMachine.java:93)\nio.temporal.internal.statemachines.EntityStateMachineBase.explicitEvent(EntityStateMachineBase.java:95)\nio.temporal.internal.statemachines.LocalActivityStateMachine.nonReplayWorkflowTaskStarted(LocalActivityStateMachine.java:246)\nio.temporal.internal.statemachines.WorkflowStateMachines$WorkflowTaskCommandsListener.workflowTaskStarted(WorkflowStateMachines.java:1227)\nio.temporal.internal.statemachines.WorkflowTaskStateMachine.handleCompleted(WorkflowTaskStateMachine.java:139)\nio.temporal.internal.statemachines.WorkflowTaskStateMachine.handleStarted(WorkflowTaskStateMachine.java:129)\nio.temporal.internal.statemachines.FixedTransitionAction.apply(FixedTransitionAction.java:46)\nio.temporal.internal.statemachines.StateMachine.executeTransition(StateMachine.java:159)\nio.temporal.internal.statemachines.StateMachine.handleHistoryEvent(StateMachine.java:103)\nio.temporal.internal.statemachines.EntityStateMachineBase.handleEvent(EntityStateMachineBase.java:84)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleSingleEvent(WorkflowStateMachines.java:474)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEventsBatch(WorkflowStateMachines.java:331)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEvent(WorkflowStateMachines.java:292)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.applyServerHistory(ReplayWorkflowRunTaskHandler.java:249)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTaskImpl(ReplayWorkflowRunTaskHandler.java:231)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTask(ReplayWorkflowRunTaskHandler.java:165)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTaskWithQuery(ReplayWorkflowTaskHandler.java:135)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTask(ReplayWorkflowTaskHandler.java:100)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handleTask(WorkflowWorker.java:446)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:337)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:278)\nio.temporal.internal.worker.PollTaskExecutor.lambda$process$0(PollTaskExecutor.java:105)\njava.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)\njava.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)\njava.base/java.lang.Thread.run(Thread.java:840)\n",
              "encodedAttributes": null,
              "cause": null,
              "applicationFailureInfo": {
                "type": "java.lang.IllegalArgumentException",
                "nonRetryable": false,
                "details": null
              }
            },
            "applicationFailureInfo": {
              "type": "java.lang.RuntimeException",
              "nonRetryable": false,
              "details": null
            }
          },
          "applicationFailureInfo": {
            "type": "io.temporal.internal.statemachines.InternalWorkflowTaskException",
            "nonRetryable": false,
            "details": null
          }
        },
        "identity": "1@my-ms-77b9c5496b-hfdrj",
        "baseRunId": "",
        "newRunId": "",
        "forkEventVersion": "0",
        "binaryChecksum": "",
        "workerVersion": null
      }
    },
    {
      "eventId": "127",
      "eventTime": "2024-07-18T09:51:13.751245236Z",
      "eventType": "ChildWorkflowExecutionCanceled",
      "version": "0",
      "taskId": "103874334",
      "workerMayIgnore": false,
      "childWorkflowExecutionCanceledEventAttributes": {
        "details": null,
        "namespace": "my-ns",
        "namespaceId": "ec8de175-f1ca-4553-8987-c13534087ab3",
        "workflowExecution": {
          "workflowId": "5ad0d90a-bf2b-4997-b245-55317728f66b-child1",
          "runId": "6889a7a2-f453-4cf9-b58b-643b9e948985"
        },
        "workflowType": {
          "name": "ChildWorkflow"
        },
        "initiatedEventId": "109",
        "startedEventId": "110"
      }
    },
    {
      "eventId": "128",
      "eventTime": "2024-07-18T09:51:23.649266052Z",
      "eventType": "WorkflowTaskScheduled",
      "version": "0",
      "taskId": "103874335",
      "workerMayIgnore": false,
      "workflowTaskScheduledEventAttributes": {
        "taskQueue": {
          "name": "MY_WORKFLOW_QUEUE",
          "kind": "Normal",
          "normalName": ""
        },
        "startToCloseTimeout": "10s",
        "attempt": 1
      }
    },
    {
      "eventId": "129",
      "eventTime": "2024-07-18T09:51:23.664528807Z",
      "eventType": "WorkflowTaskStarted",
      "version": "0",
      "taskId": "103874338",
      "workerMayIgnore": false,
      "workflowTaskStartedEventAttributes": {
        "scheduledEventId": "128",
        "identity": "1@my-ms-77b9c5496b-hfdrj",
        "requestId": "2c5ea0b7-fcde-4725-b991-9abcfd2da6f3",
        "suggestContinueAsNew": false,
        "historySizeBytes": "793172"
      }
    },
    {
      "eventId": "130",
      "eventTime": "2024-07-18T09:51:23.928737791Z",
      "eventType": "WorkflowTaskFailed",
      "version": "0",
      "taskId": "103874342",
      "workerMayIgnore": false,
      "workflowTaskFailedEventAttributes": {
        "scheduledEventId": "128",
        "startedEventId": "129",
        "cause": "Unspecified",
        "failure": {
          "message": "Failure handling event 129 of type 'EVENT_TYPE_WORKFLOW_TASK_STARTED' during execution. {WorkflowTaskStartedEventId=129, CurrentStartedEventId=120}",
          "source": "JavaSDK",
          "stackTrace": "io.temporal.internal.statemachines.WorkflowStateMachines.createEventProcessingException(WorkflowStateMachines.java:423)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEventsBatch(WorkflowStateMachines.java:333)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEvent(WorkflowStateMachines.java:292)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.applyServerHistory(ReplayWorkflowRunTaskHandler.java:249)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTaskImpl(ReplayWorkflowRunTaskHandler.java:231)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTask(ReplayWorkflowRunTaskHandler.java:165)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTaskWithQuery(ReplayWorkflowTaskHandler.java:135)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTask(ReplayWorkflowTaskHandler.java:100)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handleTask(WorkflowWorker.java:446)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:337)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:278)\nio.temporal.internal.worker.PollTaskExecutor.lambda$process$0(PollTaskExecutor.java:105)\njava.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)\njava.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)\njava.base/java.lang.Thread.run(Thread.java:840)\n",
          "encodedAttributes": null,
          "cause": {
            "message": "WorkflowTask: failure executing SCHEDULED->WORKFLOW_TASK_STARTED, transition history is [CREATED->WORKFLOW_TASK_SCHEDULED]",
            "source": "JavaSDK",
            "stackTrace": "io.temporal.internal.statemachines.StateMachine.executeTransition(StateMachine.java:163)\nio.temporal.internal.statemachines.StateMachine.handleHistoryEvent(StateMachine.java:103)\nio.temporal.internal.statemachines.EntityStateMachineBase.handleEvent(EntityStateMachineBase.java:84)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleSingleEvent(WorkflowStateMachines.java:474)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEventsBatch(WorkflowStateMachines.java:331)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEvent(WorkflowStateMachines.java:292)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.applyServerHistory(ReplayWorkflowRunTaskHandler.java:249)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTaskImpl(ReplayWorkflowRunTaskHandler.java:231)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTask(ReplayWorkflowRunTaskHandler.java:165)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTaskWithQuery(ReplayWorkflowTaskHandler.java:135)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTask(ReplayWorkflowTaskHandler.java:100)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handleTask(WorkflowWorker.java:446)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:337)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:278)\nio.temporal.internal.worker.PollTaskExecutor.lambda$process$0(PollTaskExecutor.java:105)\njava.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)\njava.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)\njava.base/java.lang.Thread.run(Thread.java:840)\n",
            "encodedAttributes": null,
            "cause": {
              "message": "LocalActivity: invalid REQUEST_PREPARED->NON_REPLAY_WORKFLOW_TASK_STARTED, transition history is [CREATED->CHECK_EXECUTION_STATE, EXECUTING->SCHEDULE]",
              "source": "JavaSDK",
              "stackTrace": "io.temporal.internal.statemachines.StateMachine.executeTransition(StateMachine.java:152)\nio.temporal.internal.statemachines.StateMachine.handleExplicitEvent(StateMachine.java:93)\nio.temporal.internal.statemachines.EntityStateMachineBase.explicitEvent(EntityStateMachineBase.java:95)\nio.temporal.internal.statemachines.LocalActivityStateMachine.nonReplayWorkflowTaskStarted(LocalActivityStateMachine.java:246)\nio.temporal.internal.statemachines.WorkflowStateMachines$WorkflowTaskCommandsListener.workflowTaskStarted(WorkflowStateMachines.java:1227)\nio.temporal.internal.statemachines.WorkflowTaskStateMachine.handleCompleted(WorkflowTaskStateMachine.java:139)\nio.temporal.internal.statemachines.WorkflowTaskStateMachine.handleStarted(WorkflowTaskStateMachine.java:129)\nio.temporal.internal.statemachines.FixedTransitionAction.apply(FixedTransitionAction.java:46)\nio.temporal.internal.statemachines.StateMachine.executeTransition(StateMachine.java:159)\nio.temporal.internal.statemachines.StateMachine.handleHistoryEvent(StateMachine.java:103)\nio.temporal.internal.statemachines.EntityStateMachineBase.handleEvent(EntityStateMachineBase.java:84)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleSingleEvent(WorkflowStateMachines.java:474)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEventsBatch(WorkflowStateMachines.java:331)\nio.temporal.internal.statemachines.WorkflowStateMachines.handleEvent(WorkflowStateMachines.java:292)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.applyServerHistory(ReplayWorkflowRunTaskHandler.java:249)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTaskImpl(ReplayWorkflowRunTaskHandler.java:231)\nio.temporal.internal.replay.ReplayWorkflowRunTaskHandler.handleWorkflowTask(ReplayWorkflowRunTaskHandler.java:165)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTaskWithQuery(ReplayWorkflowTaskHandler.java:135)\nio.temporal.internal.replay.ReplayWorkflowTaskHandler.handleWorkflowTask(ReplayWorkflowTaskHandler.java:100)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handleTask(WorkflowWorker.java:446)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:337)\nio.temporal.internal.worker.WorkflowWorker$TaskHandlerImpl.handle(WorkflowWorker.java:278)\nio.temporal.internal.worker.PollTaskExecutor.lambda$process$0(PollTaskExecutor.java:105)\njava.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)\njava.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)\njava.base/java.lang.Thread.run(Thread.java:840)\n",
              "encodedAttributes": null,
              "cause": null,
              "applicationFailureInfo": {
                "type": "java.lang.IllegalArgumentException",
                "nonRetryable": false,
                "details": null
              }
            },
            "applicationFailureInfo": {
              "type": "java.lang.RuntimeException",
              "nonRetryable": false,
              "details": null
            }
          },
          "applicationFailureInfo": {
            "type": "io.temporal.internal.statemachines.InternalWorkflowTaskException",
            "nonRetryable": false,
            "details": null
          }
        },
        "identity": "1@my-ms-77b9c5496b-hfdrj",
        "baseRunId": "",
        "newRunId": "",
        "forkEventVersion": "0",
        "binaryChecksum": "",
        "workerVersion": null
      }
    }
  ]
}

Could you create a reproduction of the issue for us to troubleshoot?

Sorry for the delay, you can reproduce it with a parent workflow that does this:

public class ParentWorkflowImpl implements ParentWorkflow {

    @Override
    public void performTask(String input) {
        try {
            ChildWorkflowOptions childOptions = ChildWorkflowOptions.newBuilder()
                    .setWorkflowId(Workflow.getInfo() .getWorkflowId() + "-child1")
                    .setCancellationType(ChildWorkflowCancellationType.WAIT_CANCELLATION_REQUESTED)
                    .setParentClosePolicy(ParentClosePolicy.PARENT_CLOSE_POLICY_REQUEST_CANCEL)
                    .validateAndBuildWithDefaults();
            ChildWorkflow1 child = Workflow.newChildWorkflowStub(ChildWorkflow1.class, childOptions);
            child.performChildTask("testInput");
        } catch (TemporalFailure e) {
            if (CancellationScope.current().isCancelRequested()) {
                Workflow.newDetachedCancellationScope(() -> {
                    LocalActivityTest act = Workflow.newLocalActivityStub(LocalActivityTest.class,
                            LocalActivityOptions.newBuilder()
                                    .setStartToCloseTimeout(Duration.ofSeconds(5))
                                    .validateAndBuildWithDefaults());
                    act.execute();
                }).run();
                throw e;
            }
        }

And a child that just blocks on a Workflow.await(() -> false) call. If you cancel the parent, the child will cancel, but the parent hits the LocalActivity: invalid REQUEST_PREPARED->NON_REPLAY_WORKFLOW_TASK_STARTED error.

The issue started with 1.24.0, it doesn’t reproduce with 1.23.2. It also doesn’t reproduce if the ChildWorkflowCancellationType is WAIT_CANCELLATION_COMPLETED. Let me know if you need a complete reproducer project, and I will put that together.

1 Like

@dano Thank you for the reproduction. I opened an issue for this and will try to publish a patch ASAP: WorkflowTask failure because of an invalid state transition in LocalActivity state machine · Issue #2155 · temporalio/sdk-java · GitHub

1 Like