Cancelling timer inside select causes worker panic

I have a use case where I start a selector on a signal channel and a timer, and inside the select case of the signal channel I cancel the timer and then start an activity. Outside of the workflow, I run the workflow and send a signal immediately. If I cancel the timer after running the activity, it works fine. If I cancel the timer before running the activity, it gives this error:

ERROR Workflow panic. Namespace default TaskQueue hello-world WorkflowID hello_world_workflowIDcdfbc15e-33c4-4dc9-a63b-c6013267b10f RunID 54709e14-460d-45c7-b7ee-788d58ee19cd PanicError lookup failed for scheduledEventID to activityID: scheduleEventID: 6, activityID: 7 PanicStack process event for hello-world [panic]

Why can’t I cancel the timer before running an activity? Is this by design or a bug?
Here is minimal sample code that reproduces the issue:

// Workflow registers two selector cases — a receive on the "signal" signal
// channel and a one-minute timer — and blocks on the selector. When the signal
// is received it cancels the timer's context and runs Activity, returning the
// activity's result. When the timer fires instead, it returns a timeout error.
//
// NOTE(review): cancelHandler is called before ExecuteActivity below; this is
// the ordering reported to produce the "lookup failed for scheduledEventID to
// activityID" panic when the signal is sent right after the workflow starts.
func Workflow(ctx workflow.Context, name string) (string, error) {
	// Both activity timeouts are set to one minute.
	ao := workflow.ActivityOptions{
		ScheduleToStartTimeout: time.Minute,
		StartToCloseTimeout:    time.Minute,
	}
	ctx = workflow.WithActivityOptions(ctx, ao)

	logger := workflow.GetLogger(ctx)
	logger.Info("HelloWorld workflow started", "name", name)

	// childCtx is derived from ctx; the signal channel and the timer below are
	// created with it, and cancelHandler cancels it. The selector itself and
	// the activity use the parent ctx.
	childCtx, cancelHandler := workflow.WithCancel(ctx)
	selector := workflow.NewSelector(ctx)

	// result and err are written by the selector callbacks and read after
	// Select returns.
	var result string
	var err error
	selector.AddReceive(workflow.GetSignalChannel(childCtx, "signal"), func(c workflow.ReceiveChannel, more bool) {
		var signal string
		// The received value is discarded; only Receive's boolean return is
		// used (the callback's `more` argument is ignored).
		if channelActive := c.Receive(ctx, &signal); channelActive {
			// Cancel childCtx, and with it the timer created on childCtx below.
			cancelHandler()
			// Run Activity on the parent ctx (not the cancelled childCtx) and
			// block for its result.
			err = workflow.ExecuteActivity(ctx, Activity, name).Get(ctx, &result)
		}
	})
	selector.AddFuture(workflow.NewTimer(childCtx, time.Minute), func(future workflow.Future) {
		// The timer future's own result is not inspected; the callback only
		// records a timeout error.
		err = fmt.Errorf("timeout reached, no signal within allowed duration %s", time.Minute)
	})
	// Block until one of the registered cases is ready and its callback has run.
	selector.Select(ctx)

	// err is either the activity error or the timeout error set above; both
	// are logged with the same "Activity failed." message.
	if err != nil {
		logger.Error("Activity failed.", "Error", err)
		return "", err
	}

	logger.Info("HelloWorld workflow completed.", "result", result)

	return result, nil
}

// Activity logs the supplied name and returns the greeting "Hello <name>!".
// It never returns a non-nil error.
func Activity(ctx context.Context, name string) (string, error) {
	activity.GetLogger(ctx).Info("Activity", "name", name)

	greeting := "Hello " + name + "!"
	return greeting, nil
}

// main starts the hello-world workflow under a freshly generated workflow ID
// and then immediately sends it the "signal" signal. Any client, start, or
// signal error terminates the process via log.Fatalln.
func main() {
	// The client is a heavyweight object that should be created once per process.
	temporalClient, err := client.NewClient(client.Options{})
	if err != nil {
		log.Fatalln("Unable to create client", err)
	}
	defer temporalClient.Close()

	// The ID is built once so the same value is used to start and to signal.
	workflowID := "hello_world_workflowID" + uuid.New().String()
	startOptions := client.StartWorkflowOptions{
		ID:                  workflowID,
		TaskQueue:           "hello-world",
		WorkflowTaskTimeout: time.Minute,
	}

	run, err := temporalClient.ExecuteWorkflow(context.Background(), startOptions, helloworld.Workflow, "Temporal")
	if err != nil {
		log.Fatalln("Unable to execute workflow", err)
	}
	log.Println("Started workflow", "WorkflowID", run.GetID(), "RunID", run.GetRunID())

	// The run ID argument is left empty; the workflow is addressed by its ID only.
	if err := temporalClient.SignalWorkflow(context.Background(), workflowID, "", "signal", "signal-value"); err != nil {
		log.Fatalln("Unable to signal workflow", err)
	}
}
2 Likes

Thanks for reporting! This is a known bug in the cancellation logic that is triggered by immediate cancellation. We are working on a fix.

Is there an id for the bug so we can track status?