Cancelling timer inside select causes worker panic

I have a use case where I run a selector over a signal channel and a timer. Inside the selector's signal-channel callback, I cancel the timer and then start an activity. From the client side, I start the workflow and send the signal immediately. If I cancel the timer after running the activity, it works fine. If I cancel the timer before running the activity, the worker fails with this error:

ERROR Workflow panic. Namespace default TaskQueue hello-world WorkflowID hello_world_workflowIDcdfbc15e-33c4-4dc9-a63b-c6013267b10f RunID 54709e14-460d-45c7-b7ee-788d58ee19cd PanicError lookup failed for scheduledEventID to activityID: scheduleEventID: 6, activityID: 7 PanicStack process event for hello-world [panic]

Why can’t I cancel the timer before running an activity? Is this by design or a bug?
Here is some dummy code that reproduces the use case:

// Workflow registers a "signal" signal channel and a one-minute timer on a
// selector and blocks on a single Select call.
//
// If the signal callback runs and the receive reports an active channel, it
// cancels the context the timer was created with and then executes Activity
// with name, returning the activity's result. If the timer callback runs
// instead, the workflow returns a timeout error.
//
// Note that the "Activity failed." log line is emitted for any non-nil error,
// including the timeout error.
func Workflow(ctx workflow.Context, name string) (string, error) {
	ctx = workflow.WithActivityOptions(ctx, workflow.ActivityOptions{
		ScheduleToStartTimeout: time.Minute,
		StartToCloseTimeout:    time.Minute,
	})

	wfLogger := workflow.GetLogger(ctx)
	wfLogger.Info("HelloWorld workflow started", "name", name)

	// The signal channel and the timer are both created from timerCtx, so
	// calling cancelTimer cancels the context the timer is bound to.
	timerCtx, cancelTimer := workflow.WithCancel(ctx)
	sel := workflow.NewSelector(ctx)

	var (
		greeting string
		runErr   error
	)

	// onSignal handles delivery of the signal: cancel first, then run the activity.
	onSignal := func(ch workflow.ReceiveChannel, _ bool) {
		var payload string
		if !ch.Receive(ctx, &payload) {
			return
		}
		cancelTimer() // in this case the timer will be canceled
		runErr = workflow.ExecuteActivity(ctx, Activity, name).Get(ctx, &greeting)
	}

	// onTimeout handles the timer future becoming ready before the signal.
	onTimeout := func(workflow.Future) {
		runErr = fmt.Errorf("timeout reached, no signal within allowed duration %s", time.Minute)
	}

	sel.AddReceive(workflow.GetSignalChannel(timerCtx, "signal"), onSignal)
	sel.AddFuture(workflow.NewTimer(timerCtx, time.Minute), onTimeout)

	// Block until one of the registered callbacks has run.
	sel.Select(ctx)

	if runErr != nil {
		wfLogger.Error("Activity failed.", "Error", runErr)
		return "", runErr
	}

	wfLogger.Info("HelloWorld workflow completed.", "result", greeting)

	return greeting, nil
}

// Activity logs the supplied name through the activity logger and returns
// the greeting "Hello <name>!". The returned error is always nil.
func Activity(ctx context.Context, name string) (string, error) {
	activity.GetLogger(ctx).Info("Activity", "name", name)

	greeting := "Hello " + name + "!"
	return greeting, nil
}

// main creates a client, starts helloworld.Workflow on the "hello-world"
// task queue with a freshly generated workflow ID, logs the workflow and run
// IDs, and then immediately sends the "signal" signal to that workflow ID.
// Any failure along the way terminates the process via log.Fatalln.
func main() {
	// The client is a heavyweight object that should be created once per process.
	temporalClient, err := client.NewClient(client.Options{})
	if err != nil {
		log.Fatalln("Unable to create client", err)
	}
	defer temporalClient.Close()

	bg := context.Background()

	// Keep the generated ID in a local so the signal below targets the same workflow.
	workflowID := "hello_world_workflowID" + uuid.New().String()

	run, err := temporalClient.ExecuteWorkflow(bg, client.StartWorkflowOptions{
		ID:                  workflowID,
		TaskQueue:           "hello-world",
		WorkflowTaskTimeout: time.Minute,
	}, helloworld.Workflow, "Temporal")
	if err != nil {
		log.Fatalln("Unable to execute workflow", err)
	}

	log.Println("Started workflow", "WorkflowID", run.GetID(), "RunID", run.GetRunID())

	// The run ID argument is left empty; the signal is sent right after the start call returns.
	if err := temporalClient.SignalWorkflow(bg, workflowID, "", "signal", "signal-value"); err != nil {
		log.Fatalln("Unable to signal workflow", err)
	}
}
2 Likes

Thanks for reporting! This is a known bug in the cancellation logic, which is triggered by immediate cancellation. We are working on a fix.