Hi All,
I need help with a problem: requests in Temporal fail with timeouts because the queue is full.
We suspect the queue is full because old requests to Cassandra keep failing: some attributes are not found in the table in the Cassandra keyspace (because those attributes have not been created in Cassandra yet). We need to terminate the problematic workflow manually from the Temporal web console.
The problem seems to be that those workflows / requests are executed repeatedly, without any retry limit. The question is: how can we set a retry limit in Temporal for a request to Cassandra when that request fails?
Please help.
Below is a snippet of the error log for this issue:
{"level":"error","ts":"2022-03-18T03:36:21.137Z","msg":"Fail to process task","service":"history","shard-id":2,"address":"10.11.11.116:7234","shard-item":"0xc001ca8280","component":"visibility-queue-processor","shard-id":2,"queue-task-id":8396587,"queue-task-visibility-timestamp":"2022-02-15T05:31:57.516Z","xdc-failover-version":0,"queue-task-type":"VisibilityUpsertExecution","wf-namespace-id":"78d10593-d54f-433f-863a-05cd5c9b345c","wf-id":"test003","wf-run-id":"dac0a45b-6417-4757-b062-4ada8f4e98de","error":"Unable to decode search attributes: invalid search attribute type: Unspecified","lifecycle":"ProcessingFailed","logging-call-at":"taskProcessor.go:341","stacktrace":"go.temporal.io/server/common/log.(*zapLogger).Error\n\t/temporal/common/log/zap_logger.go:142\ngo.temporal.io/server/service/history.(*taskProcessor).handleTaskError\n\t/temporal/service/history/taskProcessor.go:341\ngo.temporal.io/server/service/history.(*taskProcessor).processTaskAndAck.func1\n\t/temporal/service/history/taskProcessor.go:222\ngo.temporal.io/server/common/backoff.Retry.func1\n\t/temporal/common/backoff/retry.go:104\ngo.temporal.io/server/common/backoff.RetryContext\n\t/temporal/common/backoff/retry.go:125\ngo.temporal.io/server/common/backoff.Retry\n\t/temporal/common/backoff/retry.go:105\ngo.temporal.io/server/service/history.(*taskProcessor).processTaskAndAck\n\t/temporal/service/history/taskProcessor.go:248\ngo.temporal.io/server/service/history.(*taskProcessor).taskWorker\n\t/temporal/service/history/taskProcessor.go:171"}
{"level":"error","ts":"2022-03-18T03:36:21.137Z","msg":"Critical error processing task, retrying.","service":"history","shard-id":2,"address":"10.11.11.116:7234","shard-item":"0xc001ca8280","component":"visibility-queue-processor","shard-id":2,"queue-task-id":8396587,"queue-task-visibility-timestamp":"2022-02-15T05:31:57.516Z","xdc-failover-version":0,"queue-task-type":"VisibilityUpsertExecution","wf-namespace-id":"78d10593-d54f-433f-863a-05cd5c9b345c","wf-id":"test003","wf-run-id":"dac0a45b-6417-4757-b062-4ada8f4e98de","error":"Unable to decode search attributes: invalid search attribute type: Unspecified","operation-result":"OperationCritical","queue-task-type":"VisibilityUpsertExecution","logging-call-at":"taskProcessor.go:227","stacktrace":"go.temporal.io/server/common/log.(*zapLogger).Error\n\t/temporal/common/log/zap_logger.go:142\ngo.temporal.io/server/service/history.(*taskProcessor).processTaskAndAck.func1\n\t/temporal/service/history/taskProcessor.go:227\ngo.temporal.io/server/common/backoff.Retry.func1\n\t/temporal/common/backoff/retry.go:104\ngo.temporal.io/server/common/backoff.RetryContext\n\t/temporal/common/backoff/retry.go:125\ngo.temporal.io/server/common/backoff.Retry\n\t/temporal/common/backoff/retry.go:105\ngo.temporal.io/server/service/history.(*taskProcessor).processTaskAndAck\n\t/temporal/service/history/taskProcessor.go:248\ngo.temporal.io/server/service/history.(*taskProcessor).taskWorker\n\t/temporal/service/history/taskProcessor.go:171"}
{"level":"error","ts":"2022-03-18T03:36:21.249Z","msg":"Fail to process task","service":"history","shard-id":2,"address":"10.11.11.116:7234","shard-item":"0xc001ca8280","component":"visibility-queue-processor","shard-id":2,"queue-task-id":8396570,"queue-task-visibility-timestamp":"2022-02-15T05:31:56.434Z","xdc-failover-version":0,"queue-task-type":"VisibilityUpsertExecution","wf-namespace-id":"78d10593-d54f-433f-863a-05cd5c9b345c","wf-id":"test003","wf-run-id":"dac0a45b-6417-4757-b062-4ada8f4e98de","error":"Unable to decode search attributes: invalid search attribute type: Unspecified","lifecycle":"ProcessingFailed","logging-call-at":"taskProcessor.go:341","stacktrace":"go.temporal.io/server/common/log.(*zapLogger).Error\n\t/temporal/common/log/zap_logger.go:142\ngo.temporal.io/server/service/history.(*taskProcessor).handleTaskError\n\t/temporal/service/history/taskProcessor.go:341\ngo.temporal.io/server/service/history.(*taskProcessor).processTaskAndAck.func1\n\t/temporal/service/history/taskProcessor.go:222\ngo.temporal.io/server/common/backoff.Retry.func1\n\t/temporal/common/backoff/retry.go:104\ngo.temporal.io/server/common/backoff.RetryContext\n\t/temporal/common/backoff/retry.go:125\ngo.temporal.io/server/common/backoff.Retry\n\t/temporal/common/backoff/retry.go:105\ngo.temporal.io/server/service/history.(*taskProcessor).processTaskAndAck\n\t/temporal/service/history/taskProcessor.go:248\ngo.temporal.io/server/service/history.(*taskProcessor).taskWorker\n\t/temporal/service/history/taskProcessor.go:171"}
{"level":"error","ts":"2022-03-18T03:36:21.249Z","msg":"Critical error processing task, retrying.","service":"history","shard-id":2,"address":"10.11.11.116:7234","shard-item":"0xc001ca8280","component":"visibility-queue-processor","shard-id":2,"queue-task-id":8396570,"queue-task-visibility-timestamp":"2022-02-15T05:31:56.434Z","xdc-failover-version":0,"queue-task-type":"VisibilityUpsertExecution","wf-namespace-id":"78d10593-d54f-433f-863a-05cd5c9b345c","wf-id":"test003","wf-run-id":"dac0a45b-6417-4757-b062-4ada8f4e98de","error":"Unable to decode search attributes: invalid search attribute type: Unspecified","operation-result":"OperationCritical","queue-task-type":"VisibilityUpsertExecution","logging-call-at":"taskProcessor.go:227","stacktrace":"go.temporal.io/server/common/log.(*zapLogger).Error\n\t/temporal/common/log/zap_logger.go:142\ngo.temporal.io/server/service/history.(*taskProcessor).processTaskAndAck.func1\n\t/temporal/service/history/taskProcessor.go:227\ngo.temporal.io/server/common/backoff.Retry.func1\n\t/temporal/common/backoff/retry.go:104\ngo.temporal.io/server/common/backoff.RetryContext\n\t/temporal/common/backoff/retry.go:125\ngo.temporal.io/server/common/backoff.Retry\n\t/temporal/common/backoff/retry.go:105\ngo.temporal.io/server/service/history.(*taskProcessor).processTaskAndAck\n\t/temporal/service/history/taskProcessor.go:248\ngo.temporal.io/server/service/history.(*taskProcessor).taskWorker\n\t/temporal/service/history/taskProcessor.go:171"}
{"level":"error","ts":"2022-03-18T03:36:21.332Z","msg":"Fail to process task","service":"history","shard-id":2,"address":"10.11.11.116:7234","shard-item":"0xc001ca8280","component":"visibility-queue-processor","shard-id":2,"queue-task-id":8396571,"queue-task-visibility-timestamp":"2022-02-15T05:31:56.434Z","xdc-failover-version":0,"queue-task-type":"VisibilityUpsertExecution","wf-namespace-id":"78d10593-d54f-433f-863a-05cd5c9b345c","wf-id":"test003","wf-run-id":"dac0a45b-6417-4757-b062-4ada8f4e98de","error":"Unable to decode search attributes: invalid search attribute type: Unspecified","lifecycle":"ProcessingFailed","logging-call-at":"taskProcessor.go:341","stacktrace":"go.temporal.io/server/common/log.(*zapLogger).Error\n\t/temporal/common/log/zap_logger.go:142\ngo.temporal.io/server/service/history.