After upgrading our self-hosted Temporal to the latest OSS version (1.22.4),
all workflows and activities have been breaking with errors like:
- WorkflowTaskTimedOut
- “Workflow is busy.” (429)
Workers are also not picking up tasks quickly, resulting in long-running tasks that cascade and cause the aforementioned errors.
Should I revert to an older version?
Do I also need to upgrade the workflow clients to be on 1.22.4, or is the client forward compatible?
Here are my configuration values:
--set web.image.tag=2.23.0 \
--set server.config.persistence.default.driver=sql \
--set server.config.persistence.default.sql.driver=postgres12 \
--set server.config.persistence.default.sql.host=$TEMPORAL_DB_HOST \
--set server.config.persistence.default.sql.port=5432 \
--set server.config.persistence.default.sql.database=temporal \
--set server.config.persistence.default.sql.user=db_user \
--set server.config.persistence.default.sql.existingSecret=existing-db-secret \
--set server.config.persistence.default.sql.maxConns=200 \
--set server.config.persistence.default.sql.maxConnLifetime=1h \
--set server.config.persistence.visibility.driver=sql \
--set server.config.persistence.visibility.sql.driver=postgres12 \
--set server.config.persistence.visibility.sql.host=$TEMPORAL_DB_HOST \
--set server.config.persistence.visibility.sql.port=5432 \
--set server.config.persistence.visibility.sql.database=temporal_visibility \
--set server.config.persistence.visibility.sql.user=db_user \
--set server.config.persistence.visibility.sql.existingSecret=existing-db-secret \
--set server.config.persistence.visibility.sql.maxConns=200 \
--set server.config.persistence.visibility.sql.maxConnLifetime=1h \
--set server.frontend.resources.requests.cpu=2 \
--set server.frontend.resources.requests.memory=8Gi \
--set server.history.resources.requests.cpu=2 \
--set server.history.resources.requests.memory=8Gi \
--set server.history.replicaCount=3 \
--set server.matching.resources.requests.cpu=2 \
--set server.matching.resources.requests.memory=8Gi \
--set server.worker.resources.requests.cpu=2 \
--set server.worker.resources.requests.memory=8Gi \
--set "server.dynamicConfig.history\.cacheInitialSize[0].value"=256 \
--set "server.dynamicConfig.history\.cacheMaxSize[0].value"=512 \
--set "server.dynamicConfig.history\.eventsCacheInitialSize[0].value"=256 \
--set "server.dynamicConfig.history\.eventsCacheMaxSize[0].value"=512 \
--set "server.dynamicConfig.frontend\.namespaceCount[0].value"=19200 \
--set "server.dynamicConfig.frontend\.namespaceRPS[0].value"=24000 \
--set "server.dynamicConfig.history\.namespaceRPS[0].value"=24000 \
--set "server.dynamicConfig.frontend\.rps[0].value"=100000 \
--set "server.dynamicConfig.history\.rps[0].value"=100000 \
--set "server.dynamicConfig.matching\.rps[0].value"=100000 \
--set server.archival.history.state=enabled \
--set server.archival.history.enableRead=true \
--set server.archival.history.provider.s3store.region=$REGION \
--set server.archival.visibility.state=enabled \
--set server.archival.visibility.enableRead=true \
--set server.archival.visibility.provider.s3store.region=$REGION \
--set server.namespaceDefaults.archival.history.state=enabled \
--set server.namespaceDefaults.archival.history.URI=s3://$TEMPORAL_ARCHIVAL_S3_BUCKET \
--set server.namespaceDefaults.archival.visibility.state=enabled \
--set server.namespaceDefaults.archival.visibility.URI=s3://$TEMPORAL_ARCHIVAL_S3_BUCKET \
--set cassandra.enabled=false \
--set mysql.enabled=false \
--set postgresql.enabled=true \
--set prometheus.enabled=false \
--set grafana.enabled=false \
--set elasticsearch.enabled=false \
--set schema.setup.enabled=true \
--set schema.update.enabled=true \
--set web.enabled=true \
--set web.config.auth.enabled=true \
--set "web.additionalEnv[0].name"=TEMPORAL_AUTH_ENABLED \
--set-string "web.additionalEnv[0].value"=true \
--set "web.additionalEnv[1].name"=TEMPORAL_AUTH_ISSUER_URL \
--set "web.additionalEnv[1].value"="https://accounts.google.com" \
--set "web.additionalEnv[2].name"=TEMPORAL_AUTH_PROVIDER_URL \
--set "web.additionalEnv[2].value"="https://accounts.google.com/" \
--set "web.additionalEnv[3].name"=TEMPORAL_AUTH_CLIENT_ID \
--set "web.additionalEnv[3].valueFrom.secretKeyRef.name"="existing-secret-with-oidc-creds" \
--set "web.additionalEnv[3].valueFrom.secretKeyRef.key"="CLIENT_ID" \
--set "web.additionalEnv[4].name"=TEMPORAL_AUTH_CLIENT_SECRET \
--set "web.additionalEnv[4].valueFrom.secretKeyRef.name"="existing-secret-with-oidc-creds" \
--set "web.additionalEnv[4].valueFrom.secretKeyRef.key"="CLIENT_SECRET" \
--set "web.additionalEnv[5].name"=TEMPORAL_AUTH_CALLBACK_URL \
--set "web.additionalEnv[5].value"="https://${TEMPORAL_HOSTED_HOSTNAME}/auth/sso_callback" \
--set "web.additionalEnv[6].name"=TEMPORAL_AUTH_SCOPES \
--set-string "web.additionalEnv[6].value"="openid\,profile\,email" \
--set serviceAccount.create=true \
--set serviceAccount.name=temporal-archiver \
--set serviceAccount.extraAnnotations."eks\.amazonaws\.com\/role-arn"=$TEMPORAL_ARCHIVAL_SERVICE_ACCOUNT_ARN \