I’m attempting to deploy a self-hosted control plane with mTLS enabled for the internode communications. The system worker appears to be unable to connect when I enabled requireClientAuth
on the internode TLS config.
I’ve deployed the same certificate to the system worker as all the other control plane services, and I’ve also setup the internal-frontend since I’ve seen comments on here and GitHub suggesting it should help.
The error I get is as follows:
fatal error creating sdk client {"service": "worker", "error": "failed reaching server: last connection error: connection error: desc = \"error reading server preface: remote error: tls: bad certificate\"", "logging-call-at": "factory.go:128"}
To me this suggests that the (internal-)frontend is rejecting the system worker’s client certificate, which is odd since it’s using the same certificate as all the other control plane services.
I’ve got a full debug log (sanitized and redacted) which includes the config below. Would really appreciate some help as this has got me stumped!
TEMPORAL_ADDRESS is not set, setting it to fd20:14c:ccb7:5000:0:9:0:76:7233
2024/06/12 22:41:14 Loading config; env=docker,zone=,configDir=config
2024/06/12 22:41:14 Loading config files=[config/docker.yaml]
2024-06-12T22:41:14.191Z info Build info. {"git-time": "2024-06-12T06:12:52.000Z", "git-revision": "8d4f6c5cda5beacc10186f8584df3dc42cd7e2a0", "git-modified": true, "go-arch": "amd64", "go-os": "linux", "go-version": "go1.22.3", "cgo-enabled": false, "server-version": "1.23.0", "debug-mode": false, "logging-call-at": "main.go:146"}
2024-06-12T22:41:14.191Z info Updated dynamic config {"logging-call-at": "file_based_client.go:195"}
2024-06-12T22:41:14.191Z warn Not using any authorizer and flag `--allow-no-auth` not detected. Future versions will require using the flag `--allow-no-auth` if you do not want to set an authorizer. {"logging-call-at": "main.go:176"}
go.temporal.io/server/common/log.(*zapLogger).Warn
go.temporal.io/server@v1.23.0/common/log/zap_logger.go:148
main.buildCLI.func2
github.com/example/workflows/cmd/server/main.go:176
github.com/urfave/cli/v2.(*Command).Run
github.com/urfave/cli/v2@v2.4.0/command.go:163
github.com/urfave/cli/v2.(*App).RunContext
github.com/urfave/cli/v2@v2.4.0/app.go:313
github.com/urfave/cli/v2.(*App).Run
github.com/urfave/cli/v2@v2.4.0/app.go:224
main.main
github.com/example/workflows/cmd/server/main.go:53
runtime.main
runtime/proc.go:271
2024-06-12T22:41:14.228Z info loading certificate from file {"tls-cert-file": "/var/tls/internode/tls.crt", "logging-call-at": "local_store_cert_provider.go:297"}
2024-06-12T22:41:14.228Z info loading private key from file {"tls-key-file": "/var/tls/internode/tls.key", "logging-call-at": "local_store_cert_provider.go:310"}
2024-06-12T22:41:14.229Z info loading CA certs from {"tls-cert-files": ["/var/tls/internode/ca.crt"], "logging-call-at": "local_store_cert_provider.go:457"}
2024-06-12T22:41:14.229Z info loading CA certs from {"tls-cert-files": ["/var/tls/internode/ca.crt"], "logging-call-at": "local_store_cert_provider.go:457"}
2024-06-12T22:41:14.229Z info loading certificate from file {"tls-cert-file": "/var/tls/frontend/tls.crt", "logging-call-at": "local_store_cert_provider.go:297"}
2024-06-12T22:41:14.229Z info loading private key from file {"tls-key-file": "/var/tls/frontend/tls.key", "logging-call-at": "local_store_cert_provider.go:310"}
2024-06-12T22:41:14.229Z info loading CA certs from {"tls-cert-files": ["/var/tls/internode/ca.crt", "/var/tls/client/ca.crt"], "logging-call-at": "local_store_cert_provider.go:457"}
2024-06-12T22:41:14.230Z info loading CA certs from {"tls-cert-files": ["/var/tls/frontend/ca.crt"], "logging-call-at": "local_store_cert_provider.go:457"}
2024-06-12T22:41:14.230Z info loading certificate from file {"tls-cert-file": "/var/tls/internode/tls.crt", "logging-call-at": "local_store_cert_provider.go:297"}
2024-06-12T22:41:14.230Z info loading private key from file {"tls-key-file": "/var/tls/internode/tls.key", "logging-call-at": "local_store_cert_provider.go:310"}
2024-06-12T22:41:14.230Z info loading CA certs from {"tls-cert-files": ["/var/tls/internode/ca.crt"], "logging-call-at": "local_store_cert_provider.go:457"}
2024-06-12T22:41:14.231Z debug initialized custom fxevent.Logger {"component": "fx", "function": "go.temporal.io/server/temporal.init.func8()", "logging-call-at": "fx.go:1163"}
2024-06-12T22:41:14.231Z debug invoking {"component": "fx", "function": "go.temporal.io/server/common/pprof.LifetimeHooks()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.231Z debug invoking {"component": "fx", "function": "go.temporal.io/server/temporal.init.func1()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.231Z debug invoking {"component": "fx", "function": "go.temporal.io/server/temporal.ServerLifetimeHooks()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.251Z info Use rpc address 127.0.0.1:7933 for cluster active. {"component": "metadata-initializer", "logging-call-at": "fx.go:724"}
2024-06-12T22:41:14.251Z info Service is not requested, skipping initialization. {"service": "history", "logging-call-at": "fx.go:472"}
2024-06-12T22:41:14.251Z info Service is not requested, skipping initialization. {"service": "matching", "logging-call-at": "fx.go:493"}
2024-06-12T22:41:14.252Z info Service is not requested, skipping initialization. {"service": "frontend", "logging-call-at": "fx.go:522"}
2024-06-12T22:41:14.252Z info Service is not requested, skipping initialization. {"service": "internal-frontend", "logging-call-at": "fx.go:522"}
2024-06-12T22:41:14.262Z debug initialized custom fxevent.Logger {"component": "fx", "function": "go.temporal.io/server/temporal.init.func8()", "logging-call-at": "fx.go:1163"}
2024-06-12T22:41:14.262Z debug invoking {"component": "fx", "function": "go.temporal.io/server/common/persistence/client.BeanLifetimeHooks()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.282Z debug invoking {"component": "fx", "function": "go.temporal.io/server/common/cluster.MetadataLifetimeHooks()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.282Z debug invoking {"component": "fx", "function": "go.temporal.io/server/common/namespace.RegistryLifetimeHooks()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.283Z debug invoking {"component": "fx", "function": "go.temporal.io/server/common/metrics.RuntimeMetricsReporterLifetimeHooks()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.283Z debug invoking {"component": "fx", "function": "go.temporal.io/server/common/resource.RegisterBootstrapContainer()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.283Z debug invoking {"component": "fx", "function": "go.temporal.io/server/common/deadlock.init.func1()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.283Z debug invoking {"component": "fx", "function": "go.temporal.io/server/service.initPersistenceLazyLoadedServiceResolver()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.287Z info Initialized service resolver for persistence rate limiting {"service": "worker", "service": "worker", "logging-call-at": "fx.go:92"}
2024-06-12T22:41:14.287Z debug invoking {"component": "fx", "function": "go.temporal.io/server/service/worker.ServiceLifetimeHooks()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.288Z info historyClient: ownership caching disabled {"service": "worker", "logging-call-at": "client.go:91"}
2024-06-12T22:41:14.303Z info creating new visibility manager {"visibility_plugin_name": "postgres12", "visibility_index_name": "temporal_visibility", "logging-call-at": "factory.go:166"}
2024-06-12T22:41:14.305Z debug invoking {"component": "fx", "function": "reflect.makeFuncStub()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.305Z debug invoking {"component": "fx", "function": "reflect.makeFuncStub()", "module": "", "logging-call-at": "fx.go:1124"}
2024-06-12T22:41:14.305Z debug OnStart hook executing {"component": "fx", "callee": "go.temporal.io/server/common/pprof.LifetimeHooks.func1()", "caller": "go.temporal.io/server/common/pprof.LifetimeHooks", "logging-call-at": "fx.go:1040"}
2024-06-12T22:41:14.305Z debug OnStart hook executed {"component": "fx", "callee": "go.temporal.io/server/common/pprof.LifetimeHooks.func1()", "caller": "go.temporal.io/server/common/pprof.LifetimeHooks", "runtime": "10.119µs", "logging-call-at": "fx.go:1054"}
2024-06-12T22:41:14.305Z debug OnStart hook executing {"component": "fx", "callee": "go.temporal.io/server/temporal.init.func2.startAll.1()", "caller": "go.temporal.io/server/temporal.init.func2", "logging-call-at": "fx.go:1040"}
2024-06-12T22:41:14.305Z debug OnStart hook executed {"component": "fx", "callee": "go.temporal.io/server/temporal.init.func2.startAll.1()", "caller": "go.temporal.io/server/temporal.init.func2", "runtime": "305ns", "logging-call-at": "fx.go:1054"}
2024-06-12T22:41:14.305Z debug OnStart hook executing {"component": "fx", "callee": "go.temporal.io/server/temporal.(*ServerImpl).Start-fm()", "caller": "go.temporal.io/server/temporal.ServerLifetimeHooks", "logging-call-at": "fx.go:1040"}
2024-06-12T22:41:14.305Z info Starting server for services {"value": {"worker":{}}, "logging-call-at": "server_impl.go:93"}
2024-06-12T22:41:14.305Z info PProf listen on {"host": "localhost", "port": 7936, "logging-call-at": "pprof.go:82"}
2024-06-12T22:41:14.310Z debug archival:
history:
enableRead: true
provider:
filestore: null
gstorage:
credentialsPath: ""
s3store: null
state: enabled
visibility:
enableRead: true
provider:
filestore: null
gstorage:
credentialsPath: ""
s3store: null
state: enabled
clusterMetadata:
clusterInformation:
active:
enabled: true
initialFailoverVersion: 1
rpcAddress: 127.0.0.1:7933
currentClusterName: active
enableGlobalNamespace: false
failoverVersionIncrement: 10
masterClusterName: active
tags: {}
dcRedirectionPolicy:
policy: noop
dynamicConfigClient:
filepath: /etc/temporal/dynamic_config/dynamic_config.yaml
pollInterval: 10s
global:
authorization:
authExtraHeaderName: ""
authHeaderName: ""
authorizer: ""
claimMapper: ""
jwtKeyProvider:
keySourceURIs: []
refreshInterval: 0s
permissionsClaimName: ""
membership:
broadcastAddress: 10.1.3.118
maxJoinDuration: 30s
pprof:
host: ""
port: 7936
tls:
expirationChecks:
checkInterval: 1h0m0s
errorWindow: 12h0m0s
warningWindow: 24h0m0s
frontend:
client:
disableHostVerification: false
forceTLS: false
rootCaFiles:
- /var/tls/frontend/ca.crt
serverName: frontend.example.com
hostOverrides: {}
server:
certFile: /var/tls/frontend/tls.crt
clientCaFiles:
- /var/tls/internode/ca.crt
- /var/tls/client/ca.crt
keyFile: /var/tls/frontend/tls.key
requireClientAuth: true
internode:
client:
disableHostVerification: false
forceTLS: false
rootCaFiles:
- /var/tls/internode/ca.crt
serverName: internode.example.com
hostOverrides: {}
server:
certFile: /var/tls/internode/tls.crt
clientCaFiles:
- /var/tls/internode/ca.crt
keyFile: /var/tls/internode/tls.key
requireClientAuth: true
refreshInterval: 10m0s
remoteClusters: {}
systemWorker:
certFile: /var/tls/internode/tls.crt
client:
disableHostVerification: false
forceTLS: false
rootCaFiles:
- /var/tls/internode/ca.crt
serverName: internode.example.com
keyFile: /var/tls/internode/tls.key
log:
development: true
format: console
level: debug
outputFile: ""
stdout: true
namespaceDefaults: REDACTED
otel: {}
persistence: REDACTED
publicClient:
forceTLSConfig: ""
hostPort: ""
services:
frontend:
rpc:
bindOnIP: 0.0.0.0
bindOnLocalHost: false
grpcPort: 7233
httpAdditionalForwardedHeaders: []
httpPort: 0
membershipPort: 6933
history:
rpc:
bindOnIP: 0.0.0.0
bindOnLocalHost: false
grpcPort: 7234
httpAdditionalForwardedHeaders: []
httpPort: 0
membershipPort: 6934
internal-frontend:
rpc:
bindOnIP: 0.0.0.0
bindOnLocalHost: false
grpcPort: 7236
httpAdditionalForwardedHeaders: []
httpPort: 0
membershipPort: 6936
matching:
rpc:
bindOnIP: 0.0.0.0
bindOnLocalHost: false
grpcPort: 7235
httpAdditionalForwardedHeaders: []
httpPort: 0
membershipPort: 6935
worker:
rpc:
bindOnIP: 0.0.0.0
bindOnLocalHost: false
grpcPort: 7239
httpAdditionalForwardedHeaders: []
httpPort: 0
membershipPort: 6939
{"logging-call-at": "server_impl.go:94"}
2024-06-12T22:41:14.336Z debug OnStart hook executing {"component": "fx", "callee": "go.temporal.io/server/common/cluster.MetadataLifetimeHooks.func1()", "caller": "go.temporal.io/server/common/cluster.MetadataLifetimeHooks", "logging-call-at": "fx.go:1040"}
2024-06-12T22:41:14.338Z debug OnStart hook executed {"component": "fx", "callee": "go.temporal.io/server/common/cluster.MetadataLifetimeHooks.func1()", "caller": "go.temporal.io/server/common/cluster.MetadataLifetimeHooks", "runtime": "1.830071ms", "logging-call-at": "fx.go:1054"}
2024-06-12T22:41:14.338Z debug OnStart hook executing {"component": "fx", "callee": "go.temporal.io/server/common/namespace.RegistryLifetimeHooks.func1()", "caller": "go.temporal.io/server/common/namespace.RegistryLifetimeHooks", "logging-call-at": "fx.go:1040"}
2024-06-12T22:41:14.340Z debug OnStart hook executed {"component": "fx", "callee": "go.temporal.io/server/common/namespace.RegistryLifetimeHooks.func1()", "caller": "go.temporal.io/server/common/namespace.RegistryLifetimeHooks", "runtime": "1.936675ms", "logging-call-at": "fx.go:1054"}
2024-06-12T22:41:14.340Z debug OnStart hook executing {"component": "fx", "callee": "go.temporal.io/server/common/metrics.RuntimeMetricsReporterLifetimeHooks.func1()", "caller": "go.temporal.io/server/common/metrics.RuntimeMetricsReporterLifetimeHooks", "logging-call-at": "fx.go:1040"}
2024-06-12T22:41:14.341Z info RuntimeMetricsReporter started {"service": "worker", "logging-call-at": "runtime.go:138"}
2024-06-12T22:41:14.341Z debug OnStart hook executed {"component": "fx", "callee": "go.temporal.io/server/common/metrics.RuntimeMetricsReporterLifetimeHooks.func1()", "caller": "go.temporal.io/server/common/metrics.RuntimeMetricsReporterLifetimeHooks", "runtime": "538.112µs", "logging-call-at": "fx.go:1054"}
2024-06-12T22:41:14.341Z debug OnStart hook executing {"component": "fx", "callee": "go.temporal.io/server/common/deadlock.init.func1.1()", "caller": "go.temporal.io/server/common/deadlock.init.func1", "logging-call-at": "fx.go:1040"}
2024-06-12T22:41:14.341Z debug OnStart hook executed {"component": "fx", "callee": "go.temporal.io/server/common/deadlock.init.func1.1()", "caller": "go.temporal.io/server/common/deadlock.init.func1", "runtime": "11.515µs", "logging-call-at": "fx.go:1054"}
2024-06-12T22:41:14.341Z debug OnStart hook executing {"component": "fx", "callee": "go.temporal.io/server/service/worker.(*Service).Start-fm()", "caller": "go.temporal.io/server/service/worker.ServiceLifetimeHooks", "logging-call-at": "fx.go:1040"}
2024-06-12T22:41:14.341Z info worker starting {"service": "worker", "component": "worker", "logging-call-at": "service.go:343"}
2024-06-12T22:41:14.342Z debug starting ping check {"service": "worker", "name": "namespace registry lock", "logging-call-at": "deadlock.go:200"}
2024-06-12T22:41:14.342Z debug ping check succeeded {"service": "worker", "name": "namespace registry lock", "logging-call-at": "deadlock.go:219"}
2024-06-12T22:41:14.342Z debug starting ping check {"service": "worker", "name": "cluster metadata lock", "logging-call-at": "deadlock.go:200"}
2024-06-12T22:41:14.342Z debug ping check succeeded {"service": "worker", "name": "cluster metadata lock", "logging-call-at": "deadlock.go:219"}
2024-06-12T22:41:14.343Z debug starting ping check {"service": "worker", "name": "cluster metadata callback lock", "logging-call-at": "deadlock.go:200"}
2024-06-12T22:41:14.343Z debug ping check succeeded {"service": "worker", "name": "cluster metadata callback lock", "logging-call-at": "deadlock.go:219"}
2024-06-12T22:41:14.347Z debug Membership heartbeat upserted successfully {"address": "10.1.3.118", "port": 6939, "hostId": "dfccf8ea-290c-11ef-a2de-520d6ad4cf05", "logging-call-at": "monitor.go:244"}
2024-06-12T22:41:14.347Z info Membership heartbeat upserted successfully {"address": "10.1.3.118", "port": 6939, "hostId": "dfccf8ea-290c-11ef-a2de-520d6ad4cf05", "logging-call-at": "monitor.go:306"}
2024-06-12T22:41:14.349Z info bootstrap hosts fetched {"bootstrap-hostports": "10.1.2.123:6933,10.1.3.113:6935,10.1.3.118:6939,10.1.3.117:6936,10.1.2.120:6934", "logging-call-at": "monitor.go:348"}
2024-06-12T22:41:14.357Z info Current reachable members {"component": "service-resolver", "service": "internal-frontend", "addresses": ["10.1.3.117:7236"], "logging-call-at": "service_resolver.go:275"}
2024-06-12T22:41:14.357Z info Current reachable members {"component": "service-resolver", "service": "worker", "addresses": ["10.1.3.118:7239"], "logging-call-at": "service_resolver.go:275"}
2024-06-12T22:41:14.358Z info Current reachable members {"component": "service-resolver", "service": "frontend", "addresses": ["10.1.2.123:7233"], "logging-call-at": "service_resolver.go:275"}
2024-06-12T22:41:14.359Z info Current reachable members {"component": "service-resolver", "service": "history", "addresses": ["10.1.2.120:7234"], "logging-call-at": "service_resolver.go:275"}
2024-06-12T22:41:14.360Z info Current reachable members {"component": "service-resolver", "service": "matching", "addresses": ["10.1.3.113:7235"], "logging-call-at": "service_resolver.go:275"}
2024-06-12T22:41:14.368Z debug Membership heartbeat upserted successfully {"address": "10.1.3.118", "port": 6939, "hostId": "dfccf8ea-290c-11ef-a2de-520d6ad4cf05", "logging-call-at": "monitor.go:244"}
2024-06-12T22:41:14.380Z warn error creating sdk client {"service": "worker", "error": "failed reaching server: last connection error: connection error: desc = \"error reading server preface: remote error: tls: bad certificate\"", "logging-call-at": "factory.go:121"}
go.temporal.io/server/common/log.(*zapLogger).Warn
go.temporal.io/server@v1.23.0/common/log/zap_logger.go:148
go.temporal.io/server/common/sdk.(*clientFactory).GetSystemClient.func1.1
go.temporal.io/server@v1.23.0/common/sdk/factory.go:121
go.temporal.io/server/common/backoff.ThrottleRetry.func1
go.temporal.io/server@v1.23.0/common/backoff/retry.go:117
go.temporal.io/server/common/backoff.ThrottleRetryContext
go.temporal.io/server@v1.23.0/common/backoff/retry.go:143
go.temporal.io/server/common/backoff.ThrottleRetry
go.temporal.io/server@v1.23.0/common/backoff/retry.go:118
go.temporal.io/server/common/sdk.(*clientFactory).GetSystemClient.func1
go.temporal.io/server@v1.23.0/common/sdk/factory.go:116
sync.(*Once).doSlow
sync/once.go:74
sync.(*Once).Do
sync/once.go:65
go.temporal.io/server/common/sdk.(*clientFactory).GetSystemClient
go.temporal.io/server@v1.23.0/common/sdk/factory.go:115
go.temporal.io/server/service/worker/scanner.(*Scanner).startWorkflowWithRetry.func1
go.temporal.io/server@v1.23.0/service/worker/scanner/scanner.go:261
go.temporal.io/server/common/backoff.ThrottleRetryContext
go.temporal.io/server@v1.23.0/common/backoff/retry.go:143
go.temporal.io/server/service/worker/scanner.(*Scanner).startWorkflowWithRetry
go.temporal.io/server@v1.23.0/service/worker/scanner/scanner.go:258
2024-06-12T22:41:14.380Z fatal error creating sdk client {"service": "worker", "error": "failed reaching server: last connection error: connection error: desc = \"error reading server preface: remote error: tls: bad certificate\"", "logging-call-at": "factory.go:128"}
go.temporal.io/server/common/log.(*zapLogger).Fatal
go.temporal.io/server@v1.23.0/common/log/zap_logger.go:180
go.temporal.io/server/common/sdk.(*clientFactory).GetSystemClient.func1
go.temporal.io/server@v1.23.0/common/sdk/factory.go:128
sync.(*Once).doSlow
sync/once.go:74
sync.(*Once).Do
sync/once.go:65
go.temporal.io/server/common/sdk.(*clientFactory).GetSystemClient
go.temporal.io/server@v1.23.0/common/sdk/factory.go:115
go.temporal.io/server/service/worker/scanner.(*Scanner).startWorkflowWithRetry.func1
go.temporal.io/server@v1.23.0/service/worker/scanner/scanner.go:261
go.temporal.io/server/common/backoff.ThrottleRetryContext
go.temporal.io/server@v1.23.0/common/backoff/retry.go:143
go.temporal.io/server/service/worker/scanner.(*Scanner).startWorkflowWithRetry
go.temporal.io/server@v1.23.0/service/worker/scanner/scanner.go:258