Multi-Cluster Replication configuration

Hello everyone.
We are trying to configure multi-cluster Temporal. There are two Temporal servers deployed to different Kubernetes clusters. We created an ingress for the Temporal frontend service and are trying to add a cluster connection to the primary cluster, but we receive a connection error.
primary:

  config:
    logLevel: "error"
    clusterMetadata:
      enableGlobalNamespace: true
      failoverVersionIncrement: 10
      currentClusterName: staging-primary
      masterClusterName: staging-primary
      clusterInformation:
        staging-primary:
          enabled: true
          initialFailoverVersion: 1
          rpcAddress: temporal-dev-primary.test-domain.com:443

backup:

  config:
    logLevel: "error"
    clusterMetadata:
      enableGlobalNamespace: true
      failoverVersionIncrement: 10
      currentClusterName: staging-backup
      masterClusterName: staging-backup
      clusterInformation:
        staging-backup:
          enabled: true
          initialFailoverVersion: 2
          rpcAddress: temporal-dev-backup.test-domain.com:443
k -n staging  port-forward svc/temporal-frontend  7233
tctl -address 127.0.0.1:7233 admin cluster upsert-remote-cluster --frontend_address "temporal-dev-backup.test-domain.com:443"
Error: Operation AddOrUpdateRemoteCluster failed.
Error Details: rpc error: code = Unavailable desc = last connection error: connection closed before server preface received
Stack trace:
goroutine 1 [running]:
runtime/debug.Stack()
	runtime/debug/stack.go:24 +0x65
runtime/debug.PrintStack()
	runtime/debug/stack.go:16 +0x19
github.com/temporalio/tctl/cli_curr.printError({0x2d42eff, 0x2a}, {0x317c840, 0xc000010058})
	github.com/temporalio/tctl/cli_curr/util.go:392 +0x21e
github.com/temporalio/tctl/cli_curr.ErrorAndExit({0x2d42eff?, 0x318e900?}, {0x317c840?, 0xc000010058?})
	github.com/temporalio/tctl/cli_curr/util.go:403 +0x28
github.com/temporalio/tctl/cli_curr.AdminAddOrUpdateRemoteCluster(0xc0001bf8c0)
	github.com/temporalio/tctl/cli_curr/adminClusterCommands.go:89 +0x147
github.com/temporalio/tctl/cli_curr.newAdminClusterCommands.func6(0xc0001bf8c0?)
	github.com/temporalio/tctl/cli_curr/admin.go:537 +0x19
github.com/urfave/cli.HandleAction({0x28a4e60?, 0x2dc5320?}, 0x15?)
	github.com/urfave/cli@v1.22.5/app.go:526 +0x50
github.com/urfave/cli.Command.Run({{0x2d0719c, 0x15}, {0x0, 0x0}, {0xc000285460, 0x1, 0x1}, {0x2d7331d, 0x3f}, {0x0, ...}, ...}, ...)
	github.com/urfave/cli@v1.22.5/command.go:173 +0x652
github.com/urfave/cli.(*App).RunAsSubcommand(0xc00083e1c0, 0xc0001bf600)
	github.com/urfave/cli@v1.22.5/app.go:405 +0x91b
github.com/urfave/cli.Command.startApp({{0x2ce5429, 0x7}, {0x0, 0x0}, {0xc000285650, 0x1, 0x1}, {0x2d1e536, 0x1e}, {0x0, ...}, ...}, ...)
	github.com/urfave/cli@v1.22.5/command.go:372 +0x6e7
github.com/urfave/cli.Command.Run({{0x2ce5429, 0x7}, {0x0, 0x0}, {0xc000285650, 0x1, 0x1}, {0x2d1e536, 0x1e}, {0x0, ...}, ...}, ...)
	github.com/urfave/cli@v1.22.5/command.go:102 +0x808
github.com/urfave/cli.(*App).RunAsSubcommand(0xc00083e000, 0xc0001bf4a0)
	github.com/urfave/cli@v1.22.5/app.go:405 +0x91b
github.com/urfave/cli.Command.startApp({{0x2ce0892, 0x5}, {0x0, 0x0}, {0xc0002855b0, 0x1, 0x1}, {0x2d00506, 0x13}, {0x0, ...}, ...}, ...)
	github.com/urfave/cli@v1.22.5/command.go:372 +0x6e7
github.com/urfave/cli.Command.Run({{0x2ce0892, 0x5}, {0x0, 0x0}, {0xc0002855b0, 0x1, 0x1}, {0x2d00506, 0x13}, {0x0, ...}, ...}, ...)
	github.com/urfave/cli@v1.22.5/command.go:102 +0x808
github.com/urfave/cli.(*App).Run(0xc0002a7500, {0xc00003c080, 0x8, 0x8})
	github.com/urfave/cli@v1.22.5/app.go:277 +0x8a7
main.main()
	./main.go:45 +0xa6

Service logs:

{"level":"error","ts":"2022-08-01T08:25:00.882Z","msg":"unavailable error","service":"frontend","error":"last connection error: connection closed before server preface received","logging-call-at":"adminHandler.go:1791","stacktrace":"go.temporal.io/server/common/log.(*zapLogger).Error\n\t/home/builder/temporal/common/log/zap_logger.go:142\ngo.temporal.io/server/service/frontend.(*AdminHandler).error\n\t/home/builder/temporal/service/frontend/adminHandler.go:1791\ngo.temporal.io/server/service/frontend.(*AdminHandler).AddOrUpdateRemoteCluster\n\t/home/builder/temporal/service/frontend/adminHandler.go:1001\ngo.temporal.io/server/api/adminservice/v1._AdminService_AddOrUpdateRemoteCluster_Handler.func1\n\t/home/builder/temporal/api/adminservice/v1/service.pb.go:928\ngo.temporal.io/server/common/rpc/interceptor.(*SDKVersionInterceptor).Intercept\n\t/home/builder/temporal/common/rpc/interceptor/sdk_version.go:64\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1117\ngo.temporal.io/server/common/authorization.(*interceptor).Interceptor\n\t/home/builder/temporal/common/authorization/interceptor.go:152\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/rpc/interceptor.(*RateLimitInterceptor).Intercept\n\t/home/builder/temporal/common/rpc/interceptor/rate_limit.go:84\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/rpc/interceptor.(*NamespaceRateLimitInterceptor).Intercept\n\t/home/builder/temporal/common/rpc/interceptor/namespace_rate_limit.go:89\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/rpc/interceptor.(*NamespaceCountLimitInterceptor).Intercept\n\t/home/builder/temporal/common/rpc/interceptor/namespace_count_limit.go:99\ng
oogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/rpc/interceptor.(*NamespaceValidatorInterceptor).Intercept\n\t/home/builder/temporal/common/rpc/interceptor/namespace_validator.go:112\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/rpc/interceptor.(*TelemetryInterceptor).Intercept\n\t/home/builder/temporal/common/rpc/interceptor/telemetry.go:135\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/metrics.NewServerMetricsContextInjectorInterceptor.func1\n\t/home/builder/temporal/common/metrics/grpc.go:66\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/rpc.ServiceErrorInterceptor\n\t/home/builder/temporal/common/rpc/grpc.go:132\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/rpc/interceptor.(*NamespaceLogInterceptor).Intercept\n\t/home/builder/temporal/common/rpc/interceptor/namespace_logger.go:84\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1122\ngo.temporal.io/server/api/adminservice/v1._AdminService_AddOrUpdateRemoteCluster_Handler\n\t/home/builder/temporal/api/adminservice/v1/service.pb.go:930\ngoogle.golang.org/grpc.(*Server).processUnaryRPC\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1283\ngoogle.golang.org/grpc.(*Server).handleStream\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1620\ngoogle.golang.org/grpc.(*Server).serveStreams.func1.2\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server
.go:922"}
{"level":"error","ts":"2022-08-01T08:25:00.882Z","msg":"unavailable error","operation":"AddOrUpdateRemoteCluster","error":"last connection error: connection closed before server preface received","logging-call-at":"telemetry.go:280","stacktrace":"go.temporal.io/server/common/log.(*zapLogger).Error\n\t/home/builder/temporal/common/log/zap_logger.go:142\ngo.temporal.io/server/common/rpc/interceptor.(*TelemetryInterceptor).handleError\n\t/home/builder/temporal/common/rpc/interceptor/telemetry.go:280\ngo.temporal.io/server/common/rpc/interceptor.(*TelemetryInterceptor).Intercept\n\t/home/builder/temporal/common/rpc/interceptor/telemetry.go:144\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/metrics.NewServerMetricsContextInjectorInterceptor.func1\n\t/home/builder/temporal/common/metrics/grpc.go:66\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/rpc.ServiceErrorInterceptor\n\t/home/builder/temporal/common/rpc/grpc.go:132\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngo.temporal.io/server/common/rpc/interceptor.(*NamespaceLogInterceptor).Intercept\n\t/home/builder/temporal/common/rpc/interceptor/namespace_logger.go:84\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1.1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1120\ngoogle.golang.org/grpc.chainUnaryInterceptors.func1\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1122\ngo.temporal.io/server/api/adminservice/v1._AdminService_AddOrUpdateRemoteCluster_Handler\n\t/home/builder/temporal/api/adminservice/v1/service.pb.go:930\ngoogle.golang.org/grpc.(*Server).processUnaryRPC\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1283\ngoogle.golang.org/grpc.(*Server).handleStream\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:1620\
ngoogle.golang.org/grpc.(*Server).serveStreams.func1.2\n\t/go/pkg/mod/google.golang.org/grpc@v1.47.0/server.go:922"}

I checked the connection:

grpcurl temporal-dev-backup.test-domain.com:443 list
grpc.health.v1.Health
grpc.reflection.v1alpha.ServerReflection
temporal.api.operatorservice.v1.OperatorService
temporal.api.workflowservice.v1.WorkflowService
temporal.server.api.adminservice.v1.AdminService

temporal version: temporalio/server:1.17.1

last connection error: connection closed before server preface received

This typically indicates a connection issue — do you have TLS enabled?

What do you get via:

tctl --address localhost:7233 cl h

or grpcurl:
grpcurl -plaintext -d '{"service": "temporal.api.workflowservice.v1.WorkflowService"}' 127.0.0.1:7233 grpc.health.v1.Health/Check

Can you also show result of

tctl admin cl d

tctl admin cl ls

1 Like
➜  ~ tctl --address localhost:7233 cl h
temporal.api.workflowservice.v1.WorkflowService: SERVING
➜  ~ grpcurl -plaintext -d '{"service": "temporal.api.workflowservice.v1.WorkflowService"}' 127.0.0.1:7233 grpc.health.v1.Health/Check
{
  "status": "SERVING"
}
➜  ~ tctl admin cl d
{
  "supportedClients": {
    "temporal-cli": "\u003c2.0.0",
    "temporal-go": "\u003c2.0.0",
    "temporal-java": "\u003c2.0.0",
    "temporal-php": "\u003c2.0.0",
    "temporal-server": "\u003c2.0.0",
    "temporal-typescript": "\u003c2.0.0",
    "temporal-ui": "\u003c3.0.0"
  },
  "serverVersion": "1.17.1",
  "membershipInfo": {
    "currentHost": {
      "identity": "10.221.137.28:7233"
    },
    "reachableMembers": [
      "10.221.137.28:6933",
      "10.221.131.13:6935",
      "10.221.138.24:6939",
      "10.221.133.25:6934"
    ],
    "rings": [
      {
        "role": "frontend",
        "memberCount": 1,
        "members": [
          {
            "identity": "10.221.137.28:7233"
          }
        ]
      },
      {
        "role": "history",
        "memberCount": 1,
        "members": [
          {
            "identity": "10.221.133.25:7234"
          }
        ]
      },
      {
        "role": "matching",
        "memberCount": 1,
        "members": [
          {
            "identity": "10.221.131.13:7235"
          }
        ]
      },
      {
        "role": "worker",
        "memberCount": 1,
        "members": [
          {
            "identity": "10.221.138.24:7239"
          }
        ]
      }
    ]
  },
  "clusterId": "9ab7462f-01ca-4733-889d-cef4b4413da4",
  "clusterName": "staging-backup",
  "historyShardCount": 512,
  "persistenceStore": "cassandra",
  "visibilityStore": "cassandra",
  "versionInfo": {
    "current": {
      "version": "1.17.1",
      "releaseTime": "2022-07-13T03:23:00Z",
      "notes": "Temporal 1.17.1"
    },
    "recommended": {
      "version": "1.17.2",
      "releaseTime": "2022-08-03T02:09:00Z",
      "notes": "Temporal 1.17.2"
    },
    "alerts": [
      {
        "message": "🪐 A new release is available!",
        "severity": "Low"
      }
    ],
    "lastUpdateTime": "2022-08-15T00:40:39.591810333Z"
  },
  "failoverVersionIncrement": "10",
  "initialFailoverVersion": "2",
  "isGlobalNamespaceEnabled": true
}
➜  ~ tctl admin cl ls
[
  {
    "cluster_name": "active",
    "history_shard_count": 512,
    "cluster_id": "dfa98122-9f3c-463c-9d58-c0367764e759",
    "version_info": {
      "current": {
        "version": "1.15.2",
        "release_time": "2022-03-04T23:00:00Z"
      },
      "recommended": {
        "version": "1.16.2",
        "release_time": "2022-05-09T22:33:00Z",
        "notes": "Temporal 1.16.2"
      },
      "alerts": [
        {
          "message": "🪐 A new release is available!",
          "severity": 3
        }
      ],
      "last_update_time": "2022-07-28T09:02:46.587866262Z"
    },
    "cluster_address": "127.0.0.1:7933",
    "failover_version_increment": 10,
    "initial_failover_version": 1,
    "is_connection_enabled": true
  },
  {
    "cluster_name": "staging-backup",
    "history_shard_count": 512,
    "cluster_id": "9ab7462f-01ca-4733-889d-cef4b4413da4",
    "version_info": {
      "current": {
        "version": "1.17.1",
        "release_time": "2022-07-13T03:23:00Z",
        "notes": "Temporal 1.17.1"
      },
      "recommended": {
        "version": "1.17.2",
        "release_time": "2022-08-03T02:09:00Z",
        "notes": "Temporal 1.17.2"
      },
      "alerts": [
        {
          "message": "🪐 A new release is available!",
          "severity": 3
        }
      ],
      "last_update_time": "2022-08-15T00:40:39.591810333Z"
    },
    "cluster_address": "127.0.0.1:7933",
    "failover_version_increment": 10,
    "initial_failover_version": 2,
    "is_global_namespace_enabled": true,
    "is_connection_enabled": true
  }
]
tctl --address temporal-dev-backup.test-domain.com:443 cl h
Error: Unable to get "temporal.api.workflowservice.v1.WorkflowService" health check status.
Error Details: rpc error: code = Unavailable desc = connection closed before server preface received
('export TEMPORAL_CLI_SHOW_STACKS=1' to see stack traces)
k -n staging-backup-datashop  get ingress temporal-ha-frontend -o yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  annotations:
    nginx.ingress.kubernetes.io/backend-protocol: GRPC
    nginx.ingress.kubernetes.io/server-snippet: grpc_read_timeout 3600s; grpc_send_timeout
      3600s; client_body_timeout 3600s; http2_idle_timeout 10s; keepalive_timeout
      10s;
  labels:
  name: temporal-ha-frontend
  namespace: dev-backup
spec:
  ingressClassName: nginx
  rules:
  - host: temporal-dev-backup.test-domain.com
    http:
      paths:
      - backend:
          service:
            name: temporal-frontend
            port:
              name: grpc-rpc
        path: /
        pathType: ImplementationSpecific
  tls:
  - hosts:
    - temporal-dev-backup.test-domain.com
    secretName: test-domain-com