Allow more frequent keep-alive checking on server
Fixes #2711 (closed)
Fixes #2715 (closed) (this is context cancellation because of connection closure)
When keepalives were added to the various gitaly clients, the serverside keepalive enforcement was left as default. It turns out this default is a minimum of 5 minutes between keepalives. This means our client keepalive of 20 seconds is actually causing the connection to eventually be discarded for breaching enforcement.
This was replicated by sleeping in an RPC for several minutes:
TestSuccessfulFetchInternalRemote: fetch_internal_remote_test.go:44:
Error Trace: fetch_internal_remote_test.go:44
Error: Received unexpected error:
rpc error: code = Unavailable desc = transport is closing
Test: TestSuccessfulFetchInternalRemote
--- FAIL: TestSuccessfulFetchInternalRemote (160.04s)
Test changes:
diff --git a/internal/service/remote/fetch_internal_remote.go b/internal/service/remote/fetch_internal_remote.go
index 6365e3ce..e178aaaf 100644
--- a/internal/service/remote/fetch_internal_remote.go
+++ b/internal/service/remote/fetch_internal_remote.go
@@ -3,6 +3,7 @@ package remote
import (
"context"
"fmt"
+ "time"
"github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
"gitlab.com/gitlab-org/gitaly/internal/git"
@@ -20,6 +21,9 @@ const (
// FetchInternalRemote fetches another Gitaly repository set as a remote
func (s *server) FetchInternalRemote(ctx context.Context, req *gitalypb.FetchInternalRemoteRequest) (*gitalypb.FetchInternalRemoteResponse, error) {
+
+ time.Sleep(170 * time.Second)
+
if err := validateFetchInternalRemoteRequest(req); err != nil {
return nil, status.Errorf(codes.InvalidArgument, "FetchInternalRemote: %v", err)
}
diff --git a/internal/service/remote/testhelper_test.go b/internal/service/remote/testhelper_test.go
index 1d07353e..1e9e6ed3 100644
--- a/internal/service/remote/testhelper_test.go
+++ b/internal/service/remote/testhelper_test.go
@@ -4,12 +4,14 @@ import (
"log"
"os"
"testing"
+ "time"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/internal/rubyserver"
"gitlab.com/gitlab-org/gitaly/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/proto/go/gitalypb"
"google.golang.org/grpc"
+ "google.golang.org/grpc/keepalive"
"google.golang.org/grpc/reflection"
)
@@ -47,6 +49,10 @@ func runRemoteServiceServer(t *testing.T) (string, func()) {
func NewRemoteClient(t *testing.T, serverSocketPath string) (gitalypb.RemoteServiceClient, *grpc.ClientConn) {
connOpts := []grpc.DialOption{
grpc.WithInsecure(),
+ grpc.WithKeepaliveParams(keepalive.ClientParameters{
+ Time: 20 * time.Second,
+ PermitWithoutStream: true,
+ }),
}
conn, err := grpc.Dial(serverSocketPath, connOpts...)
if err != nil {
These test changes are too slow/impractical to add as automated testing as it stands.
This bug is likely the cause of https://gitlab.com/gitlab-org/gitaly/-/issues/2555
Edited by GitLab Release Tools Bot