From d0d29168a587a414ad5ff364a713ac6f167211c9 Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Fri, 10 Feb 2023 14:22:00 +0000 Subject: [PATCH] controller: use grpc with contexts for improved timeouts This patch improves timeout logic for testing and creating a buildx server. Instead of needing to have a custom loop, we can just use the primitives provided by go and grpc for easier handling. Additionally, without the explicit time.Sleeps, we defer to GRPCs exponential backoff algorithm which should provide substantially better results. Signed-off-by: Justin Chadwell --- controller/remote/client.go | 5 ++-- controller/remote/controller.go | 53 ++++++++++++++++----------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/controller/remote/client.go b/controller/remote/client.go index 3ce5850e..68921fcb 100644 --- a/controller/remote/client.go +++ b/controller/remote/client.go @@ -20,20 +20,21 @@ import ( "google.golang.org/grpc/credentials/insecure" ) -func NewClient(addr string) (*Client, error) { +func NewClient(ctx context.Context, addr string) (*Client, error) { backoffConfig := backoff.DefaultConfig backoffConfig.MaxDelay = 3 * time.Second connParams := grpc.ConnectParams{ Backoff: backoffConfig, } gopts := []grpc.DialOption{ + grpc.WithBlock(), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithConnectParams(connParams), grpc.WithContextDialer(dialer.ContextDialer), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize)), grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize)), } - conn, err := grpc.Dial(dialer.DialAddress(addr), gopts...) + conn, err := grpc.DialContext(ctx, dialer.DialAddress(addr), gopts...) if err != nil { return nil, err } diff --git a/controller/remote/controller.go b/controller/remote/controller.go index 3f51529b..01c20d2d 100644 --- a/controller/remote/controller.go +++ b/controller/remote/controller.go @@ -52,10 +52,21 @@ func NewRemoteBuildxController(ctx context.Context, dockerCli command.Cli, opts rootDir = rootDataDir(dockerCli) } serverRoot := filepath.Join(rootDir, "shared") - c, err := newBuildxClientAndCheck(filepath.Join(serverRoot, "buildx.sock"), 1, 0) + + // connect to buildx server if it is already running + ctx2, cancel := context.WithTimeout(ctx, 1*time.Second) + c, err := newBuildxClientAndCheck(ctx2, filepath.Join(serverRoot, "buildx.sock")) + cancel() if err != nil { + if !errors.Is(err, context.DeadlineExceeded) { + return nil, errors.Wrap(err, "cannot connect to the buildx server") + } + } else { + return &buildxController{c, serverRoot}, nil + } + + // start buildx server via subcommand logrus.Info("no buildx server found; launching...") - // start buildx server via subcommand launchFlags := []string{} if opts.ServerConfig != "" { launchFlags = append(launchFlags, "--config", opts.ServerConfig) @@ -69,11 +80,14 @@ func NewRemoteBuildxController(ctx context.Context, dockerCli command.Cli, opts return nil, err } go wait() - c, err = newBuildxClientAndCheck(filepath.Join(serverRoot, "buildx.sock"), 10, time.Second) + + // wait for buildx server to be ready + ctx2, cancel = context.WithTimeout(ctx, 10*time.Second) + c, err = newBuildxClientAndCheck(ctx2, filepath.Join(serverRoot, "buildx.sock")) + cancel() if err != nil { return nil, errors.Wrap(err, "cannot connect to the buildx server") } - } return &buildxController{c, serverRoot}, nil } @@ -180,15 +194,14 @@ func getLogFilePath(dockerCli command.Cli, configPath string) (string, error) { if err != nil { return "", err } - logFile := config.LogFile - if logFile == "" { + if config.LogFile == "" { root, err := prepareRootDir(dockerCli, config) if err != nil { return "", err } - logFile = filepath.Join(root, "log") + return filepath.Join(root, "log"), nil } - return logFile, nil + return config.LogFile, nil } func getConfig(dockerCli command.Cli, configPath string) (*serverConfig, error) { @@ -232,34 +245,18 @@ func rootDataDir(dockerCli command.Cli) string { return filepath.Join(confutil.ConfigDir(dockerCli), "controller") } -func newBuildxClientAndCheck(addr string, checkNum int, duration time.Duration) (*Client, error) { - c, err := NewClient(addr) +func newBuildxClientAndCheck(ctx context.Context, addr string) (*Client, error) { + c, err := NewClient(ctx, addr) if err != nil { return nil, err } - var lastErr error - for i := 0; i < checkNum; i++ { - _, err := c.List(context.TODO()) - if err == nil { - lastErr = nil - break - } - err = errors.Wrapf(err, "failed to access server (tried %d times)", i) - logrus.Debugf("connection failure: %v", err) - lastErr = err - time.Sleep(duration) - } - if lastErr != nil { - return nil, lastErr - } - p, v, r, err := c.Version(context.TODO()) + p, v, r, err := c.Version(ctx) if err != nil { return nil, err } logrus.Debugf("connected to server (\"%v %v %v\")", p, v, r) if !(p == version.Package && v == version.Version && r == version.Revision) { - logrus.Warnf("version mismatch (server: \"%v %v %v\", client: \"%v %v %v\"); please kill and restart buildx server", - p, v, r, version.Package, version.Version, version.Revision) + return nil, errors.Errorf("version mismatch (client: \"%v %v %v\", server: \"%v %v %v\")", version.Package, version.Version, version.Revision, p, v, r) } return c, nil }