From 56b9e785e5cbd3de918d50d5d01cdb98fa26b102 Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Fri, 10 Feb 2023 11:51:39 +0000 Subject: [PATCH 1/6] build: don't kill remote controller after build We don't know if other builds might be running, etc, so we should allow the server to decide when to exit. Signed-off-by: Justin Chadwell --- commands/build.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/commands/build.go b/commands/build.go index cf9612da..9cf8334b 100644 --- a/commands/build.go +++ b/commands/build.go @@ -392,10 +392,6 @@ func launchControllerAndRunBuild(dockerCli command.Cli, options buildOptions) er if err := c.Disconnect(ctx, ref); err != nil { logrus.Warnf("disconnect error: %v", err) } - // If "invoke" isn't specified, further inspection ins't provided. Finish the buildx server. - if err := c.Kill(ctx); err != nil { - return err - } } return nil } From 1b91bc2e02c6a4db397df6a6a4ea6cb01944fe2d Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Fri, 10 Feb 2023 12:02:52 +0000 Subject: [PATCH 2/6] controller: add more informative server exit messages When exiting, we should ideally always print a message, and give details as to exactly what error we received. Signed-off-by: Justin Chadwell --- controller/remote/controller.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/controller/remote/controller.go b/controller/remote/controller.go index beb21abe..a13f27af 100644 --- a/controller/remote/controller.go +++ b/controller/remote/controller.go @@ -151,18 +151,21 @@ func serveCmd(dockerCli command.Cli) *cobra.Command { errCh <- errors.Wrapf(err, "error on serving via socket %q", addr) } }() + var s os.Signal sigCh := make(chan os.Signal, 1) signal.Notify(sigCh, os.Interrupt) select { - case s = <-sigCh: - logrus.Debugf("got signal %v", s) case err := <-errCh: + logrus.Errorf("got error %s, exiting", err) return err + case s = <-sigCh: + logrus.Infof("got signal %s, exiting", s) + return nil case <-doneCh: + logrus.Infof("rpc server done, exiting") + return nil } - return nil - }, } From abda257763a170c67c8f1f660a7c348af550eb47 Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Fri, 10 Feb 2023 12:33:35 +0000 Subject: [PATCH 3/6] controller: exit cleanly on SIGTERM This signal may be sent using an external tool such as pkill, and since we can handle it neatly, we should. Signed-off-by: Justin Chadwell --- controller/remote/controller.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/controller/remote/controller.go b/controller/remote/controller.go index a13f27af..3f51529b 100644 --- a/controller/remote/controller.go +++ b/controller/remote/controller.go @@ -154,7 +154,8 @@ func serveCmd(dockerCli command.Cli) *cobra.Command { var s os.Signal sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, os.Interrupt) + signal.Notify(sigCh, syscall.SIGINT) + signal.Notify(sigCh, syscall.SIGTERM) select { case err := <-errCh: logrus.Errorf("got error %s, exiting", err) From d0d29168a587a414ad5ff364a713ac6f167211c9 Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Fri, 10 Feb 2023 14:22:00 +0000 Subject: [PATCH 4/6] controller: use grpc with contexts for improved timeouts This patch improves timeout logic for testing and creating a buildx server. Instead of needing to have a custom loop, we can just use the primitives provided by go and grpc for easier handling. Additionally, without the explicit time.Sleeps, we defer to GRPCs exponential backoff algorithm which should provide substantially better results. Signed-off-by: Justin Chadwell --- controller/remote/client.go | 5 ++-- controller/remote/controller.go | 53 ++++++++++++++++----------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/controller/remote/client.go b/controller/remote/client.go index 3ce5850e..68921fcb 100644 --- a/controller/remote/client.go +++ b/controller/remote/client.go @@ -20,20 +20,21 @@ import ( "google.golang.org/grpc/credentials/insecure" ) -func NewClient(addr string) (*Client, error) { +func NewClient(ctx context.Context, addr string) (*Client, error) { backoffConfig := backoff.DefaultConfig backoffConfig.MaxDelay = 3 * time.Second connParams := grpc.ConnectParams{ Backoff: backoffConfig, } gopts := []grpc.DialOption{ + grpc.WithBlock(), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithConnectParams(connParams), grpc.WithContextDialer(dialer.ContextDialer), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize)), grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize)), } - conn, err := grpc.Dial(dialer.DialAddress(addr), gopts...) + conn, err := grpc.DialContext(ctx, dialer.DialAddress(addr), gopts...) if err != nil { return nil, err } diff --git a/controller/remote/controller.go b/controller/remote/controller.go index 3f51529b..01c20d2d 100644 --- a/controller/remote/controller.go +++ b/controller/remote/controller.go @@ -52,10 +52,21 @@ func NewRemoteBuildxController(ctx context.Context, dockerCli command.Cli, opts rootDir = rootDataDir(dockerCli) } serverRoot := filepath.Join(rootDir, "shared") - c, err := newBuildxClientAndCheck(filepath.Join(serverRoot, "buildx.sock"), 1, 0) + + // connect to buildx server if it is already running + ctx2, cancel := context.WithTimeout(ctx, 1*time.Second) + c, err := newBuildxClientAndCheck(ctx2, filepath.Join(serverRoot, "buildx.sock")) + cancel() if err != nil { + if !errors.Is(err, context.DeadlineExceeded) { + return nil, errors.Wrap(err, "cannot connect to the buildx server") + } + } else { + return &buildxController{c, serverRoot}, nil + } + + // start buildx server via subcommand logrus.Info("no buildx server found; launching...") - // start buildx server via subcommand launchFlags := []string{} if opts.ServerConfig != "" { launchFlags = append(launchFlags, "--config", opts.ServerConfig) @@ -69,11 +80,14 @@ func NewRemoteBuildxController(ctx context.Context, dockerCli command.Cli, opts return nil, err } go wait() - c, err = newBuildxClientAndCheck(filepath.Join(serverRoot, "buildx.sock"), 10, time.Second) + + // wait for buildx server to be ready + ctx2, cancel = context.WithTimeout(ctx, 10*time.Second) + c, err = newBuildxClientAndCheck(ctx2, filepath.Join(serverRoot, "buildx.sock")) + cancel() if err != nil { return nil, errors.Wrap(err, "cannot connect to the buildx server") } - } return &buildxController{c, serverRoot}, nil } @@ -180,15 +194,14 @@ func getLogFilePath(dockerCli command.Cli, configPath string) (string, error) { if err != nil { return "", err } - logFile := config.LogFile - if logFile == "" { + if config.LogFile == "" { root, err := prepareRootDir(dockerCli, config) if err != nil { return "", err } - logFile = filepath.Join(root, "log") + return filepath.Join(root, "log"), nil } - return logFile, nil + return config.LogFile, nil } func getConfig(dockerCli command.Cli, configPath string) (*serverConfig, error) { @@ -232,34 +245,18 @@ func rootDataDir(dockerCli command.Cli) string { return filepath.Join(confutil.ConfigDir(dockerCli), "controller") } -func newBuildxClientAndCheck(addr string, checkNum int, duration time.Duration) (*Client, error) { - c, err := NewClient(addr) +func newBuildxClientAndCheck(ctx context.Context, addr string) (*Client, error) { + c, err := NewClient(ctx, addr) if err != nil { return nil, err } - var lastErr error - for i := 0; i < checkNum; i++ { - _, err := c.List(context.TODO()) - if err == nil { - lastErr = nil - break - } - err = errors.Wrapf(err, "failed to access server (tried %d times)", i) - logrus.Debugf("connection failure: %v", err) - lastErr = err - time.Sleep(duration) - } - if lastErr != nil { - return nil, lastErr - } - p, v, r, err := c.Version(context.TODO()) + p, v, r, err := c.Version(ctx) if err != nil { return nil, err } logrus.Debugf("connected to server (\"%v %v %v\")", p, v, r) if !(p == version.Package && v == version.Version && r == version.Revision) { - logrus.Warnf("version mismatch (server: \"%v %v %v\", client: \"%v %v %v\"); please kill and restart buildx server", - p, v, r, version.Package, version.Version, version.Revision) + return nil, errors.Errorf("version mismatch (client: \"%v %v %v\", server: \"%v %v %v\")", version.Package, version.Version, version.Revision, p, v, r) } return c, nil } From ed9ea2476d5d463443436cce7188623de799622a Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Fri, 10 Feb 2023 14:24:55 +0000 Subject: [PATCH 5/6] controller: use unique files per buildx version This ensures that we should never accidentally connect to a server with a mismatched version, while also allowing us to run multiple buildx servers at a time. Signed-off-by: Justin Chadwell --- controller/remote/controller.go | 52 ++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/controller/remote/controller.go b/controller/remote/controller.go index 01c20d2d..7ba70ff6 100644 --- a/controller/remote/controller.go +++ b/controller/remote/controller.go @@ -35,6 +35,12 @@ const ( serveCommandName = "_INTERNAL_SERVE" ) +var ( + defaultLogFilename = fmt.Sprintf("buildx.%s.log", version.Revision) + defaultSocketFilename = fmt.Sprintf("buildx.%s.sock", version.Revision) + defaultPIDFilename = fmt.Sprintf("buildx.%s.pid", version.Revision) +) + type serverConfig struct { // Specify buildx server root Root string `toml:"root"` @@ -55,7 +61,7 @@ func NewRemoteBuildxController(ctx context.Context, dockerCli command.Cli, opts // connect to buildx server if it is already running ctx2, cancel := context.WithTimeout(ctx, 1*time.Second) - c, err := newBuildxClientAndCheck(ctx2, filepath.Join(serverRoot, "buildx.sock")) + c, err := newBuildxClientAndCheck(ctx2, filepath.Join(serverRoot, defaultSocketFilename)) cancel() if err != nil { if !errors.Is(err, context.DeadlineExceeded) { @@ -66,28 +72,28 @@ func NewRemoteBuildxController(ctx context.Context, dockerCli command.Cli, opts } // start buildx server via subcommand - logrus.Info("no buildx server found; launching...") - launchFlags := []string{} - if opts.ServerConfig != "" { - launchFlags = append(launchFlags, "--config", opts.ServerConfig) - } - logFile, err := getLogFilePath(dockerCli, opts.ServerConfig) - if err != nil { - return nil, err - } - wait, err := launch(ctx, logFile, append([]string{serveCommandName}, launchFlags...)...) - if err != nil { - return nil, err - } - go wait() + logrus.Info("no buildx server found; launching...") + launchFlags := []string{} + if opts.ServerConfig != "" { + launchFlags = append(launchFlags, "--config", opts.ServerConfig) + } + logFile, err := getLogFilePath(dockerCli, opts.ServerConfig) + if err != nil { + return nil, err + } + wait, err := launch(ctx, logFile, append([]string{serveCommandName}, launchFlags...)...) + if err != nil { + return nil, err + } + go wait() // wait for buildx server to be ready ctx2, cancel = context.WithTimeout(ctx, 10*time.Second) - c, err = newBuildxClientAndCheck(ctx2, filepath.Join(serverRoot, "buildx.sock")) + c, err = newBuildxClientAndCheck(ctx2, filepath.Join(serverRoot, defaultSocketFilename)) cancel() - if err != nil { - return nil, errors.Wrap(err, "cannot connect to the buildx server") - } + if err != nil { + return nil, errors.Wrap(err, "cannot connect to the buildx server") + } return &buildxController{c, serverRoot}, nil } @@ -124,7 +130,7 @@ func serveCmd(dockerCli command.Cli) *cobra.Command { if err != nil { return err } - pidF := filepath.Join(root, "pid") + pidF := filepath.Join(root, defaultPIDFilename) if err := os.WriteFile(pidF, []byte(fmt.Sprintf("%d", os.Getpid())), 0600); err != nil { return err } @@ -141,7 +147,7 @@ func serveCmd(dockerCli command.Cli) *cobra.Command { defer b.Close() // serve server - addr := filepath.Join(root, "buildx.sock") + addr := filepath.Join(root, defaultSocketFilename) if err := os.Remove(addr); err != nil && !os.IsNotExist(err) { // avoid EADDRINUSE return err } @@ -199,7 +205,7 @@ func getLogFilePath(dockerCli command.Cli, configPath string) (string, error) { if err != nil { return "", err } - return filepath.Join(root, "log"), nil + return filepath.Join(root, defaultLogFilename), nil } return config.LogFile, nil } @@ -267,7 +273,7 @@ type buildxController struct { } func (c *buildxController) Kill(ctx context.Context) error { - pidB, err := os.ReadFile(filepath.Join(c.serverRoot, "pid")) + pidB, err := os.ReadFile(filepath.Join(c.serverRoot, defaultPIDFilename)) if err != nil { return err } From 54f4dc8f6e224e3842225b508715f42b065b6ad4 Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Tue, 14 Feb 2023 11:17:09 +0000 Subject: [PATCH 6/6] controller: set absolute path of server binary before execution Signed-off-by: Justin Chadwell --- controller/remote/controller.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/controller/remote/controller.go b/controller/remote/controller.go index 7ba70ff6..3e3303a6 100644 --- a/controller/remote/controller.go +++ b/controller/remote/controller.go @@ -296,7 +296,12 @@ func (c *buildxController) Kill(ctx context.Context) error { } func launch(ctx context.Context, logFile string, args ...string) (func() error, error) { - bCmd := exec.CommandContext(ctx, os.Args[0], args...) + // set absolute path of binary, since we set the working directory to the root + pathname, err := filepath.Abs(os.Args[0]) + if err != nil { + return nil, err + } + bCmd := exec.CommandContext(ctx, pathname, args...) if logFile != "" { f, err := os.OpenFile(logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil {