diff --git a/cmd/agent/main.go b/cmd/agent/main.go index 1bf1954..ac43d3c 100644 --- a/cmd/agent/main.go +++ b/cmd/agent/main.go @@ -17,6 +17,7 @@ import ( "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/runner" "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/scheduler" "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/secrets" + "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/service" "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/sysinfo" "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/wsclient" "gitea.dcglab.co.uk/steve/restic-manager/internal/api" @@ -33,6 +34,27 @@ func main() { } func run() error { + // Optional first positional verb for SCM control on Windows. + // `restic-manager-agent install|uninstall|start|stop` route into + // the service package; everything else falls through to the + // flag-driven default (which is what systemd / interactive runs + // hit). On non-Windows builds these verbs return a clear error. + if len(os.Args) > 1 { + switch os.Args[1] { + case "install": + return service.Install() + case "uninstall": + return service.Uninstall() + case "start": + return service.Start() + case "stop": + return service.Stop() + case "run": + // Strip the verb so flag.Parse sees the rest unchanged. + os.Args = append([]string{os.Args[0]}, os.Args[2:]...) 
+ } + } + configPath := flag.String("config", config.DefaultPath(), "path to agent.yaml") enrollServer := flag.String("enroll-server", "", "server URL (used with -enroll-token to perform first-run enrollment)") enrollToken := flag.String("enroll-token", "", "one-time enrollment token (operator copies this from the UI)") diff --git a/internal/agent/service/install_windows.go b/internal/agent/service/install_windows.go new file mode 100644 index 0000000..2092b53 --- /dev/null +++ b/internal/agent/service/install_windows.go @@ -0,0 +1,103 @@ +//go:build windows + +// install_windows.go — thin wrappers around the Service Control +// Manager via golang.org/x/sys/windows/svc/mgr. Used by the agent's +// `install` / `uninstall` / `start` / `stop` subcommands. +// +// UNTESTED in CI. Mirrors the canonical example shape; if you need +// to extend this, prefer copying from x/sys/windows/svc/example +// over inventing new patterns. +package service + +import ( + "fmt" + "os" + "path/filepath" + + "golang.org/x/sys/windows/svc/mgr" +) + +// Install registers the service with the SCM, pointing it at the +// currently-running binary. The service starts on every boot and +// runs as LocalSystem (default). 
+//
+// The service is created with the single argument "run". Install refuses
+// to proceed when a service named ServiceName can already be opened and
+// returns an error asking the operator to uninstall first. Every other
+// failure is wrapped with an "install:" prefix.
+func Install() error {
+	// The running binary's own path becomes the service's executable path.
+	exe, err := os.Executable()
+	if err != nil {
+		return fmt.Errorf("install: locate executable: %w", err)
+	}
+	exe, err = filepath.Abs(exe)
+	if err != nil {
+		return fmt.Errorf("install: absolutise path: %w", err)
+	}
+
+	m, err := mgr.Connect()
+	if err != nil {
+		return fmt.Errorf("install: connect SCM: %w", err)
+	}
+	defer m.Disconnect()
+
+	// A successful open means the name is taken. Any open error is treated
+	// as "not installed"; CreateService below reports real failures.
+	if existing, err := m.OpenService(ServiceName); err == nil {
+		_ = existing.Close() // best-effort: the handle was only an existence probe
+		return fmt.Errorf("service %q already installed; uninstall first", ServiceName)
+	}
+
+	s, err := m.CreateService(ServiceName, exe, mgr.Config{
+		StartType:   mgr.StartAutomatic,
+		DisplayName: "Restic-manager agent",
+		Description: "Backs up this host on the schedule the central restic-manager dictates.",
+	}, "run")
+	if err != nil {
+		return fmt.Errorf("install: create service: %w", err)
+	}
+	defer s.Close()
+	return nil
+}
+
+// Uninstall asks the SCM to delete the service registration. Callers
+// should stop the service first.
+//
+// NOTE(review): the Win32 DeleteService documentation says a running
+// service is only marked for deletion and the call still succeeds, so an
+// error here should not be relied on to detect a running service —
+// confirm on a real host.
+func Uninstall() error {
+	m, err := mgr.Connect()
+	if err != nil {
+		return fmt.Errorf("uninstall: connect SCM: %w", err)
+	}
+	defer m.Disconnect()
+
+	s, err := m.OpenService(ServiceName)
+	if err != nil {
+		return fmt.Errorf("uninstall: open service: %w", err)
+	}
+	defer s.Close()
+
+	if err := s.Delete(); err != nil {
+		return fmt.Errorf("uninstall: delete service: %w", err)
+	}
+	return nil
+}
+
+// Start asks the SCM to start the installed service. It is not
+// idempotent: if the SCM rejects the request (for example because the
+// service is already running) that error is returned. All failures are
+// wrapped with a "start:" prefix, matching Install and Uninstall.
+func Start() error {
+	m, err := mgr.Connect()
+	if err != nil {
+		return fmt.Errorf("start: connect SCM: %w", err)
+	}
+	defer m.Disconnect()
+
+	s, err := m.OpenService(ServiceName)
+	if err != nil {
+		return fmt.Errorf("start: open service: %w", err)
+	}
+	defer s.Close()
+
+	if err := s.Start(); err != nil {
+		return fmt.Errorf("start: start service: %w", err)
+	}
+	return nil
+}
+
+// Stop sends a stop control to the service.
+//
+// It returns as soon as the control request has been made: the status the
+// SCM reports back is discarded and Stop does not wait for the service to
+// finish stopping. All failures are wrapped with a "stop:" prefix.
+func Stop() error {
+	// Numeric value of SERVICE_CONTROL_STOP. Kept as an untyped constant
+	// because this file does not import the svc package that names it.
+	const controlStop = 0x00000001
+
+	m, err := mgr.Connect()
+	if err != nil {
+		return fmt.Errorf("stop: connect SCM: %w", err)
+	}
+	defer m.Disconnect()
+
+	s, err := m.OpenService(ServiceName)
+	if err != nil {
+		return fmt.Errorf("stop: open service: %w", err)
+	}
+	defer s.Close()
+
+	if _, err := s.Control(controlStop); err != nil {
+		return fmt.Errorf("stop: send stop control: %w", err)
+	}
+	return nil
+}
diff --git a/internal/agent/service/service_other.go b/internal/agent/service/service_other.go
new file mode 100644
index 0000000..d6d27c2
--- /dev/null
+++ b/internal/agent/service/service_other.go
@@ -0,0 +1,44 @@
+//go:build !windows
+
+// service_other.go — non-Windows fallback for the service package.
+// Linux uses systemd to wrap the agent; the binary itself just runs
+// in the foreground. Run() therefore just executes the agent loop
+// and returns. The install/uninstall/start/stop sub-commands return
+// a clear error directing the operator at the install.sh + systemd
+// unit shipped in deploy/install/.
+package service
+
+import (
+	"context"
+	"errors"
+)
+
+// AgentRun is the function-pointer shape main passes in. Same shape
+// as the Windows variant so the call site is portable.
+type AgentRun func(ctx context.Context) error
+
+// Run executes agentRun in the foreground and returns its result. The
+// context it passes is cancelled only after agentRun has returned.
+func Run(agentRun AgentRun) error {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	return agentRun(ctx)
+}
+
+// Install registers the agent as a service. Windows-only; on Unix
+// the systemd unit covers this — returns an error pointing there.
+func Install() error { return errUnsupported("install") }
+
+// Uninstall is the inverse of Install. Windows-only; always errors here.
+func Uninstall() error { return errUnsupported("uninstall") }
+
+// Start asks the OS service manager to start the installed service.
+// Windows-only; always errors here.
+func Start() error { return errUnsupported("start") }
+
+// Stop sends a stop signal to the installed service. Windows-only.
+func Stop() error { return errUnsupported("stop") }
+
+func errUnsupported(verb string) error { // verb: the rejected sub-command, e.g. "install"
+	return errors.New("service " + verb + " is Windows-only; use the systemd unit on Linux")
+}
diff --git a/internal/agent/service/service_windows.go b/internal/agent/service/service_windows.go
new file mode 100644
index 0000000..f045ada
--- /dev/null
+++ b/internal/agent/service/service_windows.go
@@ -0,0 +1,110 @@
+//go:build windows
+
+// service_windows.go — Service Control Manager integration for the
+// agent on Windows (P2-16). Implements the svc.Handler interface so
+// `restic-manager-agent run` works under both interactive and SCM
+// contexts. install/uninstall live in install_windows.go.
+//
+// UNTESTED on Windows in this repo's CI (the runners are Linux).
+// The shape mirrors the canonical example in
+// golang.org/x/sys/windows/svc/example. Treat any deviation from
+// that example as suspicious.
+package service
+
+import (
+	"context"
+	"errors"
+	"log/slog"
+
+	"golang.org/x/sys/windows/svc"
+)
+
+// ServiceName is the SCM identifier for the agent service.
+const ServiceName = "restic-manager-agent"
+
+// AgentRun is the function the service handler calls to start the
+// agent's main loop. Pass cmd/agent's run-loop entry point at the
+// call site so this package stays free of cross-cmd imports.
+type AgentRun func(ctx context.Context) error
+
+// Run delegates to the SCM dispatcher when running under Windows
+// service control, otherwise runs the agent loop in the foreground
+// (for `restic-manager-agent run` from a console, e.g. while
+// debugging on a developer's box). The foreground path installs no
+// signal handling of its own: ctx is cancelled only after agentRun
+// returns.
+func Run(agentRun AgentRun) error {
+	isService, err := svc.IsWindowsService()
+	if err != nil {
+		return err
+	}
+	if !isService {
+		ctx, cancel := context.WithCancel(context.Background())
+		defer cancel()
+		return agentRun(ctx)
+	}
+	return svc.Run(ServiceName, &handler{run: agentRun})
+}
+
+// handler implements svc.Handler. Execute is called once when the
+// service is started. We spawn the agent loop in a goroutine and
+// listen for SCM Stop / Shutdown notifications, cancelling the
+// context to wind down cleanly.
+type handler struct {
+	run AgentRun
+}
+
+// Execute runs the agent loop until the SCM asks the service to stop
+// or the loop ends by itself. It reports StartPending, then Running
+// (accepting Stop and Shutdown), and StopPending once a stop has been
+// requested. The return values are (svcSpecificEC, exitCode): (false,
+// 0) for a clean exit, including one caused by context cancellation,
+// and (true, 1) — a service-specific exit code — when the loop fails.
+func (h *handler) Execute(_ []string, req <-chan svc.ChangeRequest, status chan<- svc.Status) (bool, uint32) {
+	const accepted = svc.AcceptStop | svc.AcceptShutdown
+	status <- svc.Status{State: svc.StartPending}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Capacity 1 so the goroutine's single send never blocks.
+	doneCh := make(chan error, 1)
+	go func() { doneCh <- h.run(ctx) }()
+	status <- svc.Status{State: svc.Running, Accepts: accepted}
+
+	for {
+		select {
+		case c := <-req:
+			switch c.Cmd {
+			case svc.Interrogate:
+				status <- c.CurrentStatus
+			case svc.Stop, svc.Shutdown:
+				slog.Info("svc: stop requested")
+				cancel()
+				status <- svc.Status{State: svc.StopPending}
+				// Wait for the loop to wind down before reporting the result.
+				return exitStatus(<-doneCh, "svc: agent loop exited with error")
+			default:
+				// Nothing else is advertised in accepted; log rather than drop silently.
+				slog.Warn("svc: unexpected control request", "cmd", c.Cmd)
+			}
+		case err := <-doneCh:
+			// Agent loop exited on its own — uncommon (only via signal
+			// or fatal error). Surface as an SCM stop.
+			return exitStatus(err, "svc: agent loop exited unexpectedly")
+		}
+	}
+}
+
+// exitStatus converts the agent loop's result into Execute's return
+// values. A nil error or context.Canceled is a clean stop: (false, 0).
+// Anything else is logged under msg and reported as service-specific
+// exit code 1, so the SCM does not mistake it for Win32 error 1
+// (ERROR_INVALID_FUNCTION).
+func exitStatus(err error, msg string) (bool, uint32) {
+	if err == nil || errors.Is(err, context.Canceled) {
+		return false, 0
+	}
+	slog.Warn(msg, "err", err)
+	return true, 1
+}