agent: P2-16 Windows service (SCM) integration

internal/agent/service: split by build tag into service_windows.go (an
svc.Handler that listens for Stop/Shutdown and delegates to the agent
loop) and service_other.go (foreground stub for Linux/macOS).
install_windows.go wraps mgr.Connect plus CreateService/Delete/Start/Stop
for the new 'restic-manager-agent install|uninstall|start|stop' subcommands.

Cross-compile verified: GOOS=windows GOARCH=amd64 go build ./cmd/agent
succeeds. UNTESTED on Windows itself — the SCM round-trip can't be
exercised from Linux CI; treat as a starting point for the first
real Windows install.
This commit is contained in:
2026-05-04 11:13:56 +01:00
parent 4c81ff3e7b
commit 8062db1f2f
4 changed files with 262 additions and 0 deletions
+22
View File
@@ -17,6 +17,7 @@ import (
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/runner" "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/runner"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/scheduler" "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/scheduler"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/secrets" "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/secrets"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/service"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/sysinfo" "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/sysinfo"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/wsclient" "gitea.dcglab.co.uk/steve/restic-manager/internal/agent/wsclient"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api" "gitea.dcglab.co.uk/steve/restic-manager/internal/api"
@@ -33,6 +34,27 @@ func main() {
} }
func run() error { func run() error {
// Optional first positional verb for SCM control on Windows.
// `restic-manager-agent install|uninstall|start|stop` route into
// the service package; everything else falls through to the
// flag-driven default (which is what systemd / interactive runs
// hit). On non-Windows builds these verbs return a clear error.
if len(os.Args) > 1 {
switch os.Args[1] {
case "install":
return service.Install()
case "uninstall":
return service.Uninstall()
case "start":
return service.Start()
case "stop":
return service.Stop()
case "run":
// Strip the verb so flag.Parse sees the rest unchanged.
os.Args = append([]string{os.Args[0]}, os.Args[2:]...)
}
}
configPath := flag.String("config", config.DefaultPath(), "path to agent.yaml") configPath := flag.String("config", config.DefaultPath(), "path to agent.yaml")
enrollServer := flag.String("enroll-server", "", "server URL (used with -enroll-token to perform first-run enrollment)") enrollServer := flag.String("enroll-server", "", "server URL (used with -enroll-token to perform first-run enrollment)")
enrollToken := flag.String("enroll-token", "", "one-time enrollment token (operator copies this from the UI)") enrollToken := flag.String("enroll-token", "", "one-time enrollment token (operator copies this from the UI)")
+103
View File
@@ -0,0 +1,103 @@
//go:build windows
// install_windows.go — thin wrappers around the Service Control
// Manager via golang.org/x/sys/windows/svc/mgr. Used by the agent's
// `install` / `uninstall` / `start` / `stop` subcommands.
//
// UNTESTED in CI. Mirrors the canonical example shape; if you need
// to extend this, prefer copying from x/sys/windows/svc/example
// over inventing new patterns.
package service
import (
"fmt"
"os"
"path/filepath"
"golang.org/x/sys/windows/svc/mgr"
)
// Install creates the SCM entry for the agent. The service image is
// the absolute path of the binary that is currently executing, and
// the single argument "run" is recorded with it. The entry uses
// mgr.StartAutomatic; no service account is set, so the SCM default
// applies. Install refuses to proceed when a service named
// ServiceName can already be opened.
func Install() error {
	binPath, err := os.Executable()
	if err != nil {
		return fmt.Errorf("install: locate executable: %w", err)
	}
	if binPath, err = filepath.Abs(binPath); err != nil {
		return fmt.Errorf("install: absolutise path: %w", err)
	}

	scm, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("install: connect SCM: %w", err)
	}
	defer scm.Disconnect()

	// A successful open means the name is already taken; report that
	// instead of attempting to create a duplicate.
	existing, openErr := scm.OpenService(ServiceName)
	if openErr == nil {
		_ = existing.Close() // handle was only needed as an existence probe
		return fmt.Errorf("service %q already installed; uninstall first", ServiceName)
	}

	cfg := mgr.Config{
		StartType:   mgr.StartAutomatic,
		DisplayName: "Restic-manager agent",
		Description: "Backs up this host on the schedule the central restic-manager dictates.",
	}
	created, err := scm.CreateService(ServiceName, binPath, cfg, "run")
	if err != nil {
		return fmt.Errorf("install: create service: %w", err)
	}
	defer created.Close()

	return nil
}
// Uninstall deletes the agent's SCM entry. It does not stop the
// service itself; whatever error the SCM reports while connecting,
// opening or deleting is returned wrapped with the failing step.
func Uninstall() error {
	scm, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("uninstall: connect SCM: %w", err)
	}
	defer scm.Disconnect()

	handle, err := scm.OpenService(ServiceName)
	if err != nil {
		return fmt.Errorf("uninstall: open service: %w", err)
	}
	defer handle.Close()

	err = handle.Delete()
	if err != nil {
		return fmt.Errorf("uninstall: delete service: %w", err)
	}
	return nil
}
// Start asks the SCM to start the installed service. The current
// state is not checked first: any error the SCM reports (the original
// note on this function says that includes the already-running case)
// is returned to the caller.
//
// Errors are wrapped with a "start: <step>" prefix, matching Install
// and Uninstall, so the operator can tell which SCM call failed. The
// underlying error stays reachable through errors.Is / errors.As.
func Start() error {
	m, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("start: connect SCM: %w", err)
	}
	defer m.Disconnect()

	s, err := m.OpenService(ServiceName)
	if err != nil {
		return fmt.Errorf("start: open service: %w", err)
	}
	defer s.Close()

	if err := s.Start(); err != nil {
		return fmt.Errorf("start: start service: %w", err)
	}
	return nil
}
// Stop sends a stop control to the installed service. It returns as
// soon as the SCM has accepted (or rejected) the control; the status
// returned by the SCM is discarded and Stop does not wait for the
// service to finish stopping.
//
// Errors are wrapped with a "stop: <step>" prefix, matching Install
// and Uninstall, so the operator can tell which SCM call failed. The
// underlying error stays reachable through errors.Is / errors.As.
func Stop() error {
	// Win32 SERVICE_CONTROL_STOP. Kept as a local untyped constant so
	// this file needs no additional import for the control code.
	const serviceControlStop = 0x00000001

	m, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("stop: connect SCM: %w", err)
	}
	defer m.Disconnect()

	s, err := m.OpenService(ServiceName)
	if err != nil {
		return fmt.Errorf("stop: open service: %w", err)
	}
	defer s.Close()

	if _, err := s.Control(serviceControlStop); err != nil {
		return fmt.Errorf("stop: send stop control: %w", err)
	}
	return nil
}
+44
View File
@@ -0,0 +1,44 @@
//go:build !windows
// service_other.go — non-Windows fallback for the service package.
// Linux uses systemd to wrap the agent; the binary itself just runs
// in the foreground. Run() therefore just executes the agent loop
// and returns. install/uninstall sub-commands return a clear error
// directing the operator at the install.sh + systemd unit shipped
// in deploy/install/.
package service
import (
"context"
"errors"
)
// AgentRun is the signature of the agent main-loop entry point that
// callers hand to Run. It receives the context Run creates and
// reports how the loop ended through its error result. The Windows
// build of this package is meant to declare the same shape so the
// call site stays portable.
type AgentRun func(ctx context.Context) error
// Run invokes agentRun directly in the calling goroutine and returns
// its result unchanged. The context handed to agentRun is derived
// from context.Background and is cancelled only once agentRun has
// returned; process lifecycle is left to whatever launched the
// binary (a systemd unit on Linux).
func Run(agentRun AgentRun) error {
	root := context.Background()
	loopCtx, release := context.WithCancel(root)
	defer release()

	return agentRun(loopCtx)
}
// Install is the non-Windows stand-in for service registration. It
// performs no work and always returns the "Windows-only" error
// produced by errUnsupported for the "install" verb.
func Install() error {
	const verb = "install"
	return errUnsupported(verb)
}
// Uninstall is the non-Windows stand-in for service removal. It
// performs no work and always returns the "Windows-only" error
// produced by errUnsupported for the "uninstall" verb.
func Uninstall() error {
	const verb = "uninstall"
	return errUnsupported(verb)
}
// Start is the non-Windows stand-in for starting the installed
// service. It performs no work and always returns the "Windows-only"
// error produced by errUnsupported for the "start" verb.
func Start() error {
	const verb = "start"
	return errUnsupported(verb)
}
// Stop is the non-Windows stand-in for stopping the installed
// service. It performs no work and always returns the "Windows-only"
// error produced by errUnsupported for the "stop" verb.
func Stop() error {
	const verb = "stop"
	return errUnsupported(verb)
}
// errUnsupported builds the error returned by the service-control
// verbs on non-Windows builds. verb is embedded in the message so the
// operator can see which subcommand was rejected.
func errUnsupported(verb string) error {
	msg := "service " + verb + " is Windows-only; use the systemd unit on Linux"
	return errors.New(msg)
}
+93
View File
@@ -0,0 +1,93 @@
//go:build windows
// service_windows.go — Service Control Manager integration for the
// agent on Windows (P2-16). Implements the svc.Handler interface so
// `restic-manager-agent run` works under both interactive and SCM
// contexts. install/uninstall live in install_windows.go.
//
// UNTESTED on Windows in this repo's CI (the runners are Linux).
// The shape mirrors the canonical example in
// golang.org/x/sys/windows/svc/example. Treat any deviation from
// that example as suspicious.
package service
import (
"context"
"errors"
"log/slog"
"golang.org/x/sys/windows/svc"
)
// ServiceName is the name under which the agent is known to the
// Windows Service Control Manager. Run passes it to svc.Run, and the
// install / uninstall / start / stop helpers in this package create
// or open the service by the same name.
const ServiceName = "restic-manager-agent"
// AgentRun is the signature of the agent main-loop entry point that
// Run (directly, or through the service handler) invokes. It receives
// a context that is cancelled when the service is asked to stop and
// reports how the loop ended through its error result. Callers pass
// cmd/agent's run-loop entry point so this package stays free of
// cross-cmd imports.
type AgentRun func(ctx context.Context) error
// Run starts the agent in whichever mode matches how the process was
// launched. When svc.IsWindowsService reports that the process is a
// Windows service, control is handed to svc.Run with a handler that
// wraps agentRun. Otherwise (e.g. `restic-manager-agent run` typed
// into a console) agentRun is called directly in the foreground with
// a context derived from context.Background. An error from the
// service check itself is returned as-is.
func Run(agentRun AgentRun) error {
	underSCM, err := svc.IsWindowsService()
	if err != nil {
		return err
	}
	if underSCM {
		return svc.Run(ServiceName, &handler{run: agentRun})
	}

	// Console launch: no dispatcher involved, just run the loop here.
	loopCtx, release := context.WithCancel(context.Background())
	defer release()
	return agentRun(loopCtx)
}
// handler is the svc.Handler that Run passes to svc.Run. Its Execute
// method starts the agent loop in a goroutine and then waits for
// either an SCM Stop / Shutdown request, on which it cancels the
// loop's context so the agent can wind down cleanly, or for the loop
// to finish by itself.
type handler struct {
// run is the agent main-loop entry point supplied to Run.
run AgentRun
}
// Execute is the service body invoked through svc.Run. It reports
// StartPending, launches the agent loop in a goroutine with a
// cancellable context, reports Running (accepting Stop and Shutdown),
// and then services control requests until the loop ends.
//
//   - Interrogate: the current status carried by the request is echoed back.
//   - Stop / Shutdown: the loop's context is cancelled, StopPending is
//     reported, and Execute blocks until the loop returns.
//   - Any other control is ignored.
//
// The first result is always false. The second is 0 when the loop
// returned nil or an error matching context.Canceled, and 1 for any
// other error (which is also logged).
func (h *handler) Execute(_ []string, req <-chan svc.ChangeRequest, status chan<- svc.Status) (bool, uint32) {
	status <- svc.Status{State: svc.StartPending}

	loopCtx, stopLoop := context.WithCancel(context.Background())
	defer stopLoop()

	// Capacity 1 lets the goroutine deliver its result and exit even
	// when nobody is receiving at that moment.
	finished := make(chan error, 1)
	go func() { finished <- h.run(loopCtx) }()

	status <- svc.Status{State: svc.Running, Accepts: svc.AcceptStop | svc.AcceptShutdown}

	for {
		select {
		case change := <-req:
			if change.Cmd == svc.Interrogate {
				status <- change.CurrentStatus
				continue
			}
			if change.Cmd != svc.Stop && change.Cmd != svc.Shutdown {
				continue // controls we did not advertise are ignored
			}

			slog.Info("svc: stop requested")
			stopLoop()
			status <- svc.Status{State: svc.StopPending}

			// Wait for the agent loop to acknowledge the cancellation.
			loopErr := <-finished
			if loopErr == nil || errors.Is(loopErr, context.Canceled) {
				return false, 0
			}
			slog.Warn("svc: agent loop exited with error", "err", loopErr)
			return false, 1

		case loopErr := <-finished:
			// The loop ended without a stop request; per the original
			// note this is uncommon (signal or fatal error). Treat it
			// as the service stopping.
			if loopErr == nil || errors.Is(loopErr, context.Canceled) {
				return false, 0
			}
			slog.Warn("svc: agent loop exited unexpectedly", "err", loopErr)
			return false, 1
		}
	}
}