Files
restic-manager/internal/store/jobs.go
T
steve b6f8de1dcc lint: drive baseline to zero, drop only-new-issues gate
Cleanup pass over the repo so CI can enforce lint going forward
without the only-new-issues escape hatch:

* gofumpt -w across the tree (31 hits, all formatting)
* misspell --fix (25 hits, US-locale spelling) — but reverted on
  api.JobCancelled = "cancelled" since that literal is the wire +
  DB CHECK constraint value, plus matched the case in store/fleet.go
  back to "cancelled" and added //nolint:misspell on both for the
  next time someone reaches for the auto-fix
* Wrap every `defer rows.Close()` / `defer stmt.Close()` /
  `defer res.Body.Close()` in `defer func() { _ = x.Close() }()`
  to satisfy errcheck without losing the close itself
* websocket.Dial callers (1 prod, 4 tests) now capture + close the
  upgrade response Body — coder/websocket can return res with a nil
  Body on success, so the test deferred-closes guard against that
* Annotate the two genuine-by-design nilerr cases with //nolint
  comments explaining why nil-on-error is the contract (cookie
  missing = no session; ctx cancelled mid-backoff = clean shutdown)
* Add brief godoc on the 10 exported const groups + types that
  revive flagged (api.HostOS/HostArch/JobKind/JobStatus/LogStream/
  ErrorCode, restic.EventKind, store.Role, web.FS)
* Drop the unused (*Server).userByID method
* Inline the unparam baseView(active) — every UI page is under
  the dashboard primary nav today

Result: `golangci-lint run ./...` reports 0 issues. CI lint job
no longer needs only-new-issues: true; X-06 follow-up entry in
tasks.md removed.
2026-05-03 16:15:17 +01:00

220 lines
6.2 KiB
Go

package store
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"time"
)
// Job mirrors the jobs table. The pointer fields and Stats map to
// columns that GetJob reads as nullable: they are left nil when the
// stored value is NULL.
type Job struct {
// ID is the id column; every lookup and update in this file keys on it.
ID string
// HostID is the host_id column (the host the job belongs to).
HostID string
// Kind is the kind column; HasJobOfKind filters on it.
Kind string
// Status is the status column. CreateJob always writes 'queued'
// regardless of this field, MarkJobStarted writes 'running', and
// MarkJobFinished writes whatever status its caller supplies.
Status string
// ScheduledID is set when the job originates from a schedule fire
// and nil for operator-driven runs.
ScheduledID *string
ActorKind string // user|schedule|system
// ActorID is the actor_id column; nil when NULL.
ActorID *string
// StartedAt is written by MarkJobStarted; nil until then.
StartedAt *time.Time
// FinishedAt, ExitCode, Stats and Error are written together by
// MarkJobFinished; they are nil while the columns are still NULL.
FinishedAt *time.Time
ExitCode *int
// Stats is the raw JSON text of the stats column. GetJob leaves it
// nil when the column is NULL or empty. MarkJobFinished stores the
// JSON literal null when it is given no stats, so a finished job
// may read back as the four bytes "null".
Stats json.RawMessage
// Error is the error column; MarkJobFinished stores NULL for an
// empty message, which reads back as nil.
Error *string
// CreatedAt is stored as a UTC RFC3339Nano string in created_at.
CreatedAt time.Time
}
// CreateJob persists j as a new row in the jobs table. The status
// column is always written as 'queued' (j.Status is not consulted);
// the agent is expected to mark the job running once it actually
// starts work. j.ScheduledID is populated for jobs fired by a
// schedule (actor_kind="schedule") and nil for operator-driven
// run-now. created_at is stored as a UTC RFC3339Nano string.
//
// Any database failure is returned wrapped with a "store: create job"
// prefix.
func (s *Store) CreateJob(ctx context.Context, j Job) error {
	const insertJob = `INSERT INTO jobs (id, host_id, kind, status, scheduled_id, actor_kind, actor_id, created_at)
VALUES (?, ?, ?, 'queued', ?, ?, ?, ?)`

	// Bind values in the same order as the placeholders above.
	args := []any{
		j.ID, j.HostID, j.Kind,
		nullable(j.ScheduledID), j.ActorKind, nullable(j.ActorID),
		j.CreatedAt.UTC().Format(time.RFC3339Nano),
	}
	if _, err := s.db.ExecContext(ctx, insertJob, args...); err != nil {
		return fmt.Errorf("store: create job: %w", err)
	}
	return nil
}
// MarkJobStarted flips a job's status to 'running' and records
// started_at as when (stored as a UTC RFC3339Nano string). Only rows
// currently in 'queued' or 'running' are touched, so calling it again
// for a job that is already running overwrites started_at.
//
// It returns ErrNotFound when no row was updated — the id is unknown
// or the job is in some other status — and a wrapped error when the
// UPDATE fails or the driver cannot report the affected-row count.
func (s *Store) MarkJobStarted(ctx context.Context, id string, when time.Time) error {
	res, err := s.db.ExecContext(ctx,
		`UPDATE jobs
SET status = 'running', started_at = ?
WHERE id = ? AND status IN ('queued','running')`,
		when.UTC().Format(time.RFC3339Nano), id)
	if err != nil {
		return fmt.Errorf("store: mark started: %w", err)
	}
	// Surface a failed count instead of misreporting it as "not found".
	n, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("store: mark started: rows affected: %w", err)
	}
	if n == 0 {
		return ErrNotFound
	}
	return nil
}
// MarkJobFinished records the terminal state of job id: status,
// finished_at (when, stored as a UTC RFC3339Nano string), exit_code,
// stats and error are all written in one UPDATE. The update is keyed
// on id alone; the job's current status is not checked.
//
// Empty stats are stored as the JSON literal null. An empty errMsg is
// stored as SQL NULL.
//
// It returns ErrNotFound when no row matched id, and a wrapped error
// when the UPDATE fails or the driver cannot report the affected-row
// count.
func (s *Store) MarkJobFinished(ctx context.Context, id, status string, exitCode int, stats json.RawMessage, errMsg string, when time.Time) error {
	if len(stats) == 0 {
		stats = json.RawMessage("null")
	}
	res, err := s.db.ExecContext(ctx,
		`UPDATE jobs
SET status = ?, finished_at = ?, exit_code = ?, stats = ?, error = ?
WHERE id = ?`,
		status,
		when.UTC().Format(time.RFC3339Nano),
		exitCode, string(stats), nullableStr(errMsg), id)
	if err != nil {
		return fmt.Errorf("store: mark finished: %w", err)
	}
	// Surface a failed count instead of misreporting it as "not found".
	n, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("store: mark finished: rows affected: %w", err)
	}
	if n == 0 {
		return ErrNotFound
	}
	return nil
}
// AppendJobLog stores a single line of agent output in job_logs.
// seq is the agent's monotonic sequence number for the line (a gap
// between stored values implies lost data); ts is written as a UTC
// RFC3339Nano string. stream and payload are stored as given.
//
// Any database failure is returned wrapped with a
// "store: append job log" prefix.
func (s *Store) AppendJobLog(ctx context.Context, jobID string, seq int64, ts time.Time, stream, payload string) error {
	const insertLog = `INSERT INTO job_logs (job_id, seq, ts, stream, payload) VALUES (?,?,?,?,?)`

	stamp := ts.UTC().Format(time.RFC3339Nano)
	if _, err := s.db.ExecContext(ctx, insertLog, jobID, seq, stamp, stream, payload); err != nil {
		return fmt.Errorf("store: append job log: %w", err)
	}
	return nil
}
// JobLogLine is one persisted log line, ready to render. Its fields
// are the four columns ListJobLogs selects from job_logs.
type JobLogLine struct {
// Seq is the line's sequence number; ListJobLogs orders and pages by it.
Seq int64
// TS is the line's timestamp, parsed from the stored RFC3339Nano string.
TS time.Time
Stream string // stdout|stderr|event
// Payload is the line's content as stored.
Payload string
}
// ListJobLogs returns the persisted log lines of jobID in ascending
// seq order.
//
// afterSeq lets pagers and reconnect-resuming clients fetch only the
// tail: only rows with seq strictly greater than afterSeq are
// returned. Passing 0 therefore returns everything from seq 1 upward;
// a line stored with seq 0 would need a negative afterSeq
// (NOTE(review): confirm whether agents number lines from 0 or 1).
// limit caps the number of rows; a value <= 0 means no cap.
//
// A job with no matching lines yields a nil slice and a nil error.
// On any failure (query, scan, timestamp parse or row iteration) the
// result is nil and the error is wrapped with a "store:" prefix.
func (s *Store) ListJobLogs(ctx context.Context, jobID string, afterSeq int64, limit int) ([]JobLogLine, error) {
	q := `SELECT seq, ts, stream, payload FROM job_logs
WHERE job_id = ? AND seq > ? ORDER BY seq ASC`
	args := []any{jobID, afterSeq}
	if limit > 0 {
		q += ` LIMIT ?`
		args = append(args, limit)
	}
	rows, err := s.db.QueryContext(ctx, q, args...)
	if err != nil {
		return nil, fmt.Errorf("store: list job logs: %w", err)
	}
	defer func() { _ = rows.Close() }()

	var out []JobLogLine
	for rows.Next() {
		var (
			l  JobLogLine
			ts string
		)
		if err := rows.Scan(&l.Seq, &ts, &l.Stream, &l.Payload); err != nil {
			return nil, fmt.Errorf("store: scan job log: %w", err)
		}
		t, perr := time.Parse(time.RFC3339Nano, ts)
		if perr != nil {
			return nil, fmt.Errorf("store: parse job log ts: %w", perr)
		}
		l.TS = t
		out = append(out, l)
	}
	// An iteration error means the result may be truncated; do not hand
	// the caller a partial slice.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("store: iterate job logs: %w", err)
	}
	return out, nil
}
// GetJob loads the jobs row with the given id.
//
// Nullable columns (scheduled_id, actor_id, started_at, finished_at,
// exit_code, error) map to nil pointer fields when NULL. Stats is
// left nil when the stats column is NULL or empty. Timestamps are
// parsed as RFC3339Nano.
//
// It returns ErrNotFound when no row matches, and a wrapped error
// when the scan fails or a stored timestamp cannot be parsed.
func (s *Store) GetJob(ctx context.Context, id string) (*Job, error) {
	row := s.db.QueryRowContext(ctx,
		`SELECT id, host_id, kind, status, scheduled_id, actor_kind, actor_id,
started_at, finished_at, exit_code, stats, error, created_at
FROM jobs WHERE id = ?`, id)
	var (
		j          Job
		schedID    sql.NullString
		actorID    sql.NullString
		startedAt  sql.NullString
		finishedAt sql.NullString
		exitCode   sql.NullInt64
		stats      sql.NullString
		errMsg     sql.NullString
		createdAt  string
	)
	if err := row.Scan(&j.ID, &j.HostID, &j.Kind, &j.Status, &schedID,
		&j.ActorKind, &actorID, &startedAt, &finishedAt,
		&exitCode, &stats, &errMsg, &createdAt); err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			return nil, ErrNotFound
		}
		return nil, fmt.Errorf("store: scan job: %w", err)
	}

	// parseTS decodes one stored timestamp; column names it in the error
	// so a corrupt value is reported instead of becoming a zero time.
	parseTS := func(column, raw string) (time.Time, error) {
		t, err := time.Parse(time.RFC3339Nano, raw)
		if err != nil {
			return time.Time{}, fmt.Errorf("store: parse job %s: %w", column, err)
		}
		return t, nil
	}

	if schedID.Valid {
		v := schedID.String
		j.ScheduledID = &v
	}
	if actorID.Valid {
		v := actorID.String
		j.ActorID = &v
	}
	if startedAt.Valid {
		t, err := parseTS("started_at", startedAt.String)
		if err != nil {
			return nil, err
		}
		j.StartedAt = &t
	}
	if finishedAt.Valid {
		t, err := parseTS("finished_at", finishedAt.String)
		if err != nil {
			return nil, err
		}
		j.FinishedAt = &t
	}
	if exitCode.Valid {
		code := int(exitCode.Int64)
		j.ExitCode = &code
	}
	if stats.Valid && stats.String != "" {
		j.Stats = json.RawMessage(stats.String)
	}
	if errMsg.Valid {
		v := errMsg.String
		j.Error = &v
	}
	created, err := parseTS("created_at", createdAt)
	if err != nil {
		return nil, err
	}
	j.CreatedAt = created
	return &j, nil
}
// HasJobOfKind reports whether the jobs table holds at least one row
// for hostID with the given kind, whatever its status.
//
// The auto-init path on agent hello uses this to decide whether to
// dispatch a fresh `restic init`: after one attempt there is no
// automatic retry, even if that attempt failed (a failed init usually
// means bad creds, and retrying on every reconnect would only pile up
// failed rows). The operator can re-init manually via the Repo page's
// danger zone.
//
// A query failure is returned wrapped with a
// "store: count jobs of kind" prefix, together with false.
func (s *Store) HasJobOfKind(ctx context.Context, hostID, kind string) (bool, error) {
	const countByKind = `SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = ?`

	var count int
	row := s.db.QueryRowContext(ctx, countByKind, hostID, kind)
	if err := row.Scan(&count); err != nil {
		return false, fmt.Errorf("store: count jobs of kind: %w", err)
	}
	return count > 0, nil
}
// nullableStr converts s into a query argument: the empty string
// becomes an untyped nil (which database/sql binds as NULL), and any
// other string is returned unchanged.
func nullableStr(s string) any {
	var arg any // stays nil for the empty string
	if len(s) > 0 {
		arg = s
	}
	return arg
}