From d73aabca96ceca0b2902622d78b9b1ea42c8f206 Mon Sep 17 00:00:00 2001 From: Steve Cliff Date: Sun, 21 Jun 2026 23:51:29 +0100 Subject: [PATCH] feat(mail): RFC822 message parsing (headers, body, attachments) Co-Authored-By: Claude Sonnet 4.6 --- go.mod | 1 + go.sum | 33 ++++++++ internal/mail/message.go | 98 ++++++++++++++++++++++ internal/mail/message_test.go | 70 ++++++++++++++++ internal/mail/testdata/with_attachment.eml | 18 ++++ 5 files changed, 220 insertions(+) create mode 100644 internal/mail/message.go create mode 100644 internal/mail/message_test.go create mode 100644 internal/mail/testdata/with_attachment.eml diff --git a/go.mod b/go.mod index 4b6d471..79b9d76 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require modernc.org/sqlite v1.53.0 require ( github.com/dustin/go-humanize v1.0.1 // indirect + github.com/emersion/go-message v0.18.2 // indirect github.com/google/uuid v1.6.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/ncruces/go-strftime v1.0.0 // indirect diff --git a/go.sum b/go.sum index b054032..3f9f4c3 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/emersion/go-message v0.18.2 h1:rl55SQdjd9oJcIoQNhubD2Acs1E6IzlZISRTK7x/Lpg= +github.com/emersion/go-message v0.18.2/go.mod h1:XpJyL70LwRvq2a8rVbHXikPgKj8+aI0kGdHlg16ibYA= github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -12,15 +14,46 @@ github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOF github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= 
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4= golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys 
v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ= golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8= golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= modernc.org/cc/v4 
v4.28.4 h1:Hd/4Es+MBj+/7hSdZaisNyu6bv3V0Dp2MdllyfqaH+c= modernc.org/cc/v4 v4.28.4/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI= modernc.org/ccgo/v4 v4.34.4 h1:OVnSOWQjVKOYkFxoHYB+qQmSHK5gqMqARM+K9DpR/Ws=
diff --git a/internal/mail/message.go b/internal/mail/message.go
new file mode 100644
index 0000000..ae1fd1b
--- /dev/null
+++ b/internal/mail/message.go
@@ -0,0 +1,133 @@
+// Package mail provides IMAP reading and RFC822 message parsing.
+package mail
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/emersion/go-message/mail"
+)
+
+// Header holds the envelope fields extracted from a message.
+type Header struct {
+	UID            uint32 // caller-supplied identifier, stored as given
+	From           string // first From address; empty if the field does not parse
+	To             string // first To address only; further recipients are dropped
+	Subject        string // Subject field as returned by the header parser
+	Date           string // Date field in UTC, formatted like time.RFC1123Z
+	MessageID      string // Message-ID with surrounding angle brackets removed
+	HasAttachments bool   // true when at least one attachment part was seen
+}
+
+// Attachment is one attachment part of a message.
+type Attachment struct {
+	Name    string // filename reported by the part header, if any
+	Size    int    // len(Content)
+	MIME    string // media type reported by the part header
+	Content []byte // part body as delivered by the mail reader
+}
+
+// Message is a parsed message: header fields, the first non-empty
+// text/plain inline body, and every attachment.
+type Message struct {
+	Header      Header
+	BodyText    string
+	Attachments []Attachment
+}
+
+// readHeader copies the top-level header fields of mr into a Header tagged
+// with uid. Address and date fields that are missing or unparsable stay empty.
+func readHeader(mr *mail.Reader, uid uint32) Header {
+	h := Header{UID: uid}
+	hd := mr.Header
+	// The Subject error is ignored on purpose: whatever value comes back is kept.
+	h.Subject, _ = hd.Subject()
+	if addrs, err := hd.AddressList("From"); err == nil && len(addrs) > 0 {
+		h.From = addrs[0].String()
+	}
+	if addrs, err := hd.AddressList("To"); err == nil && len(addrs) > 0 {
+		h.To = addrs[0].String()
+	}
+	if d, err := hd.Date(); err == nil {
+		h.Date = d.UTC().Format("Mon, 02 Jan 2006 15:04:05 -0700")
+	}
+	if msgID, err := hd.MessageID(); err == nil {
+		h.MessageID = msgID
+	} else {
+		// Unparsable Message-ID: keep the raw value minus brackets and spaces.
+		h.MessageID = strings.Trim(hd.Get("Message-Id"), "<> ")
+	}
+	return h
+}
+
+// parseParts walks every part of raw. With keepBodies set it buffers the
+// first non-empty text/plain inline body and every attachment payload;
+// without it no body is buffered and only Message.Header is filled in.
+func parseParts(uid uint32, raw []byte, keepBodies bool) (Message, error) {
+	mr, err := mail.CreateReader(bytes.NewReader(raw))
+	if err != nil {
+		return Message{}, fmt.Errorf("creating mail reader: %w", err)
+	}
+	m := Message{Header: readHeader(mr, uid)}
+	for {
+		part, err := mr.NextPart()
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			return Message{}, fmt.Errorf("reading message part: %w", err)
+		}
+		switch hdr := part.Header.(type) {
+		case *mail.InlineHeader:
+			if !keepBodies || m.BodyText != "" {
+				continue
+			}
+			// A Content-Type parse error is not fatal: the part is just not used as the body.
+			ct, _, _ := hdr.ContentType()
+			if ct != "text/plain" {
+				continue
+			}
+			b, err := io.ReadAll(part.Body)
+			if err != nil {
+				return Message{}, fmt.Errorf("reading text body: %w", err)
+			}
+			m.BodyText = string(b)
+		case *mail.AttachmentHeader:
+			m.Header.HasAttachments = true
+			if !keepBodies {
+				continue
+			}
+			// Filename and Content-Type errors only cost metadata, so they are not fatal.
+			name, _ := hdr.Filename()
+			ct, _, _ := hdr.ContentType()
+			b, err := io.ReadAll(part.Body)
+			if err != nil {
+				return Message{}, fmt.Errorf("reading attachment %q: %w", name, err)
+			}
+			m.Attachments = append(m.Attachments, Attachment{
+				Name: name, Size: len(b), MIME: ct, Content: b,
+			})
+		}
+	}
+	return m, nil
+}
+
+// ParseMessage decodes the full message including attachment contents.
+// It returns an error if the reader cannot be created, a part cannot be
+// read, or a body that is being kept cannot be read completely.
+func ParseMessage(uid uint32, raw []byte) (Message, error) {
+	return parseParts(uid, raw, true)
+}
+
+// ParseHeaderOnly decodes headers and detects attachments without buffering
+// any part body.
+func ParseHeaderOnly(uid uint32, raw []byte) (Header, error) {
+	m, err := parseParts(uid, raw, false)
+	if err != nil {
+		return Header{}, err
+	}
+	return m.Header, nil
+}
diff --git a/internal/mail/message_test.go b/internal/mail/message_test.go
new file mode 100644
index 0000000..30ecad3
--- /dev/null
+++ b/internal/mail/message_test.go
@@ -0,0 +1,66 @@
+package mail
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// loadFixture reads testdata/<name> and fails the test if it cannot be read.
+func loadFixture(t *testing.T, name string) []byte {
+	t.Helper()
+	b, err := os.ReadFile(filepath.Join("testdata", name))
+	if err != nil {
+		t.Fatalf("read fixture: %v", err)
+	}
+	return b
+}
+
+func TestParseMessage(t *testing.T) {
+	raw := loadFixture(t, "with_attachment.eml")
+	m, err := ParseMessage(42, raw)
+	if err != nil {
+		t.Fatalf("ParseMessage: %v", err)
+	}
+	if m.Header.UID != 42 {
+		t.Fatalf("uid: %d", m.Header.UID)
+	}
+	if m.Header.Subject != "Your Invoice #5" {
+		t.Fatalf("subject: %q", m.Header.Subject)
+	}
+	if m.Header.From != `"Bob" <bob@trusted.com>` && m.Header.From != "Bob <bob@trusted.com>" {
+		t.Fatalf("from: %q", m.Header.From)
+	}
+	if m.Header.MessageID != "abc123@trusted.com" {
+		t.Fatalf("message-id: %q", m.Header.MessageID)
+	}
+	if want := "Hello, here is your invoice."; !contains(m.BodyText, want) {
+		t.Fatalf("body=%q want contains %q", m.BodyText, want)
+	}
+	if !m.Header.HasAttachments {
+		t.Fatal("HasAttachments should be true")
+	}
+	if len(m.Attachments) != 1 || m.Attachments[0].Name != "invoice.txt" {
+		t.Fatalf("attachments: %+v", m.Attachments)
+	}
+	if !contains(string(m.Attachments[0].Content), "LINE-ITEM 1") {
+		t.Fatalf("attachment content: %q", m.Attachments[0].Content)
+	}
+}
+
+func TestParseHeaderOnly(t *testing.T) {
+	raw := loadFixture(t, "with_attachment.eml")
+	h, err := ParseHeaderOnly(7, raw)
+	if err != nil {
+		t.Fatalf("ParseHeaderOnly: %v", err)
+	}
+	if h.UID != 7 || h.Subject != "Your Invoice #5" || !h.HasAttachments {
+		t.Fatalf("header: %+v", h)
+	}
+}
+
+// contains reports whether sub occurs within s.
+func contains(s, sub string) bool {
+	return strings.Contains(s, sub)
+}
diff --git a/internal/mail/testdata/with_attachment.eml b/internal/mail/testdata/with_attachment.eml
new file mode 100644
index 0000000..e569f7c
--- /dev/null
+++ b/internal/mail/testdata/with_attachment.eml
@@ -0,0 +1,18 @@
+From: "Bob" <bob@trusted.com>
+To: me@example.com
+Subject: Your Invoice #5
+Date: Sat, 20 Jun 2026 10:00:00 +0000
+Message-ID: <abc123@trusted.com>
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+--BOUNDARY
+Content-Type: text/plain; charset=utf-8
+
+Hello, here is your invoice.
+--BOUNDARY
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: attachment; filename="invoice.txt"
+
+LINE-ITEM 1: 100.00
+--BOUNDARY--