eb63d8cbc1
Replaces imapdown.py with a multi-file Go implementation using github.com/emersion/go-imap/v2. All features preserved: SSL/STARTTLS, incremental UID-based downloads, attachment extraction to zip, modified UTF-7 folder name decoding, and full-mode safety checks. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
228 lines
4.8 KiB
Go
228 lines
4.8 KiB
Go
package main
|
|
|
|
import (
	"archive/zip"
	"bytes"
	"encoding/base64"
	"fmt"
	"io"
	"mime"
	"mime/multipart"
	"net/mail"
	"os"
	"path/filepath"
	"strings"
	"time"
)
|
|
|
|
// EmailMessage bundles a raw RFC 822 message with the metadata that
// ParseEmailMessage derives from its headers.
type EmailMessage struct {
	UID     uint32        // message UID supplied by the caller of ParseEmailMessage
	Date    time.Time     // result of GetMessageDate for the parsed message
	Subject string        // result of GetMessageSubject for the parsed message
	Raw     []byte        // the unmodified bytes the message was parsed from
	Parsed  *mail.Message // message as returned by mail.ReadMessage
}
|
|
|
|
// ParseEmailMessage parses raw RFC822 email data
|
|
func ParseEmailMessage(raw []byte, uid uint32) (*EmailMessage, error) {
|
|
msg, err := mail.ReadMessage(bytes.NewReader(raw))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse email: %w", err)
|
|
}
|
|
|
|
return &EmailMessage{
|
|
UID: uid,
|
|
Date: GetMessageDate(msg),
|
|
Subject: GetMessageSubject(msg),
|
|
Raw: raw,
|
|
Parsed: msg,
|
|
}, nil
|
|
}
|
|
|
|
// GetMessageDate returns the time carried by the message's Date header.
// When the header is absent or cannot be parsed, the current time is
// returned instead.
func GetMessageDate(msg *mail.Message) time.Time {
	// Header.Date reports an error for both a missing and a malformed
	// header, so one check covers both fallback cases.
	if when, err := msg.Header.Date(); err == nil {
		return when
	}
	return time.Now()
}
|
|
|
|
// GetMessageSubject extracts and decodes the subject from an email message
|
|
// Returns "no_subject" if subject is empty
|
|
func GetMessageSubject(msg *mail.Message) string {
|
|
subject := msg.Header.Get("Subject")
|
|
if subject == "" {
|
|
return "no_subject"
|
|
}
|
|
|
|
// Decode RFC 2047 encoded-words
|
|
decoded := DecodeHeaderValue(subject)
|
|
if decoded == "" {
|
|
return "no_subject"
|
|
}
|
|
|
|
return decoded
|
|
}
|
|
|
|
// DecodeHeaderValue decodes any RFC 2047 encoded-words found in a header
// value. If the decoder reports an error, the input is returned unchanged
// so callers always get a usable string.
func DecodeHeaderValue(encoded string) string {
	var decoder mime.WordDecoder

	if text, err := decoder.DecodeHeader(encoded); err == nil {
		return text
	}
	return encoded
}
|
|
|
|
// ExtractAttachments extracts attachments from an email and saves them to a zip file
|
|
// Returns the number of attachments extracted
|
|
func ExtractAttachments(msg *mail.Message, emlPath string) (int, error) {
|
|
mediaType, params, err := mime.ParseMediaType(msg.Header.Get("Content-Type"))
|
|
if err != nil {
|
|
// Not a multipart message or invalid content-type
|
|
return 0, nil
|
|
}
|
|
|
|
if !strings.HasPrefix(mediaType, "multipart/") {
|
|
// Not a multipart message
|
|
return 0, nil
|
|
}
|
|
|
|
attachments := make([]attachment, 0)
|
|
|
|
// Parse multipart message
|
|
boundary := params["boundary"]
|
|
if boundary == "" {
|
|
return 0, nil
|
|
}
|
|
|
|
mr := multipart.NewReader(msg.Body, boundary)
|
|
if err := extractPartsRecursive(mr, &attachments); err != nil {
|
|
// Ignore errors in attachment extraction
|
|
if len(attachments) == 0 {
|
|
return 0, nil
|
|
}
|
|
}
|
|
|
|
if len(attachments) == 0 {
|
|
return 0, nil
|
|
}
|
|
|
|
// Create zip file
|
|
zipPath := strings.TrimSuffix(emlPath, filepath.Ext(emlPath)) + ".zip"
|
|
zipFile, err := os.Create(zipPath)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("failed to create zip file: %w", err)
|
|
}
|
|
defer zipFile.Close()
|
|
|
|
zw := zip.NewWriter(zipFile)
|
|
defer zw.Close()
|
|
|
|
// Track duplicate filenames
|
|
seenNames := make(map[string]int)
|
|
|
|
for _, att := range attachments {
|
|
filename := att.filename
|
|
|
|
// Handle duplicate names
|
|
if count, exists := seenNames[filename]; exists {
|
|
seenNames[filename]++
|
|
ext := filepath.Ext(filename)
|
|
name := strings.TrimSuffix(filename, ext)
|
|
filename = fmt.Sprintf("%s_%d%s", name, count+1, ext)
|
|
} else {
|
|
seenNames[filename] = 0
|
|
}
|
|
|
|
// Write to zip
|
|
w, err := zw.Create(filename)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
if _, err := w.Write(att.data); err != nil {
|
|
continue
|
|
}
|
|
}
|
|
|
|
return len(attachments), nil
|
|
}
|
|
|
|
// attachment is one named payload taken from a MIME part and held in
// memory until ExtractAttachments writes it to the zip archive.
type attachment struct {
	filename string // file name taken from the MIME part, used as the zip entry name
	data     []byte // body of the part
}
|
|
|
|
// extractPartsRecursive recursively extracts attachments from multipart message
|
|
func extractPartsRecursive(mr *multipart.Reader, attachments *[]attachment) error {
|
|
for {
|
|
part, err := mr.NextPart()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Check Content-Disposition
|
|
disposition := part.Header.Get("Content-Disposition")
|
|
if disposition == "" {
|
|
// Check if it's a nested multipart
|
|
contentType := part.Header.Get("Content-Type")
|
|
mediaType, params, err := mime.ParseMediaType(contentType)
|
|
if err == nil && strings.HasPrefix(mediaType, "multipart/") {
|
|
boundary := params["boundary"]
|
|
if boundary != "" {
|
|
nestedMr := multipart.NewReader(part, boundary)
|
|
extractPartsRecursive(nestedMr, attachments)
|
|
}
|
|
}
|
|
part.Close()
|
|
continue
|
|
}
|
|
|
|
// Check if it's an attachment or inline
|
|
if !strings.Contains(disposition, "attachment") && !strings.Contains(disposition, "inline") {
|
|
part.Close()
|
|
continue
|
|
}
|
|
|
|
// Get filename
|
|
filename := part.FileName()
|
|
if filename == "" {
|
|
part.Close()
|
|
continue
|
|
}
|
|
|
|
// Decode filename if needed
|
|
filename = DecodeHeaderValue(filename)
|
|
filename = SanitizeFilename(filename, 100)
|
|
|
|
// Read attachment data
|
|
data, err := io.ReadAll(part)
|
|
part.Close()
|
|
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
if len(data) > 0 {
|
|
*attachments = append(*attachments, attachment{
|
|
filename: filename,
|
|
data: data,
|
|
})
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|