From eb63d8cbc1e148e79be511ce461f09528d6abf6d Mon Sep 17 00:00:00 2001 From: Steve Cliff Date: Wed, 25 Mar 2026 17:54:41 +0000 Subject: [PATCH] Rewrite from Python to Go for single-binary cross-platform builds Replaces imapdown.py with a multi-file Go implementation using github.com/emersion/go-imap/v2. All features preserved: SSL/STARTTLS, incremental UID-based downloads, attachment extraction to zip, modified UTF-7 folder name decoding, and full-mode safety checks. Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 16 ++- CLAUDE.md | 105 ++++++++++---- Makefile | 32 +++++ README.md | 195 ++++++++++++++++++------- email.go | 227 ++++++++++++++++++++++++++++++ filename.go | 126 +++++++++++++++++ go.mod | 10 ++ imap.go | 297 ++++++++++++++++++++++++++++++++++++++ imapdown.py | 399 ---------------------------------------------------- main.go | 243 ++++++++++++++++++++++++++++++++ state.go | 65 +++++++++ 11 files changed, 1230 insertions(+), 485 deletions(-) create mode 100644 Makefile create mode 100644 email.go create mode 100644 filename.go create mode 100644 go.mod create mode 100644 imap.go delete mode 100755 imapdown.py create mode 100644 main.go create mode 100644 state.go diff --git a/.gitignore b/.gitignore index 67c3aff..281a724 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ -# Python -__pycache__/ -*.py[cod] -.venv/ -venv/ +# Go binaries +imapdown +imapdown-* +*.exe +go.sum -# Downloads -download/ +# Test downloads +*.eml +*.zip +.imapdown_state.json diff --git a/CLAUDE.md b/CLAUDE.md index 40bac25..23847f4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,42 +4,56 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -This is a single-file Python script (`imapdown.py`) that downloads all emails from an IMAP server into individual EML files, preserving the folder hierarchy. It uses only Python's standard library and has no external dependencies. 
+This project downloads all emails from an IMAP server into individual EML files, preserving the folder hierarchy. + +Built with Go as a single self-contained binary, fast and cross-platform. ## Development Environment -- Python 3.6+ required -- Virtual environment is set up in `.venv` - activate it before running: - ```bash - source .venv/bin/activate - ``` +- Go 1.21+ required +- Dependencies: `github.com/emersion/go-imap/v2` (auto-installed via `go mod tidy`) +- Build with: `make build` or `go build` +- Cross-compile with: `make build-all` -## Running the Script +## Running the Application + +First, build the binary: +```bash +make build +# Or cross-compile for all platforms: +make build-all +``` Basic usage (incremental mode - only downloads new emails): ```bash -./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl +./imapdown -server imap.example.com -email user@example.com -user user@example.com -password "password" -ssl ``` Full download (ignores previous state, requires empty target directory): ```bash -./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl --full +./imapdown -server imap.example.com -email user@example.com -user user@example.com -password "password" -ssl -full ``` Testing/debugging with limited emails: ```bash -./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl --limit 10 +./imapdown -server imap.example.com -email user@example.com -user user@example.com -password "password" -ssl -limit 10 ``` Custom storage directory: ```bash -./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl --output /path/to/backup +./imapdown -server imap.example.com -email user@example.com -user user@example.com -password "password" -ssl -output /path/to/backup ``` ## Architecture -### Single-File Design -The 
entire application is contained in `imapdown.py` (13KB). This is intentional - no modules or packages. +### Implementation Structure +The code is organized into multiple files for clarity: +- `main.go` - Entry point, CLI parsing, orchestration +- `imap.go` - IMAP connection and folder operations +- `email.go` - Email parsing and attachment extraction +- `state.go` - State file management (JSON) +- `filename.go` - Filename sanitization and Modified UTF-7 decoding +- `Makefile` - Build targets ### State Tracking - The script maintains a `.imapdown_state.json` file in each email account's download folder @@ -62,15 +76,15 @@ The entire application is contained in `imapdown.py` (13KB). This is intentional ### Key Implementation Details -**Modified UTF-7 Decoding**: IMAP folder names use modified UTF-7 encoding (see `decode_modified_utf7()` at line 39). This is not standard base64 - it uses `,` instead of `/` and has special `&` handling. +**Modified UTF-7 Decoding**: IMAP folder names use modified UTF-7 encoding. This is not standard base64 - it uses `,` instead of `/` and has special `&` handling. Implemented in `DecodeModifiedUTF7()` in `filename.go`. **Filename Sanitization**: Two-stage process: -- `sanitize_filename()`: Removes invalid filesystem characters, max 50 chars for subjects -- `sanitize_folder_path()`: Converts IMAP folder separators (`.` or `/`) to OS path separators +- `SanitizeFilename()`: Removes invalid filesystem characters, max 50 chars for subjects +- `SanitizeFolderPath()`: Converts IMAP folder separators (`.` or `/`) to OS path separators -**UID-Based Incremental Updates**: Uses IMAP UIDs (not sequence numbers) because UIDs are persistent. The search `UID {last_uid + 1}:*` fetches only new messages. Some servers return the highest UID even when searching for higher UIDs, so there's additional filtering at line 251. +**UID-Based Incremental Updates**: Uses IMAP UIDs (not sequence numbers) because UIDs are persistent. 
When `lastUID > 0`, searches for UIDs > lastUID. On first run (`lastUID == 0`), searches the full UID range `1:*` rather than using an empty SearchCriteria, which does not work. Some servers return the highest UID even when searching for higher UIDs, so there's additional filtering. -**Full Mode Safety**: `--full` mode checks if the download folder already contains `.eml` files and refuses to run (line 325). This prevents accidental duplicates. Users must delete the folder first. +**Full Mode Safety**: `-full` mode checks if the download folder already contains `.eml` files and refuses to run. This prevents accidental duplicates. Users must delete the folder first. **Attachment Handling**: - Walks message parts looking for `Content-Disposition: attachment` or `inline` @@ -79,22 +93,59 @@ The entire application is contained in `imapdown.py` (13KB). This is intentional ## Output Structure +Without `-output` flag (default: `./{email_address}`): ``` -{output_dir}/ # default: ./download -└── {email_address}/ # sanitized email address - ├── .imapdown_state.json - ├── INBOX/ - │ ├── 123_20240115_Meeting_notes.eml - │ └── 124_20240116_Report.zip - └── Sent/ - └── 456_20240114_RE_Question.eml +{email_address}/ # sanitized email address in current directory +├── .imapdown_state.json +├── INBOX/ +│ ├── 123_20240115_Meeting_notes.eml +│ └── 124_20240116_Report.zip +└── Sent/ + └── 456_20240114_RE_Question.eml +``` + +With `-output /path/to/backup`: +``` +/path/to/backup/ # specified output directory used directly +├── .imapdown_state.json +├── INBOX/ +│ ├── 123_20240115_Meeting_notes.eml +│ └── 124_20240116_Report.zip +└── Sent/ + └── 456_20240114_RE_Question.eml +``` + +## Building and Installing + +Build for current platform: +```bash +make build +``` + +Cross-compile for all platforms: +```bash +make build-all +# Produces: imapdown-linux-amd64, imapdown-linux-arm64, +# imapdown-darwin-amd64, imapdown-darwin-arm64, +# imapdown-windows-amd64.exe +``` + +Install to `$GOPATH/bin`: +```bash +make install +``` + +Clean 
build artifacts: +```bash +make clean ``` ## Testing No formal test suite exists. Manual testing approach: -- Use `--limit 10` to download a small batch for verification +- Use `-limit 10` to download a small batch for verification - Test SSL vs STARTTLS connections - Test incremental mode by running twice - Verify `.eml` files open correctly in email clients - Check that folders with special characters (non-ASCII) are handled correctly +- Test first run (no state file) to ensure all messages are downloaded diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a839510 --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ +.PHONY: build build-all clean test install + +# Binary name +BINARY=imapdown + +# Build flags for smaller binaries +LDFLAGS=-ldflags="-s -w" + +# Build for current platform +build: + go build $(LDFLAGS) -o $(BINARY) + +# Cross-compile for multiple platforms +build-all: + GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o $(BINARY)-linux-amd64 + GOOS=linux GOARCH=arm64 go build $(LDFLAGS) -o $(BINARY)-linux-arm64 + GOOS=darwin GOARCH=amd64 go build $(LDFLAGS) -o $(BINARY)-darwin-amd64 + GOOS=darwin GOARCH=arm64 go build $(LDFLAGS) -o $(BINARY)-darwin-arm64 + GOOS=windows GOARCH=amd64 go build $(LDFLAGS) -o $(BINARY)-windows-amd64.exe + +# Clean build artifacts +clean: + rm -f $(BINARY) + rm -f $(BINARY)-* + +# Run tests +test: + go test -v ./... + +# Install to $GOPATH/bin +install: + go install $(LDFLAGS) diff --git a/README.md b/README.md index 97d6566..8d023d0 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,78 @@ # IMAP Downloader -A simple Python script to download all emails from an IMAP server into individual EML files, preserving the folder structure. +Download all emails from an IMAP server into individual EML files, preserving the folder structure. + +Single self-contained binary written in Go - fast, cross-platform, no dependencies. 
+ +## Quickstart + +```bash +# Build the binary +make build + +# Download all emails (creates a folder named after your email address) +./imapdown -server imap.gmail.com -email you@gmail.com -user you@gmail.com -password "your-password" -ssl + +# Subsequent runs only download new emails +./imapdown -server imap.gmail.com -email you@gmail.com -user you@gmail.com -password "your-password" -ssl +``` ## Features -- Downloads emails as standard `.eml` files +- Downloads emails as standard `.eml` files (open in any email client) - Preserves IMAP folder hierarchy locally - Extracts attachments into zip files alongside each email -- Supports SSL and STARTTLS connections +- Supports SSL/TLS and STARTTLS connections - Incremental updates using UID tracking (only download new emails) -- Multi-account support (separate folders per email address) +- Automatic state tracking - never re-downloads the same email - Configurable download limit for testing/debugging +- Works with Gmail, Outlook, FastMail, and any IMAP server ## Requirements -- Python 3.6+ -- No external dependencies (uses only standard library) +- Go 1.21+ (for building from source) +- OR use pre-compiled binaries (no requirements) ## Installation +Download from releases page (coming soon) or build from source: + ```bash -# Clone or download the script +# Clone repository git clone cd imapdown -# Create virtual environment (optional but recommended) -python3 -m venv .venv -source .venv/bin/activate +# Build the binary +make build + +# Or cross-compile for all platforms +make build-all ``` ## Usage ### Basic Usage -By default, the script only downloads new emails since the last run (incremental mode). On first run, it downloads everything. +By default, only new emails since the last run are downloaded (incremental mode). On first run, everything is downloaded. 
```bash -# Download emails using SSL (most common) -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl +# Generic IMAP server with SSL (most common) +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -ssl -# Using STARTTLS -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --starttls +# Gmail (requires app-specific password if 2FA enabled) +./imapdown -server imap.gmail.com -email you@gmail.com -user you@gmail.com -password "app-password" -ssl -# Custom port -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --port 12993 +# Outlook/Office 365 +./imapdown -server outlook.office365.com -email you@outlook.com -user you@outlook.com -password "password" -ssl # Custom storage directory -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --store /path/to/backup +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -ssl -output /path/to/backup + +# Using STARTTLS instead of SSL +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -starttls + +# Custom port +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -ssl -port 12993 ``` ### Full Download @@ -54,14 +80,13 @@ By default, the script only downloads new emails since the last run (incremental To force a complete download of all emails (ignoring previous state): ```bash -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --full +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -ssl -full ``` -**Note:** As a safety measure, `--full` will refuse to run if the download folder already contains emails. 
This prevents accidental duplicates. To re-download everything, first delete the folder: +**Note:** As a safety measure, `-full` will refuse to run if the download folder already contains emails. This prevents accidental duplicates. To re-download everything, first delete the folder: ```bash -rm -rf download/me@example.com/ -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --full +rm -rf me@example.com/ ``` ### Debugging/Testing @@ -69,44 +94,55 @@ rm -rf download/me@example.com/ Limit the number of emails downloaded: ```bash -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --limit 10 +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -ssl -limit 10 ``` ## Command Line Arguments -| Argument | Required | Description | -|----------|----------|-------------| -| `--server` | Yes | IMAP server hostname | -| `--email` | Yes | Email address (used for folder organization) | -| `--user` | Yes | Username for authentication | -| `--password` | Yes | Password for authentication | -| `--ssl` | No | Use implicit SSL/TLS (default port 993) | -| `--starttls` | No | Use STARTTLS (default port 143) | -| `--port` | No | Custom port (overrides defaults) | -| `--limit` | No | Maximum number of emails to download | -| `--full` | No | Download all emails (default: only new since last run) | -| `--store` | No | Directory to store downloaded emails (default: ./download) | +| Argument | Flag | Required | Description | +|----------|------|----------|-------------| +| Server | `-server` | Yes | IMAP server hostname | +| Email | `-email` | Yes | Email address (used for folder organization) | +| User | `-user` | Yes | Username for authentication | +| Password | `-password` | Yes | Password for authentication | +| SSL | `-ssl` | No | Use implicit SSL/TLS (default port 993) | +| STARTTLS | `-starttls` | No | Use STARTTLS 
(default port 143) | +| Port | `-port` | No | Custom port (overrides defaults) | +| Limit | `-limit` | No | Maximum number of emails to download | +| Full | `-full` | No | Download all emails (default: only new since last run) | +| Output | `-output` | No | Directory to store downloaded emails (default: ./{email}) | -Note: `--ssl` and `--starttls` are mutually exclusive. +**Notes:** +- `-ssl` and `-starttls` are mutually exclusive ## Output Structure -The default output structure (when `--store` is not specified): - +**Without `-output` flag** (default: `./{email_address}/`): ``` -./download/ -├── user@example.com/ -│ ├── .imapdown_state.json # Tracks last downloaded UID per folder -│ ├── INBOX/ -│ │ ├── 123_20240115_Meeting_notes.eml -│ │ ├── 124_20240116_Report.eml -│ │ └── 124_20240116_Report.zip # Attachments (if any) -│ ├── Sent/ -│ │ └── 456_20240114_RE_Question.eml -│ └── Archive/ -│ └── 789_20240101_Old_email.eml -└── another@example.com/ - └── ... +./user@example.com/ +├── .imapdown_state.json # Tracks last downloaded UID per folder +├── INBOX/ +│ ├── 123_20240115_Meeting_notes.eml +│ ├── 124_20240116_Report.eml +│ └── 124_20240116_Report.zip # Attachments (if any) +├── Sent/ +│ └── 456_20240114_RE_Question.eml +└── Archive/ + └── 789_20240101_Old_email.eml +``` + +**With `-output /path/to/backup`** (emails go directly into specified directory): +``` +/path/to/backup/ +├── .imapdown_state.json +├── INBOX/ +│ ├── 123_20240115_Meeting_notes.eml +│ ├── 124_20240116_Report.eml +│ └── 124_20240116_Report.zip +├── Sent/ +│ └── 456_20240114_RE_Question.eml +└── Archive/ + └── 789_20240101_Old_email.eml ``` ### File Naming @@ -123,7 +159,7 @@ When an email contains attachments, they are extracted and saved in a zip file w ## State Tracking -The script maintains a `.imapdown_state.json` file in each email account's folder. This file tracks the highest downloaded UID for each IMAP folder, enabling efficient incremental updates with `--update`. 
+A `.imapdown_state.json` file is maintained in the download folder. This file tracks the highest downloaded UID for each IMAP folder, enabling efficient incremental updates. Example state file: ```json @@ -134,6 +170,61 @@ Example state file: } ``` +## Building from Source + +```bash +# Build for current platform +make build + +# Cross-compile for all platforms +make build-all +# Produces: imapdown-linux-amd64, imapdown-linux-arm64, +# imapdown-darwin-amd64, imapdown-darwin-arm64, +# imapdown-windows-amd64.exe + +# Install to $GOPATH/bin +make install + +# Clean build artifacts +make clean + +# Or use Go directly +go build -ldflags="-s -w" -o imapdown +``` + +## Troubleshooting + +### Gmail Authentication + +Gmail requires an app-specific password if you have 2-factor authentication enabled: +1. Go to Google Account Settings → Security → 2-Step Verification → App passwords +2. Generate a new app password for "Mail" +3. Use this password instead of your regular password + +### Connection Issues + +- **SSL errors**: Make sure you're using the correct port (993 for SSL, 143 for STARTTLS) +- **Authentication failed**: Verify username and password are correct +- **Timeout**: Some servers require STARTTLS instead of SSL - try `-starttls` flag + +### First Run Not Downloading + +If the first run doesn't download anything: +1. Check the folder actually contains emails on the server +2. Try with `-limit 10` to test with a small batch first +3. 
Verify your credentials work by logging into webmail + +### Re-downloading Everything + +To start fresh and re-download all emails: +```bash +# Delete the email folder (and state file) +rm -rf ./your-email@example.com/ + +# Run with -full flag +./imapdown -server imap.example.com -email your-email@example.com -user your-email@example.com -password "password" -ssl -full +``` + ## License MIT diff --git a/email.go b/email.go new file mode 100644 index 0000000..1f1d18a --- /dev/null +++ b/email.go @@ -0,0 +1,227 @@ +package main + +import ( + "archive/zip" + "bytes" + "fmt" + "io" + "mime" + "mime/multipart" + "net/mail" + "os" + "path/filepath" + "strings" + "time" +) + +// EmailMessage represents a parsed email message +type EmailMessage struct { + UID uint32 + Date time.Time + Subject string + Raw []byte + Parsed *mail.Message +} + +// ParseEmailMessage parses raw RFC822 email data +func ParseEmailMessage(raw []byte, uid uint32) (*EmailMessage, error) { + msg, err := mail.ReadMessage(bytes.NewReader(raw)) + if err != nil { + return nil, fmt.Errorf("failed to parse email: %w", err) + } + + return &EmailMessage{ + UID: uid, + Date: GetMessageDate(msg), + Subject: GetMessageSubject(msg), + Raw: raw, + Parsed: msg, + }, nil +} + +// GetMessageDate extracts the date from an email message +// Falls back to current time if date cannot be parsed +func GetMessageDate(msg *mail.Message) time.Time { + dateStr := msg.Header.Get("Date") + if dateStr == "" { + return time.Now() + } + + parsed, err := mail.ParseDate(dateStr) + if err != nil { + return time.Now() + } + + return parsed +} + +// GetMessageSubject extracts and decodes the subject from an email message +// Returns "no_subject" if subject is empty +func GetMessageSubject(msg *mail.Message) string { + subject := msg.Header.Get("Subject") + if subject == "" { + return "no_subject" + } + + // Decode RFC 2047 encoded-words + decoded := DecodeHeaderValue(subject) + if decoded == "" { + return "no_subject" + } + + return 
decoded +} + +// DecodeHeaderValue decodes RFC 2047 encoded-words in headers +func DecodeHeaderValue(encoded string) string { + dec := new(mime.WordDecoder) + decoded, err := dec.DecodeHeader(encoded) + if err != nil { + return encoded + } + return decoded +} + +// ExtractAttachments extracts attachments from an email and saves them to a zip file +// Returns the number of attachments extracted +func ExtractAttachments(msg *mail.Message, emlPath string) (int, error) { + mediaType, params, err := mime.ParseMediaType(msg.Header.Get("Content-Type")) + if err != nil { + // Not a multipart message or invalid content-type + return 0, nil + } + + if !strings.HasPrefix(mediaType, "multipart/") { + // Not a multipart message + return 0, nil + } + + attachments := make([]attachment, 0) + + // Parse multipart message + boundary := params["boundary"] + if boundary == "" { + return 0, nil + } + + mr := multipart.NewReader(msg.Body, boundary) + if err := extractPartsRecursive(mr, &attachments); err != nil { + // Ignore errors in attachment extraction + if len(attachments) == 0 { + return 0, nil + } + } + + if len(attachments) == 0 { + return 0, nil + } + + // Create zip file + zipPath := strings.TrimSuffix(emlPath, filepath.Ext(emlPath)) + ".zip" + zipFile, err := os.Create(zipPath) + if err != nil { + return 0, fmt.Errorf("failed to create zip file: %w", err) + } + defer zipFile.Close() + + zw := zip.NewWriter(zipFile) + defer zw.Close() + + // Track duplicate filenames + seenNames := make(map[string]int) + + for _, att := range attachments { + filename := att.filename + + // Handle duplicate names + if count, exists := seenNames[filename]; exists { + seenNames[filename]++ + ext := filepath.Ext(filename) + name := strings.TrimSuffix(filename, ext) + filename = fmt.Sprintf("%s_%d%s", name, count+1, ext) + } else { + seenNames[filename] = 0 + } + + // Write to zip + w, err := zw.Create(filename) + if err != nil { + continue + } + + if _, err := w.Write(att.data); err != nil { + 
continue + } + } + + return len(attachments), nil +} + +type attachment struct { + filename string + data []byte +} + +// extractPartsRecursive recursively extracts attachments from multipart message +func extractPartsRecursive(mr *multipart.Reader, attachments *[]attachment) error { + for { + part, err := mr.NextPart() + if err == io.EOF { + break + } + if err != nil { + return err + } + + // Check Content-Disposition + disposition := part.Header.Get("Content-Disposition") + if disposition == "" { + // Check if it's a nested multipart + contentType := part.Header.Get("Content-Type") + mediaType, params, err := mime.ParseMediaType(contentType) + if err == nil && strings.HasPrefix(mediaType, "multipart/") { + boundary := params["boundary"] + if boundary != "" { + nestedMr := multipart.NewReader(part, boundary) + extractPartsRecursive(nestedMr, attachments) + } + } + part.Close() + continue + } + + // Check if it's an attachment or inline + if !strings.Contains(disposition, "attachment") && !strings.Contains(disposition, "inline") { + part.Close() + continue + } + + // Get filename + filename := part.FileName() + if filename == "" { + part.Close() + continue + } + + // Decode filename if needed + filename = DecodeHeaderValue(filename) + filename = SanitizeFilename(filename, 100) + + // Read attachment data + data, err := io.ReadAll(part) + part.Close() + + if err != nil { + continue + } + + if len(data) > 0 { + *attachments = append(*attachments, attachment{ + filename: filename, + data: data, + }) + } + } + + return nil +} diff --git a/filename.go b/filename.go new file mode 100644 index 0000000..3e45cde --- /dev/null +++ b/filename.go @@ -0,0 +1,126 @@ +package main + +import ( + "encoding/base64" + "encoding/binary" + "path/filepath" + "regexp" + "strings" + "unicode/utf16" +) + +// sanitizeFilenameRegex matches invalid filesystem characters +var sanitizeFilenameRegex = regexp.MustCompile(`[<>:"/\\|?*\x00-\x1f]`) + +// SanitizeFilename removes invalid filesystem 
characters and truncates to maxLength +func SanitizeFilename(name string, maxLength int) string { + if name == "" { + return "untitled" + } + + // Replace invalid characters with underscore + name = sanitizeFilenameRegex.ReplaceAllString(name, "_") + + // Trim leading/trailing dots and spaces + name = strings.Trim(name, ". ") + + // Truncate to max length + if len(name) > maxLength { + name = name[:maxLength] + } + + // Trim again after truncation + name = strings.Trim(name, ". ") + + if name == "" { + return "untitled" + } + + return name +} + +// SanitizeFolderPath converts IMAP folder paths to filesystem paths +func SanitizeFolderPath(folderName string) string { + // Replace both / and . with OS path separator + normalized := strings.ReplaceAll(folderName, "/", string(filepath.Separator)) + normalized = strings.ReplaceAll(normalized, ".", string(filepath.Separator)) + + // Split and sanitize each part + parts := strings.Split(normalized, string(filepath.Separator)) + sanitized := make([]string, 0, len(parts)) + + for _, part := range parts { + if part != "" { + sanitized = append(sanitized, SanitizeFilename(part, 100)) + } + } + + if len(sanitized) == 0 { + return "INBOX" + } + + return filepath.Join(sanitized...) 
+} + +// DecodeModifiedUTF7 decodes IMAP modified UTF-7 folder names +// Modified UTF-7 uses & as escape character, &- for literal &, +// and uses , instead of / in base64 encoding +func DecodeModifiedUTF7(s string) (string, error) { + var result strings.Builder + i := 0 + + for i < len(s) { + if s[i] == '&' { + // Check for &- (literal ampersand) + if i+1 < len(s) && s[i+1] == '-' { + result.WriteByte('&') + i += 2 + continue + } + + // Find the closing - + end := strings.IndexByte(s[i+1:], '-') + if end == -1 { + // No closing -, just append rest of string + result.WriteString(s[i:]) + break + } + end += i + 1 // Adjust to absolute position + + encoded := s[i+1 : end] + if encoded != "" { + // Replace , with / for standard base64 + encoded = strings.ReplaceAll(encoded, ",", "/") + + // Add padding to make length divisible by 4 + padding := (4 - len(encoded)%4) % 4 + encoded += strings.Repeat("=", padding) + + // Decode base64 + decoded, err := base64.StdEncoding.DecodeString(encoded) + if err != nil { + // On error, just append the original string + result.WriteString(s[i : end+1]) + i = end + 1 + continue + } + + // Convert UTF-16BE bytes to UTF-16 runes, then to string + utf16Runes := make([]uint16, len(decoded)/2) + for j := 0; j < len(decoded); j += 2 { + utf16Runes[j/2] = binary.BigEndian.Uint16(decoded[j : j+2]) + } + + result.WriteString(string(utf16.Decode(utf16Runes))) + } + + i = end + 1 + } else { + result.WriteByte(s[i]) + i++ + } + } + + return result.String(), nil +} + diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..fc75cfe --- /dev/null +++ b/go.mod @@ -0,0 +1,10 @@ +module github.com/yourusername/imapdown + +go 1.22.2 + +require github.com/emersion/go-imap/v2 v2.0.0-beta.7 + +require ( + github.com/emersion/go-message v0.18.1 // indirect + github.com/emersion/go-sasl v0.0.0-20231106173351-e73c9f7bad43 // indirect +) diff --git a/imap.go b/imap.go new file mode 100644 index 0000000..69079d8 --- /dev/null +++ b/imap.go @@ -0,0 
+1,297 @@ +package main + +import ( + "crypto/tls" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/emersion/go-imap/v2" + "github.com/emersion/go-imap/v2/imapclient" +) + +// IMAPClient wraps the IMAP client connection +type IMAPClient struct { + client *imapclient.Client +} + +// ConnectIMAP establishes an IMAP connection with the specified security mode +func ConnectIMAP(config *Config) (*IMAPClient, error) { + var client *imapclient.Client + var err error + + addr := fmt.Sprintf("%s:%d", config.Server, config.Port) + + if config.UseSSL { + fmt.Printf("Connecting to %s with SSL...\n", addr) + tlsConfig := &tls.Config{ + ServerName: config.Server, + } + client, err = imapclient.DialTLS(addr, &imapclient.Options{ + TLSConfig: tlsConfig, + }) + } else if config.UseSTARTTLS { + fmt.Printf("Connecting to %s...\n", addr) + fmt.Println("Upgrading to TLS with STARTTLS...") + tlsConfig := &tls.Config{ + ServerName: config.Server, + } + client, err = imapclient.DialStartTLS(addr, &imapclient.Options{ + TLSConfig: tlsConfig, + }) + } else { + fmt.Printf("Connecting to %s (plain)...\n", addr) + client, err = imapclient.DialInsecure(addr, nil) + } + + if err != nil { + return nil, fmt.Errorf("connection failed: %w", err) + } + + return &IMAPClient{client: client}, nil +} + +// Login authenticates with the IMAP server +func (c *IMAPClient) Login(username, password string) error { + if err := c.client.Login(username, password).Wait(); err != nil { + return fmt.Errorf("authentication failed: %w", err) + } + fmt.Println("Logged in successfully") + return nil +} + +// ListFolders returns all mailbox names, decoded from modified UTF-7 +func (c *IMAPClient) ListFolders() ([]string, error) { + listCmd := c.client.List("", "*", nil) + + folders := make([]string, 0) + for { + mbox := listCmd.Next() + if mbox == nil { + break + } + + // Decode modified UTF-7 folder name + decoded, err := DecodeModifiedUTF7(mbox.Mailbox) + if err != nil { + // On error, use 
original name + decoded = mbox.Mailbox + } + folders = append(folders, decoded) + } + + if err := listCmd.Close(); err != nil { + return nil, fmt.Errorf("failed to list folders: %w", err) + } + + return folders, nil +} + +// DownloadFolder downloads messages from a folder +// Returns (downloaded_count, highest_uid, error) +func (c *IMAPClient) DownloadFolder(folderName, baseDir string, limit *int, totalSoFar int, updateMode bool, lastUID uint32) (int, uint32, error) { + localPath := filepath.Join(baseDir, SanitizeFolderPath(folderName)) + if err := os.MkdirAll(localPath, 0755); err != nil { + return 0, lastUID, fmt.Errorf("failed to create directory: %w", err) + } + + // Select folder in read-only mode + selectCmd := c.client.Select(folderName, &imap.SelectOptions{ReadOnly: true}) + _, err := selectCmd.Wait() + if err != nil { + fmt.Printf(" Could not select folder: %s\n", folderName) + return 0, lastUID, err + } + + // Search for messages + var searchCriteria imap.SearchCriteria + + // Always set a UID range - empty SearchCriteria doesn't work + uidSet := imap.UIDSet{} + if updateMode && lastUID > 0 { + // Incremental update: search for UIDs > lastUID + uidSet.AddRange(imap.UID(lastUID+1), imap.UID(0xFFFFFFFF)) // 0xFFFFFFFF means * + } else { + // Full download or first run: search all UIDs from 1 to * + uidSet.AddRange(imap.UID(1), imap.UID(0xFFFFFFFF)) + } + searchCriteria.UID = []imap.UIDSet{uidSet} + + searchCmd := c.client.UIDSearch(&searchCriteria, nil) + searchData, err := searchCmd.Wait() + if err != nil { + fmt.Printf(" Could not search folder: %s\n", folderName) + return 0, lastUID, err + } + + uidList := make([]uint32, 0) + for _, uid := range searchData.AllUIDs() { + // Filter out UIDs <= lastUID (server quirk) + if !updateMode || lastUID == 0 || uint32(uid) > lastUID { + uidList = append(uidList, uint32(uid)) + } + } + + if len(uidList) == 0 { + fmt.Printf(" %s: no new messages\n", folderName) + return 0, lastUID, nil + } + + // Apply limit + if 
limit != nil { + remaining := *limit - totalSoFar + if remaining <= 0 { + return 0, lastUID, nil + } + if len(uidList) > remaining { + uidList = uidList[:remaining] + } + } + + fmt.Printf(" %s: %d messages to download\n", folderName, len(uidList)) + + downloaded := 0 + highestUID := lastUID + + for _, uid := range uidList { + msg, err := c.FetchMessage(uid) + if err != nil { + fmt.Printf(" Error downloading UID %d: %v\n", uid, err) + continue + } + + // Build filename + dateStr := msg.Date.Format("20060102_150405") + subject := SanitizeFilename(msg.Subject, 50) + filename := fmt.Sprintf("%d_%s_%s.eml", uid, dateStr, subject) + filepath := filepath.Join(localPath, filename) + + // Ensure unique filename + filepath = getUniqueFilepath(filepath) + + // Write EML file + if err := os.WriteFile(filepath, msg.Raw, 0644); err != nil { + fmt.Printf(" Error writing UID %d: %v\n", uid, err) + continue + } + + // Extract attachments + ExtractAttachments(msg.Parsed, filepath) + + downloaded++ + + if uid > highestUID { + highestUID = uid + } + } + + return downloaded, highestUID, nil +} + +// FetchMessage retrieves a single message by UID +func (c *IMAPClient) FetchMessage(uid uint32) (*EmailMessage, error) { + uidSet := imap.UIDSet{} + uidSet.AddNum(imap.UID(uid)) + + fetchCmd := c.client.Fetch(uidSet, &imap.FetchOptions{ + BodySection: []*imap.FetchItemBodySection{{}}, + }) + + msg := fetchCmd.Next() + if msg == nil { + fetchCmd.Close() + return nil, fmt.Errorf("message not found") + } + + // Iterate through fetch items to find body section + var rawEmail []byte + for { + item := msg.Next() + if item == nil { + break + } + + switch data := item.(type) { + case imapclient.FetchItemDataBodySection: + // Check if this is the full message (empty Part means full body) + if len(data.Section.Part) == 0 { + rawBytes, err := io.ReadAll(data.Literal) + if err != nil { + fetchCmd.Close() + return nil, fmt.Errorf("failed to read message body: %w", err) + } + rawEmail = rawBytes + } + } + 
} + + fetchCmd.Close() + + if rawEmail == nil { + return nil, fmt.Errorf("failed to retrieve message body") + } + + return ParseEmailMessage(rawEmail, uid) +} + +// Logout closes the IMAP connection +func (c *IMAPClient) Logout() error { + if c.client != nil { + return c.client.Logout().Wait() + } + return nil +} + +// getUniqueFilepath returns a unique filepath by appending _N if needed +func getUniqueFilepath(basePath string) string { + if _, err := os.Stat(basePath); os.IsNotExist(err) { + return basePath + } + + counter := 1 + ext := filepath.Ext(basePath) + name := strings.TrimSuffix(basePath, ext) + + for { + newPath := fmt.Sprintf("%s_%d%s", name, counter, ext) + if _, err := os.Stat(newPath); os.IsNotExist(err) { + return newPath + } + counter++ + } +} + +// parseFolderList parses IMAP LIST response (legacy, kept for reference) +var folderListPattern = regexp.MustCompile(`\((?P<flags>.*?)\) "(?P<delimiter>.*)" (?P<name>.*)`) + +func parseFolderList(response []string) []string { + folders := make([]string, 0) + + for _, item := range response { + match := folderListPattern.FindStringSubmatch(item) + if match == nil { + continue + } + + // Extract name (index 3) + name := match[3] + + // Remove surrounding quotes if present + if len(name) >= 2 && name[0] == '"' && name[len(name)-1] == '"' { + name = name[1 : len(name)-1] + } + + // Decode modified UTF-7 + decoded, err := DecodeModifiedUTF7(name) + if err != nil { + decoded = name + } + + folders = append(folders, decoded) + } + + return folders +} diff --git a/imapdown.py b/imapdown.py deleted file mode 100755 index 465333a..0000000 --- a/imapdown.py +++ /dev/null @@ -1,399 +0,0 @@ -#!/usr/bin/env python3 -"""Simple IMAP email downloader - downloads all emails to EML files.""" - -import argparse -import email -import email.utils -import imaplib -import io -import json -import os -import re -import sys -import zipfile -from datetime import datetime - - -def parse_args(): - """Parse command line arguments.""" - parser =
argparse.ArgumentParser( - description="Download all emails from an IMAP server to EML files" - ) - - parser.add_argument("--server", required=True, help="IMAP server hostname") - parser.add_argument("--email", required=True, help="Email address") - parser.add_argument("--user", required=True, help="Username for authentication") - parser.add_argument("--password", required=True, help="Password for authentication") - - security = parser.add_mutually_exclusive_group() - security.add_argument("--ssl", action="store_true", help="Use implicit SSL/TLS (default port 993)") - security.add_argument("--starttls", action="store_true", help="Use STARTTLS (default port 143)") - - parser.add_argument("--port", type=int, help="Custom port (default: 993 for SSL, 143 otherwise)") - parser.add_argument("--limit", type=int, help="Limit number of emails to download (for debugging)") - parser.add_argument("--full", action="store_true", help="Download all emails (default: only new emails since last run)") - parser.add_argument("--output", type=str, help="Directory to store downloaded emails (default: ./download)") - - return parser.parse_args() - - -def decode_modified_utf7(s): - """Decode IMAP modified UTF-7 folder names.""" - result = [] - i = 0 - while i < len(s): - if s[i] == '&': - if i + 1 < len(s) and s[i + 1] == '-': - result.append('&') - i += 2 - else: - end = s.find('-', i + 1) - if end == -1: - result.append(s[i:]) - break - encoded = s[i + 1:end] - if encoded: - encoded = encoded.replace(',', '/') - padding = (4 - len(encoded) % 4) % 4 - encoded += '=' * padding - try: - import base64 - decoded = base64.b64decode(encoded).decode('utf-16-be') - result.append(decoded) - except Exception: - result.append(s[i:end + 1]) - i = end + 1 - else: - result.append(s[i]) - i += 1 - return ''.join(result) - - -def parse_folder_list(response): - """Parse IMAP LIST response to extract folder names.""" - folders = [] - pattern = re.compile(r'\((?P<flags>.*?)\) "(?P<delimiter>.*)" (?P<name>.*)') - - for item in
response: - if isinstance(item, bytes): - item = item.decode('utf-8', errors='replace') - - match = pattern.match(item) - if match: - name = match.group('name') - if name.startswith('"') and name.endswith('"'): - name = name[1:-1] - name = decode_modified_utf7(name) - folders.append(name) - - return folders - - -def sanitize_filename(name, max_length=50): - """Sanitize a string for use as a filename.""" - if not name: - return "untitled" - name = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', name) - name = name.strip('. ') - name = name[:max_length] - name = name.strip('. ') - return name or "untitled" - - -def sanitize_folder_path(folder_name): - """Sanitize folder path for filesystem use.""" - parts = folder_name.replace('/', os.sep).replace('.', os.sep).split(os.sep) - sanitized = [sanitize_filename(p, max_length=100) for p in parts if p] - return os.path.join(*sanitized) if sanitized else "INBOX" - - -def get_message_date(msg): - """Extract date from email message.""" - date_str = msg.get('Date') - if date_str: - try: - parsed = email.utils.parsedate_to_datetime(date_str) - return parsed.strftime('%Y%m%d_%H%M%S') - except Exception: - pass - return datetime.now().strftime('%Y%m%d_%H%M%S') - - -def get_message_subject(msg): - """Extract and decode subject from email message.""" - subject = msg.get('Subject', '') - if not subject: - return 'no_subject' - - try: - decoded_parts = email.header.decode_header(subject) - decoded = [] - for part, charset in decoded_parts: - if isinstance(part, bytes): - charset = charset or 'utf-8' - try: - decoded.append(part.decode(charset, errors='replace')) - except Exception: - decoded.append(part.decode('utf-8', errors='replace')) - else: - decoded.append(part) - return ''.join(decoded) - except Exception: - return str(subject) - - -def extract_attachments(msg, eml_filepath): - """Extract attachments from email and save as zip file.""" - attachments = [] - - for part in msg.walk(): - content_disposition = part.get('Content-Disposition', 
'') - if 'attachment' in content_disposition or 'inline' in content_disposition: - filename = part.get_filename() - if filename: - try: - decoded_parts = email.header.decode_header(filename) - decoded_filename = [] - for data, charset in decoded_parts: - if isinstance(data, bytes): - charset = charset or 'utf-8' - decoded_filename.append(data.decode(charset, errors='replace')) - else: - decoded_filename.append(data) - filename = ''.join(decoded_filename) - except Exception: - pass - - payload = part.get_payload(decode=True) - if payload: - attachments.append((sanitize_filename(filename, max_length=100), payload)) - - if attachments: - zip_path = os.path.splitext(eml_filepath)[0] + '.zip' - with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf: - seen_names = {} - for filename, data in attachments: - if filename in seen_names: - seen_names[filename] += 1 - name, ext = os.path.splitext(filename) - filename = f"{name}_{seen_names[filename]}{ext}" - else: - seen_names[filename] = 0 - zf.writestr(filename, data) - return len(attachments) - return 0 - - -STATE_FILE = '.imapdown_state.json' - - -def load_state(base_dir): - """Load the state file tracking last downloaded emails.""" - state_path = os.path.join(base_dir, STATE_FILE) - if os.path.exists(state_path): - try: - with open(state_path, 'r') as f: - return json.load(f) - except Exception: - pass - return {} - - -def save_state(base_dir, state): - """Save the state file.""" - state_path = os.path.join(base_dir, STATE_FILE) - with open(state_path, 'w') as f: - json.dump(state, f, indent=2) - - -def connect_imap(server, port, use_ssl, use_starttls): - """Connect to IMAP server with appropriate security.""" - if use_ssl: - port = port or 993 - print(f"Connecting to {server}:{port} with SSL...") - return imaplib.IMAP4_SSL(server, port) - else: - port = port or 143 - print(f"Connecting to {server}:{port}...") - conn = imaplib.IMAP4(server, port) - if use_starttls: - print("Upgrading to TLS with STARTTLS...") - 
conn.starttls() - return conn - - -def download_folder(conn, folder_name, base_dir, limit=None, total_so_far=0, update_mode=False, last_uid=None): - """Download all emails from a folder. Returns (downloaded_count, highest_uid).""" - local_path = os.path.join(base_dir, sanitize_folder_path(folder_name)) - os.makedirs(local_path, exist_ok=True) - - try: - status, _ = conn.select(f'"{folder_name}"', readonly=True) - if status != 'OK': - print(f" Could not select folder: {folder_name}") - return 0, last_uid - except Exception as e: - print(f" Error selecting folder {folder_name}: {e}") - return 0, last_uid - - if update_mode and last_uid is not None: - status, data = conn.uid('SEARCH', None, f'UID {last_uid + 1}:*') - else: - status, data = conn.uid('SEARCH', None, 'ALL') - - if status != 'OK': - print(f" Could not search folder: {folder_name}") - return 0, last_uid - - uid_list = data[0].split() - - # Filter out UIDs <= last_uid (some servers return highest UID even when searching for higher) - if update_mode and last_uid is not None: - uid_list = [uid for uid in uid_list if int(uid) > last_uid] - - if not uid_list: - print(f" {folder_name}: no new messages") - return 0, last_uid - - if limit is not None: - remaining = limit - total_so_far - if remaining <= 0: - return 0, last_uid - uid_list = uid_list[:remaining] - - print(f" {folder_name}: {len(uid_list)} messages to download") - downloaded = 0 - highest_uid = last_uid - - for uid in uid_list: - try: - uid_int = int(uid) - status, data = conn.uid('FETCH', uid, '(RFC822)') - if status != 'OK': - continue - - raw_email = None - for part in data: - if isinstance(part, tuple): - raw_email = part[1] - break - - if raw_email is None: - continue - - msg = email.message_from_bytes(raw_email) - date_str = get_message_date(msg) - subject = sanitize_filename(get_message_subject(msg)) - - filename = f"{uid_int}_{date_str}_{subject}.eml" - filepath = os.path.join(local_path, filename) - - counter = 1 - base_filepath = filepath - 
while os.path.exists(filepath): - name, ext = os.path.splitext(base_filepath) - filepath = f"{name}_{counter}{ext}" - counter += 1 - - with open(filepath, 'wb') as f: - f.write(raw_email) - - extract_attachments(msg, filepath) - downloaded += 1 - - if highest_uid is None or uid_int > highest_uid: - highest_uid = uid_int - - except Exception as e: - print(f" Error downloading UID {uid}: {e}") - - return downloaded, highest_uid - - -def main(): - args = parse_args() - - email_folder = sanitize_filename(args.email, max_length=100) - if args.output: - base_dir = os.path.join(args.output, email_folder) - else: - base_dir = os.path.join(os.getcwd(), 'download', email_folder) - os.makedirs(base_dir, exist_ok=True) - - if args.full: - has_emails = False - for root, dirs, files in os.walk(base_dir): - if any(f.endswith('.eml') for f in files): - has_emails = True - break - if has_emails: - print(f"Error: --full specified but {base_dir} already contains emails.", file=sys.stderr) - print("Delete the folder first to do a full re-download, or run without --full for incremental update.", file=sys.stderr) - sys.exit(1) - - try: - conn = connect_imap(args.server, args.port, args.ssl, args.starttls) - except Exception as e: - print(f"Connection failed: {e}", file=sys.stderr) - sys.exit(1) - - try: - status, _ = conn.login(args.user, args.password) - if status != 'OK': - print("Authentication failed", file=sys.stderr) - sys.exit(1) - print("Logged in successfully") - except Exception as e: - print(f"Authentication failed: {e}", file=sys.stderr) - sys.exit(1) - - try: - status, folder_data = conn.list() - if status != 'OK': - print("Could not list folders", file=sys.stderr) - sys.exit(1) - - folders = parse_folder_list(folder_data) - print(f"Found {len(folders)} folders") - - update_mode = not args.full - state = load_state(base_dir) if update_mode else {} - if args.full: - print("Full download mode: downloading all emails") - else: - print("Incremental mode: only downloading new 
emails (use --full to download all)") - - total_downloaded = 0 - for folder in folders: - last_uid = None - if update_mode and folder in state: - try: - last_uid = int(state[folder]) - except (ValueError, TypeError): - pass - - downloaded, highest_uid = download_folder( - conn, folder, base_dir, args.limit, total_downloaded, - update_mode=update_mode, last_uid=last_uid - ) - total_downloaded += downloaded - - if highest_uid is not None: - state[folder] = highest_uid - - if args.limit and total_downloaded >= args.limit: - print(f" Reached limit of {args.limit} emails") - break - - save_state(base_dir, state) - print(f"\nDownloaded {total_downloaded} emails to {base_dir}") - - finally: - try: - conn.logout() - except Exception: - pass - - -if __name__ == '__main__': - main() diff --git a/main.go b/main.go new file mode 100644 index 0000000..08dcb13 --- /dev/null +++ b/main.go @@ -0,0 +1,243 @@ +package main + +import ( + "flag" + "fmt" + "os" + "path/filepath" +) + +// Config holds all CLI arguments +type Config struct { + Server string + Email string + User string + Password string + UseSSL bool + UseSTARTTLS bool + Port int + Limit *int + Full bool + Output string +} + +func main() { + config := parseArgs() + + baseDir := setupBaseDirectory(config) + + if config.Full { + if err := checkFullModeSafety(baseDir); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + } + + // Connect to IMAP server + client, err := ConnectIMAP(config) + if err != nil { + fmt.Fprintf(os.Stderr, "Connection failed: %v\n", err) + os.Exit(1) + } + defer client.Logout() + + // Login + if err := client.Login(config.User, config.Password); err != nil { + fmt.Fprintf(os.Stderr, "Authentication failed: %v\n", err) + os.Exit(1) + } + + // List folders + folders, err := client.ListFolders() + if err != nil { + fmt.Fprintf(os.Stderr, "Could not list folders: %v\n", err) + os.Exit(1) + } + fmt.Printf("Found %d folders\n", len(folders)) + + // Load state + updateMode := 
!config.Full + state, err := LoadState(baseDir, config.Full) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: could not load state: %v\n", err) + state = make(State) + } + + if config.Full { + fmt.Println("Full download mode: downloading all emails") + } else { + fmt.Println("Incremental mode: only downloading new emails (use --full to download all)") + } + + // Download folders + stats := downloadAllFolders(client, folders, baseDir, config, updateMode, state) + + // Save state + if err := SaveState(baseDir, state); err != nil { + fmt.Fprintf(os.Stderr, "Warning: could not save state: %v\n", err) + } + + fmt.Printf("\nDownloaded %d emails to %s\n", stats.TotalDownloaded, baseDir) +} + +// parseArgs parses and validates command line arguments +func parseArgs() *Config { + config := &Config{} + + flag.StringVar(&config.Server, "server", "", "IMAP server hostname (required)") + flag.StringVar(&config.Email, "email", "", "Email address (required)") + flag.StringVar(&config.User, "user", "", "Username for authentication (required)") + flag.StringVar(&config.Password, "password", "", "Password for authentication (required)") + flag.BoolVar(&config.UseSSL, "ssl", false, "Use implicit SSL/TLS (default port 993)") + flag.BoolVar(&config.UseSTARTTLS, "starttls", false, "Use STARTTLS (default port 143)") + flag.IntVar(&config.Port, "port", 0, "Custom port (default: 993 for SSL, 143 otherwise)") + flag.BoolVar(&config.Full, "full", false, "Download all emails (default: only new emails since last run)") + flag.StringVar(&config.Output, "output", "", "Directory to store downloaded emails (default: ./{email})") + + var limit int + flag.IntVar(&limit, "limit", 0, "Limit number of emails to download (for debugging)") + + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: %s [options]\n\n", os.Args[0]) + fmt.Fprintf(os.Stderr, "Download all emails from an IMAP server to EML files\n\n") + fmt.Fprintf(os.Stderr, "Options:\n") + flag.PrintDefaults() + } + + flag.Parse() + + // 
Validate required arguments + if config.Server == "" { + fmt.Fprintf(os.Stderr, "Error: --server is required\n") + flag.Usage() + os.Exit(1) + } + if config.Email == "" { + fmt.Fprintf(os.Stderr, "Error: --email is required\n") + flag.Usage() + os.Exit(1) + } + if config.User == "" { + fmt.Fprintf(os.Stderr, "Error: --user is required\n") + flag.Usage() + os.Exit(1) + } + if config.Password == "" { + fmt.Fprintf(os.Stderr, "Error: --password is required\n") + flag.Usage() + os.Exit(1) + } + + // Check mutually exclusive flags + if config.UseSSL && config.UseSTARTTLS { + fmt.Fprintf(os.Stderr, "Error: --ssl and --starttls are mutually exclusive\n") + os.Exit(1) + } + + // Set default port + if config.Port == 0 { + if config.UseSSL { + config.Port = 993 + } else { + config.Port = 143 + } + } + + // Set limit pointer + if limit > 0 { + config.Limit = &limit + } + + return config +} + +// setupBaseDirectory creates and returns the base directory for downloads +func setupBaseDirectory(config *Config) string { + var baseDir string + if config.Output != "" { + // Use specified output directory directly + baseDir = config.Output + } else { + // Create email folder in current directory + emailFolder := SanitizeFilename(config.Email, 100) + cwd, _ := os.Getwd() + baseDir = filepath.Join(cwd, emailFolder) + } + + if err := os.MkdirAll(baseDir, 0755); err != nil { + fmt.Fprintf(os.Stderr, "Error creating directory: %v\n", err) + os.Exit(1) + } + + return baseDir +} + +// checkFullModeSafety verifies no existing .eml files in full mode +func checkFullModeSafety(baseDir string) error { + hasEmails := false + + err := filepath.Walk(baseDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() && filepath.Ext(path) == ".eml" { + hasEmails = true + return filepath.SkipAll + } + return nil + }) + + if err != nil { + return err + } + + if hasEmails { + return fmt.Errorf("--full specified but %s already contains emails.\nDelete 
the folder first to do a full re-download, or run without --full for incremental update.", baseDir) + } + + return nil +} + +// DownloadStats tracks download statistics +type DownloadStats struct { + TotalDownloaded int + FoldersProcessed int +} + +// downloadAllFolders orchestrates the download of all folders +func downloadAllFolders(client *IMAPClient, folders []string, baseDir string, config *Config, updateMode bool, state State) *DownloadStats { + stats := &DownloadStats{} + + for _, folder := range folders { + lastUID := state.GetLastUID(folder) + + downloaded, highestUID, err := client.DownloadFolder( + folder, + baseDir, + config.Limit, + stats.TotalDownloaded, + updateMode, + lastUID, + ) + + if err != nil { + fmt.Printf(" Error processing folder %s: %v\n", folder, err) + continue + } + + stats.TotalDownloaded += downloaded + stats.FoldersProcessed++ + + if highestUID > 0 { + state.UpdateFolder(folder, highestUID) + } + + // Check limit + if config.Limit != nil && stats.TotalDownloaded >= *config.Limit { + fmt.Printf(" Reached limit of %d emails\n", *config.Limit) + break + } + } + + return stats +} diff --git a/state.go b/state.go new file mode 100644 index 0000000..3083528 --- /dev/null +++ b/state.go @@ -0,0 +1,65 @@ +package main + +import ( + "encoding/json" + "os" + "path/filepath" +) + +const stateFileName = ".imapdown_state.json" + +// State tracks the highest UID downloaded per folder +type State map[string]uint32 + +// LoadState reads the state file from the base directory +// Returns empty state if file doesn't exist or can't be read +func LoadState(baseDir string, fullMode bool) (State, error) { + if fullMode { + return make(State), nil + } + + statePath := filepath.Join(baseDir, stateFileName) + + data, err := os.ReadFile(statePath) + if err != nil { + if os.IsNotExist(err) { + return make(State), nil + } + return make(State), err + } + + var state State + if err := json.Unmarshal(data, &state); err != nil { + // Return empty state on parse 
error + return make(State), nil + } + + return state, nil +} + +// SaveState writes the state file to the base directory with indentation +func SaveState(baseDir string, state State) error { + statePath := filepath.Join(baseDir, stateFileName) + + data, err := json.MarshalIndent(state, "", " ") + if err != nil { + return err + } + + return os.WriteFile(statePath, data, 0644) +} + +// UpdateFolder updates the highest UID for a folder +func (s State) UpdateFolder(folder string, uid uint32) { + if current, exists := s[folder]; !exists || uid > current { + s[folder] = uid + } +} + +// GetLastUID returns the last UID for a folder, or 0 if not found +func (s State) GetLastUID(folder string) uint32 { + if uid, exists := s[folder]; exists { + return uid + } + return 0 +}