diff --git a/.gitignore b/.gitignore index 67c3aff..281a724 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ -# Python -__pycache__/ -*.py[cod] -.venv/ -venv/ +# Go binaries +imapdown +imapdown-* +*.exe +go.sum -# Downloads -download/ +# Test downloads +*.eml +*.zip +.imapdown_state.json diff --git a/CLAUDE.md b/CLAUDE.md index 40bac25..23847f4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,42 +4,56 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -This is a single-file Python script (`imapdown.py`) that downloads all emails from an IMAP server into individual EML files, preserving the folder hierarchy. It uses only Python's standard library and has no external dependencies. +This project downloads all emails from an IMAP server into individual EML files, preserving the folder hierarchy. + +Built with Go as a single self-contained binary, fast and cross-platform. ## Development Environment -- Python 3.6+ required -- Virtual environment is set up in `.venv` - activate it before running: - ```bash - source .venv/bin/activate - ``` +- Go 1.21+ required +- Dependencies: `github.com/emersion/go-imap/v2` (auto-installed via `go mod tidy`) +- Build with: `make build` or `go build` +- Cross-compile with: `make build-all` -## Running the Script +## Running the Application + +First, build the binary: +```bash +make build +# Or cross-compile for all platforms: +make build-all +``` Basic usage (incremental mode - only downloads new emails): ```bash -./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl +./imapdown -server imap.example.com -email user@example.com -user user@example.com -password "password" -ssl ``` Full download (ignores previous state, requires empty target directory): ```bash -./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl --full +./imapdown -server imap.example.com 
-email user@example.com -user user@example.com -password "password" -ssl -full ``` Testing/debugging with limited emails: ```bash -./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl --limit 10 +./imapdown -server imap.example.com -email user@example.com -user user@example.com -password "password" -ssl -limit 10 ``` Custom storage directory: ```bash -./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl --output /path/to/backup +./imapdown -server imap.example.com -email user@example.com -user user@example.com -password "password" -ssl -output /path/to/backup ``` ## Architecture -### Single-File Design -The entire application is contained in `imapdown.py` (13KB). This is intentional - no modules or packages. +### Implementation Structure +The code is organized into multiple files for clarity: +- `main.go` - Entry point, CLI parsing, orchestration +- `imap.go` - IMAP connection and folder operations +- `email.go` - Email parsing and attachment extraction +- `state.go` - State file management (JSON) +- `filename.go` - Filename sanitization and Modified UTF-7 decoding +- `Makefile` - Build targets ### State Tracking - The script maintains a `.imapdown_state.json` file in each email account's download folder @@ -62,15 +76,15 @@ The entire application is contained in `imapdown.py` (13KB). This is intentional ### Key Implementation Details -**Modified UTF-7 Decoding**: IMAP folder names use modified UTF-7 encoding (see `decode_modified_utf7()` at line 39). This is not standard base64 - it uses `,` instead of `/` and has special `&` handling. +**Modified UTF-7 Decoding**: IMAP folder names use modified UTF-7 encoding. This is not standard base64 - it uses `,` instead of `/` and has special `&` handling. Implemented in `DecodeModifiedUTF7()` in `filename.go`. 
**Filename Sanitization**: Two-stage process: -- `sanitize_filename()`: Removes invalid filesystem characters, max 50 chars for subjects -- `sanitize_folder_path()`: Converts IMAP folder separators (`.` or `/`) to OS path separators +- `SanitizeFilename()`: Removes invalid filesystem characters, max 50 chars for subjects +- `SanitizeFolderPath()`: Converts IMAP folder separators (`.` or `/`) to OS path separators -**UID-Based Incremental Updates**: Uses IMAP UIDs (not sequence numbers) because UIDs are persistent. The search `UID {last_uid + 1}:*` fetches only new messages. Some servers return the highest UID even when searching for higher UIDs, so there's additional filtering at line 251. +**UID-Based Incremental Updates**: Uses IMAP UIDs (not sequence numbers) because UIDs are persistent. When `lastUID > 0`, searches for UIDs > lastUID. On first run (`lastUID == 0`) or in `-full` mode, searches the explicit UID range `1:*`, because an empty SearchCriteria does not work with the IMAP client library. Some servers return the highest UID even when searching for higher UIDs, so there's additional filtering. -**Full Mode Safety**: `--full` mode checks if the download folder already contains `.eml` files and refuses to run (line 325). This prevents accidental duplicates. Users must delete the folder first. +**Full Mode Safety**: `-full` mode checks if the download folder already contains `.eml` files and refuses to run. This prevents accidental duplicates. Users must delete the folder first. **Attachment Handling**: - Walks message parts looking for `Content-Disposition: attachment` or `inline` @@ -79,22 +93,59 @@ The entire application is contained in `imapdown.py` (13KB). 
This is intentional ## Output Structure +Without `-output` flag (default: `./{email_address}`): ``` -{output_dir}/ # default: ./download -└── {email_address}/ # sanitized email address - ├── .imapdown_state.json - ├── INBOX/ - │ ├── 123_20240115_Meeting_notes.eml - │ └── 124_20240116_Report.zip - └── Sent/ - └── 456_20240114_RE_Question.eml +{email_address}/ # sanitized email address in current directory +├── .imapdown_state.json +├── INBOX/ +│ ├── 123_20240115_Meeting_notes.eml +│ └── 124_20240116_Report.zip +└── Sent/ + └── 456_20240114_RE_Question.eml +``` + +With `-output /path/to/backup`: +``` +/path/to/backup/ # specified output directory used directly +├── .imapdown_state.json +├── INBOX/ +│ ├── 123_20240115_Meeting_notes.eml +│ └── 124_20240116_Report.zip +└── Sent/ + └── 456_20240114_RE_Question.eml +``` + +## Building and Installing + +Build for current platform: +```bash +make build +``` + +Cross-compile for all platforms: +```bash +make build-all +# Produces: imapdown-linux-amd64, imapdown-linux-arm64, +# imapdown-darwin-amd64, imapdown-darwin-arm64, +# imapdown-windows-amd64.exe +``` + +Install to `$GOPATH/bin`: +```bash +make install +``` + +Clean build artifacts: +```bash +make clean ``` ## Testing No formal test suite exists. 
Manual testing approach: -- Use `--limit 10` to download a small batch for verification +- Use `-limit 10` to download a small batch for verification - Test SSL vs STARTTLS connections - Test incremental mode by running twice - Verify `.eml` files open correctly in email clients - Check that folders with special characters (non-ASCII) are handled correctly +- Test first run (no state file) to ensure all messages are downloaded diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a839510 --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ +.PHONY: build build-all clean test install + +# Binary name +BINARY=imapdown + +# Build flags for smaller binaries +LDFLAGS=-ldflags="-s -w" + +# Build for current platform +build: + go build $(LDFLAGS) -o $(BINARY) + +# Cross-compile for multiple platforms +build-all: + GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o $(BINARY)-linux-amd64 + GOOS=linux GOARCH=arm64 go build $(LDFLAGS) -o $(BINARY)-linux-arm64 + GOOS=darwin GOARCH=amd64 go build $(LDFLAGS) -o $(BINARY)-darwin-amd64 + GOOS=darwin GOARCH=arm64 go build $(LDFLAGS) -o $(BINARY)-darwin-arm64 + GOOS=windows GOARCH=amd64 go build $(LDFLAGS) -o $(BINARY)-windows-amd64.exe + +# Clean build artifacts +clean: + rm -f $(BINARY) + rm -f $(BINARY)-* + +# Run tests +test: + go test -v ./... + +# Install to $GOPATH/bin +install: + go install $(LDFLAGS) diff --git a/README.md b/README.md index 97d6566..8d023d0 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,78 @@ # IMAP Downloader -A simple Python script to download all emails from an IMAP server into individual EML files, preserving the folder structure. +Download all emails from an IMAP server into individual EML files, preserving the folder structure. + +Single self-contained binary written in Go - fast, cross-platform, no dependencies. 
+ +## Quickstart + +```bash +# Build the binary +make build + +# Download all emails (creates a folder named after your email address) +./imapdown -server imap.gmail.com -email you@gmail.com -user you@gmail.com -password "your-password" -ssl + +# Subsequent runs only download new emails +./imapdown -server imap.gmail.com -email you@gmail.com -user you@gmail.com -password "your-password" -ssl +``` ## Features -- Downloads emails as standard `.eml` files +- Downloads emails as standard `.eml` files (open in any email client) - Preserves IMAP folder hierarchy locally - Extracts attachments into zip files alongside each email -- Supports SSL and STARTTLS connections +- Supports SSL/TLS and STARTTLS connections - Incremental updates using UID tracking (only download new emails) -- Multi-account support (separate folders per email address) +- Automatic state tracking - never re-downloads the same email - Configurable download limit for testing/debugging +- Works with Gmail, Outlook, FastMail, and any IMAP server ## Requirements -- Python 3.6+ -- No external dependencies (uses only standard library) +- Go 1.21+ (for building from source) +- OR use pre-compiled binaries (no requirements) ## Installation +Download from releases page (coming soon) or build from source: + ```bash -# Clone or download the script +# Clone repository git clone cd imapdown -# Create virtual environment (optional but recommended) -python3 -m venv .venv -source .venv/bin/activate +# Build the binary +make build + +# Or cross-compile for all platforms +make build-all ``` ## Usage ### Basic Usage -By default, the script only downloads new emails since the last run (incremental mode). On first run, it downloads everything. +By default, only new emails since the last run are downloaded (incremental mode). On first run, everything is downloaded. 
```bash -# Download emails using SSL (most common) -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl +# Generic IMAP server with SSL (most common) +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -ssl -# Using STARTTLS -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --starttls +# Gmail (requires app-specific password if 2FA enabled) +./imapdown -server imap.gmail.com -email you@gmail.com -user you@gmail.com -password "app-password" -ssl -# Custom port -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --port 12993 +# Outlook/Office 365 +./imapdown -server outlook.office365.com -email you@outlook.com -user you@outlook.com -password "password" -ssl # Custom storage directory -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --store /path/to/backup +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -ssl -output /path/to/backup + +# Using STARTTLS instead of SSL +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -starttls + +# Custom port +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -ssl -port 12993 ``` ### Full Download @@ -54,14 +80,13 @@ By default, the script only downloads new emails since the last run (incremental To force a complete download of all emails (ignoring previous state): ```bash -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --full +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -ssl -full ``` -**Note:** As a safety measure, `--full` will refuse to run if the download folder already contains emails. 
This prevents accidental duplicates. To re-download everything, first delete the folder: +**Note:** As a safety measure, `-full` will refuse to run if the download folder already contains emails. This prevents accidental duplicates. To re-download everything, first delete the folder: ```bash -rm -rf download/me@example.com/ -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --full +rm -rf me@example.com/ ``` ### Debugging/Testing @@ -69,44 +94,55 @@ rm -rf download/me@example.com/ Limit the number of emails downloaded: ```bash -./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --limit 10 +./imapdown -server imap.example.com -email me@example.com -user me@example.com -password "secret" -ssl -limit 10 ``` ## Command Line Arguments -| Argument | Required | Description | -|----------|----------|-------------| -| `--server` | Yes | IMAP server hostname | -| `--email` | Yes | Email address (used for folder organization) | -| `--user` | Yes | Username for authentication | -| `--password` | Yes | Password for authentication | -| `--ssl` | No | Use implicit SSL/TLS (default port 993) | -| `--starttls` | No | Use STARTTLS (default port 143) | -| `--port` | No | Custom port (overrides defaults) | -| `--limit` | No | Maximum number of emails to download | -| `--full` | No | Download all emails (default: only new since last run) | -| `--store` | No | Directory to store downloaded emails (default: ./download) | +| Argument | Flag | Required | Description | +|----------|------|----------|-------------| +| Server | `-server` | Yes | IMAP server hostname | +| Email | `-email` | Yes | Email address (used for folder organization) | +| User | `-user` | Yes | Username for authentication | +| Password | `-password` | Yes | Password for authentication | +| SSL | `-ssl` | No | Use implicit SSL/TLS (default port 993) | +| STARTTLS | `-starttls` | No | Use STARTTLS 
(default port 143) | +| Port | `-port` | No | Custom port (overrides defaults) | +| Limit | `-limit` | No | Maximum number of emails to download | +| Full | `-full` | No | Download all emails (default: only new since last run) | +| Output | `-output` | No | Directory to store downloaded emails (default: ./{email}) | -Note: `--ssl` and `--starttls` are mutually exclusive. +**Notes:** +- `-ssl` and `-starttls` are mutually exclusive ## Output Structure -The default output structure (when `--store` is not specified): - +**Without `-output` flag** (default: `./{email_address}/`): ``` -./download/ -├── user@example.com/ -│ ├── .imapdown_state.json # Tracks last downloaded UID per folder -│ ├── INBOX/ -│ │ ├── 123_20240115_Meeting_notes.eml -│ │ ├── 124_20240116_Report.eml -│ │ └── 124_20240116_Report.zip # Attachments (if any) -│ ├── Sent/ -│ │ └── 456_20240114_RE_Question.eml -│ └── Archive/ -│ └── 789_20240101_Old_email.eml -└── another@example.com/ - └── ... +./user@example.com/ +├── .imapdown_state.json # Tracks last downloaded UID per folder +├── INBOX/ +│ ├── 123_20240115_Meeting_notes.eml +│ ├── 124_20240116_Report.eml +│ └── 124_20240116_Report.zip # Attachments (if any) +├── Sent/ +│ └── 456_20240114_RE_Question.eml +└── Archive/ + └── 789_20240101_Old_email.eml +``` + +**With `-output /path/to/backup`** (emails go directly into specified directory): +``` +/path/to/backup/ +├── .imapdown_state.json +├── INBOX/ +│ ├── 123_20240115_Meeting_notes.eml +│ ├── 124_20240116_Report.eml +│ └── 124_20240116_Report.zip +├── Sent/ +│ └── 456_20240114_RE_Question.eml +└── Archive/ + └── 789_20240101_Old_email.eml ``` ### File Naming @@ -123,7 +159,7 @@ When an email contains attachments, they are extracted and saved in a zip file w ## State Tracking -The script maintains a `.imapdown_state.json` file in each email account's folder. This file tracks the highest downloaded UID for each IMAP folder, enabling efficient incremental updates with `--update`. 
+A `.imapdown_state.json` file is maintained in the download folder. This file tracks the highest downloaded UID for each IMAP folder, enabling efficient incremental updates. Example state file: ```json @@ -134,6 +170,61 @@ Example state file: } ``` +## Building from Source + +```bash +# Build for current platform +make build + +# Cross-compile for all platforms +make build-all +# Produces: imapdown-linux-amd64, imapdown-linux-arm64, +# imapdown-darwin-amd64, imapdown-darwin-arm64, +# imapdown-windows-amd64.exe + +# Install to $GOPATH/bin +make install + +# Clean build artifacts +make clean + +# Or use Go directly +go build -ldflags="-s -w" -o imapdown +``` + +## Troubleshooting + +### Gmail Authentication + +Gmail requires an app-specific password if you have 2-factor authentication enabled: +1. Go to Google Account Settings → Security → 2-Step Verification → App passwords +2. Generate a new app password for "Mail" +3. Use this password instead of your regular password + +### Connection Issues + +- **SSL errors**: Make sure you're using the correct port (993 for SSL, 143 for STARTTLS) +- **Authentication failed**: Verify username and password are correct +- **Timeout**: Some servers require STARTTLS instead of SSL - try `-starttls` flag + +### First Run Not Downloading + +If the first run doesn't download anything: +1. Check the folder actually contains emails on the server +2. Try with `-limit 10` to test with a small batch first +3. 
// EmailMessage is a downloaded message together with the metadata used to
// name its file on disk.
type EmailMessage struct {
	UID     uint32        // IMAP UID the message was fetched with
	Date    time.Time     // Date header value, or the parse time when missing/invalid
	Subject string        // decoded Subject header, "no_subject" when empty
	Raw     []byte        // unmodified RFC 822 bytes
	Parsed  *mail.Message // parsed headers plus a reader over the (unread) body
}

// ParseEmailMessage parses raw RFC 822 data and pairs it with its UID.
// It returns an error when the header block cannot be parsed.
func ParseEmailMessage(raw []byte, uid uint32) (*EmailMessage, error) {
	msg, err := mail.ReadMessage(bytes.NewReader(raw))
	if err != nil {
		return nil, fmt.Errorf("failed to parse email: %w", err)
	}

	return &EmailMessage{
		UID:     uid,
		Date:    GetMessageDate(msg),
		Subject: GetMessageSubject(msg),
		Raw:     raw,
		Parsed:  msg,
	}, nil
}

// GetMessageDate returns the time in the message's Date header.
// A missing or unparseable header yields the current time instead.
func GetMessageDate(msg *mail.Message) time.Time {
	// ParseDate rejects the empty string, so one check covers both cases.
	if parsed, err := mail.ParseDate(msg.Header.Get("Date")); err == nil {
		return parsed
	}
	return time.Now()
}

// GetMessageSubject returns the RFC 2047-decoded Subject header, or
// "no_subject" when the header is absent or decodes to nothing.
func GetMessageSubject(msg *mail.Message) string {
	if decoded := DecodeHeaderValue(msg.Header.Get("Subject")); decoded != "" {
		return decoded
	}
	return "no_subject"
}

// DecodeHeaderValue decodes RFC 2047 encoded-words in a header value.
// When decoding fails the input is returned unchanged.
func DecodeHeaderValue(encoded string) string {
	decoded, err := new(mime.WordDecoder).DecodeHeader(encoded)
	if err != nil {
		return encoded
	}
	return decoded
}

// ExtractAttachments writes every attachment of a multipart message into a
// zip archive that sits next to emlPath (same name, ".zip" extension).
//
// It returns the number of attachments written. Messages that are not
// multipart, or that contain no named attachment/inline parts, produce no
// archive and return (0, nil). An error is returned when the archive cannot
// be created, written, or finalized; the count then reflects the entries
// written before the failure.
func ExtractAttachments(msg *mail.Message, emlPath string) (int, error) {
	mediaType, params, err := mime.ParseMediaType(msg.Header.Get("Content-Type"))
	if err != nil || !strings.HasPrefix(mediaType, "multipart/") {
		// Missing/invalid Content-Type or a single-part message.
		return 0, nil
	}
	boundary := params["boundary"]
	if boundary == "" {
		return 0, nil
	}

	var attachments []attachment
	// Best effort: a truncated or malformed MIME tree still yields the parts
	// that were read before the error, so the error itself is not fatal.
	_ = extractPartsRecursive(multipart.NewReader(msg.Body, boundary), &attachments)
	if len(attachments) == 0 {
		return 0, nil
	}

	zipPath := strings.TrimSuffix(emlPath, filepath.Ext(emlPath)) + ".zip"
	zipFile, err := os.Create(zipPath)
	if err != nil {
		return 0, fmt.Errorf("failed to create zip file: %w", err)
	}
	// Safety net for the early returns below; the success path closes
	// explicitly so that the close error is not lost.
	defer zipFile.Close()

	zw := zip.NewWriter(zipFile)
	used := make(map[string]bool, len(attachments))
	written := 0
	for _, att := range attachments {
		w, err := zw.Create(uniqueZipName(att.filename, used))
		if err != nil {
			return written, fmt.Errorf("failed to add %q to zip file: %w", att.filename, err)
		}
		if _, err := w.Write(att.data); err != nil {
			return written, fmt.Errorf("failed to write %q to zip file: %w", att.filename, err)
		}
		written++
	}

	// The central directory is only written by Close; without checking it a
	// corrupt archive would be reported as success.
	if err := zw.Close(); err != nil {
		return written, fmt.Errorf("failed to finalize zip file: %w", err)
	}
	if err := zipFile.Close(); err != nil {
		return written, fmt.Errorf("failed to close zip file: %w", err)
	}

	return written, nil
}

// uniqueZipName returns name, or name with "_N" inserted before the
// extension, such that the result has not been handed out before. The chosen
// name is recorded in used.
func uniqueZipName(name string, used map[string]bool) string {
	ext := filepath.Ext(name)
	stem := strings.TrimSuffix(name, ext)

	candidate := name
	for i := 1; used[candidate]; i++ {
		candidate = fmt.Sprintf("%s_%d%s", stem, i, ext)
	}
	used[candidate] = true
	return candidate
}

// attachment is one decoded MIME part destined for the zip archive.
type attachment struct {
	filename string
	data     []byte
}

// extractPartsRecursive walks the parts of mr and appends every named
// attachment/inline part to attachments, descending into nested multipart
// containers. It stops and returns the error when the reader fails; parts
// collected up to that point remain in attachments.
func extractPartsRecursive(mr *multipart.Reader, attachments *[]attachment) error {
	for {
		part, err := mr.NextPart()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}

		collectPart(part, attachments)
		part.Close()
	}
}

// collectPart appends part to attachments when it is a named attachment or
// inline part, or recurses into it when it is a nested multipart container.
func collectPart(part *multipart.Part, attachments *[]attachment) {
	disposition := part.Header.Get("Content-Disposition")
	if disposition == "" {
		// No disposition: only interesting if it wraps further parts.
		mediaType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
		if err == nil && strings.HasPrefix(mediaType, "multipart/") && params["boundary"] != "" {
			// Nested failures are tolerated; siblings are still processed.
			_ = extractPartsRecursive(multipart.NewReader(part, params["boundary"]), attachments)
		}
		return
	}

	// Disposition types are case-insensitive ("Attachment" is valid).
	lowered := strings.ToLower(disposition)
	if !strings.Contains(lowered, "attachment") && !strings.Contains(lowered, "inline") {
		return
	}

	filename := part.FileName()
	if filename == "" {
		return
	}
	filename = SanitizeFilename(DecodeHeaderValue(filename), 100)

	data, err := io.ReadAll(part)
	if err != nil {
		return
	}

	// multipart.Reader only undoes quoted-printable on its own; base64 (the
	// usual encoding for binary attachments) has to be reversed here.
	data = decodeTransferEncoding(part.Header.Get("Content-Transfer-Encoding"), data)
	if len(data) > 0 {
		*attachments = append(*attachments, attachment{filename: filename, data: data})
	}
}

// decodeTransferEncoding reverses a base64 Content-Transfer-Encoding.
// Any other encoding, and base64 input that fails to decode, is returned
// unchanged so that no attachment is dropped because of a bad header.
func decodeTransferEncoding(encoding string, data []byte) []byte {
	if !strings.EqualFold(strings.TrimSpace(encoding), "base64") {
		return data
	}

	src := bytes.TrimSpace(data)
	dst := make([]byte, base64.StdEncoding.DecodedLen(len(src)))
	// Decode skips the CR/LF line breaks that MIME inserts into base64 bodies.
	n, err := base64.StdEncoding.Decode(dst, src)
	if err != nil {
		return data
	}
	return dst[:n]
}

// sanitizeFilenameRegex matches characters that are invalid in file names on
// at least one supported platform, plus ASCII control characters.
var sanitizeFilenameRegex = regexp.MustCompile(`[<>:"/\\|?*\x00-\x1f]`)

// SanitizeFilename replaces invalid filesystem characters with "_", strips
// leading/trailing dots and spaces, and truncates the result to at most
// maxLength bytes without splitting a multi-byte UTF-8 character.
// It returns "untitled" when nothing usable remains.
func SanitizeFilename(name string, maxLength int) string {
	if maxLength < 0 {
		maxLength = 0
	}

	name = sanitizeFilenameRegex.ReplaceAllString(name, "_")
	name = strings.Trim(name, ". ")

	if len(name) > maxLength {
		cut := maxLength
		// name[cut] is the first byte that would be dropped. If it is a UTF-8
		// continuation byte (10xxxxxx) the cut lies inside a character, so
		// back up to that character's first byte.
		for cut > 0 && name[cut]&0xC0 == 0x80 {
			cut--
		}
		// Truncation may expose a new trailing dot or space.
		name = strings.Trim(name[:cut], ". ")
	}

	if name == "" {
		return "untitled"
	}
	return name
}

// SanitizeFolderPath converts an IMAP folder path into a relative filesystem
// path. Both "/" and "." are treated as hierarchy separators, empty segments
// are dropped, and each remaining segment is passed through SanitizeFilename.
// A path with no usable segments maps to "INBOX".
func SanitizeFolderPath(folderName string) string {
	isSeparator := func(r rune) bool {
		return r == '/' || r == '.' || r == filepath.Separator
	}

	// FieldsFunc never yields empty segments, so "a..b" and "/a/" are safe.
	parts := strings.FieldsFunc(folderName, isSeparator)
	if len(parts) == 0 {
		return "INBOX"
	}

	for i, part := range parts {
		parts[i] = SanitizeFilename(part, 100)
	}
	return filepath.Join(parts...)
}

// DecodeModifiedUTF7 decodes an IMAP modified UTF-7 folder name.
//
// Modified UTF-7 uses "&" as the shift character, "&-" for a literal "&",
// and "," instead of "/" in its base64 alphabet; the payload is UTF-16BE.
// Segments that are unterminated or do not decode cleanly are copied to the
// output verbatim. The returned error is always nil; it is kept for
// interface compatibility.
func DecodeModifiedUTF7(s string) (string, error) {
	var result strings.Builder

	for i := 0; i < len(s); {
		if s[i] != '&' {
			result.WriteByte(s[i])
			i++
			continue
		}

		// "&-" is the escape for a literal ampersand.
		if i+1 < len(s) && s[i+1] == '-' {
			result.WriteByte('&')
			i += 2
			continue
		}

		rel := strings.IndexByte(s[i+1:], '-')
		if rel == -1 {
			// Unterminated shift sequence: keep the remainder as-is.
			result.WriteString(s[i:])
			break
		}
		end := i + 1 + rel // absolute index of the closing '-'

		if text, ok := decodeUTF7Segment(s[i+1 : end]); ok {
			result.WriteString(text)
		} else {
			result.WriteString(s[i : end+1])
		}
		i = end + 1
	}

	return result.String(), nil
}

// decodeUTF7Segment decodes the base64 payload found between "&" and "-".
// It reports false when the payload is not valid modified base64 or does not
// contain a whole number of UTF-16 code units.
func decodeUTF7Segment(encoded string) (string, bool) {
	// Map the modified alphabet back to standard base64 and restore padding.
	encoded = strings.ReplaceAll(encoded, ",", "/")
	encoded += strings.Repeat("=", (4-len(encoded)%4)%4)

	raw, err := base64.StdEncoding.DecodeString(encoded)
	if err != nil || len(raw)%2 != 0 {
		// An odd byte count cannot be UTF-16; reading it would go out of range.
		return "", false
	}

	units := make([]uint16, len(raw)/2)
	for i := range units {
		units[i] = binary.BigEndian.Uint16(raw[2*i:])
	}
	return string(utf16.Decode(units)), true
}
+1,297 @@ +package main + +import ( + "crypto/tls" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/emersion/go-imap/v2" + "github.com/emersion/go-imap/v2/imapclient" +) + +// IMAPClient wraps the IMAP client connection +type IMAPClient struct { + client *imapclient.Client +} + +// ConnectIMAP establishes an IMAP connection with the specified security mode +func ConnectIMAP(config *Config) (*IMAPClient, error) { + var client *imapclient.Client + var err error + + addr := fmt.Sprintf("%s:%d", config.Server, config.Port) + + if config.UseSSL { + fmt.Printf("Connecting to %s with SSL...\n", addr) + tlsConfig := &tls.Config{ + ServerName: config.Server, + } + client, err = imapclient.DialTLS(addr, &imapclient.Options{ + TLSConfig: tlsConfig, + }) + } else if config.UseSTARTTLS { + fmt.Printf("Connecting to %s...\n", addr) + fmt.Println("Upgrading to TLS with STARTTLS...") + tlsConfig := &tls.Config{ + ServerName: config.Server, + } + client, err = imapclient.DialStartTLS(addr, &imapclient.Options{ + TLSConfig: tlsConfig, + }) + } else { + fmt.Printf("Connecting to %s (plain)...\n", addr) + client, err = imapclient.DialInsecure(addr, nil) + } + + if err != nil { + return nil, fmt.Errorf("connection failed: %w", err) + } + + return &IMAPClient{client: client}, nil +} + +// Login authenticates with the IMAP server +func (c *IMAPClient) Login(username, password string) error { + if err := c.client.Login(username, password).Wait(); err != nil { + return fmt.Errorf("authentication failed: %w", err) + } + fmt.Println("Logged in successfully") + return nil +} + +// ListFolders returns all mailbox names, decoded from modified UTF-7 +func (c *IMAPClient) ListFolders() ([]string, error) { + listCmd := c.client.List("", "*", nil) + + folders := make([]string, 0) + for { + mbox := listCmd.Next() + if mbox == nil { + break + } + + // Decode modified UTF-7 folder name + decoded, err := DecodeModifiedUTF7(mbox.Mailbox) + if err != nil { + // On error, use 
original name + decoded = mbox.Mailbox + } + folders = append(folders, decoded) + } + + if err := listCmd.Close(); err != nil { + return nil, fmt.Errorf("failed to list folders: %w", err) + } + + return folders, nil +} + +// DownloadFolder downloads messages from a folder +// Returns (downloaded_count, highest_uid, error) +func (c *IMAPClient) DownloadFolder(folderName, baseDir string, limit *int, totalSoFar int, updateMode bool, lastUID uint32) (int, uint32, error) { + localPath := filepath.Join(baseDir, SanitizeFolderPath(folderName)) + if err := os.MkdirAll(localPath, 0755); err != nil { + return 0, lastUID, fmt.Errorf("failed to create directory: %w", err) + } + + // Select folder in read-only mode + selectCmd := c.client.Select(folderName, &imap.SelectOptions{ReadOnly: true}) + _, err := selectCmd.Wait() + if err != nil { + fmt.Printf(" Could not select folder: %s\n", folderName) + return 0, lastUID, err + } + + // Search for messages + var searchCriteria imap.SearchCriteria + + // Always set a UID range - empty SearchCriteria doesn't work + uidSet := imap.UIDSet{} + if updateMode && lastUID > 0 { + // Incremental update: search for UIDs > lastUID + uidSet.AddRange(imap.UID(lastUID+1), imap.UID(0xFFFFFFFF)) // 0xFFFFFFFF means * + } else { + // Full download or first run: search all UIDs from 1 to * + uidSet.AddRange(imap.UID(1), imap.UID(0xFFFFFFFF)) + } + searchCriteria.UID = []imap.UIDSet{uidSet} + + searchCmd := c.client.UIDSearch(&searchCriteria, nil) + searchData, err := searchCmd.Wait() + if err != nil { + fmt.Printf(" Could not search folder: %s\n", folderName) + return 0, lastUID, err + } + + uidList := make([]uint32, 0) + for _, uid := range searchData.AllUIDs() { + // Filter out UIDs <= lastUID (server quirk) + if !updateMode || lastUID == 0 || uint32(uid) > lastUID { + uidList = append(uidList, uint32(uid)) + } + } + + if len(uidList) == 0 { + fmt.Printf(" %s: no new messages\n", folderName) + return 0, lastUID, nil + } + + // Apply limit + if 
limit != nil { + remaining := *limit - totalSoFar + if remaining <= 0 { + return 0, lastUID, nil + } + if len(uidList) > remaining { + uidList = uidList[:remaining] + } + } + + fmt.Printf(" %s: %d messages to download\n", folderName, len(uidList)) + + downloaded := 0 + highestUID := lastUID + + for _, uid := range uidList { + msg, err := c.FetchMessage(uid) + if err != nil { + fmt.Printf(" Error downloading UID %d: %v\n", uid, err) + continue + } + + // Build filename + dateStr := msg.Date.Format("20060102_150405") + subject := SanitizeFilename(msg.Subject, 50) + filename := fmt.Sprintf("%d_%s_%s.eml", uid, dateStr, subject) + filepath := filepath.Join(localPath, filename) + + // Ensure unique filename + filepath = getUniqueFilepath(filepath) + + // Write EML file + if err := os.WriteFile(filepath, msg.Raw, 0644); err != nil { + fmt.Printf(" Error writing UID %d: %v\n", uid, err) + continue + } + + // Extract attachments + ExtractAttachments(msg.Parsed, filepath) + + downloaded++ + + if uid > highestUID { + highestUID = uid + } + } + + return downloaded, highestUID, nil +} + +// FetchMessage retrieves a single message by UID +func (c *IMAPClient) FetchMessage(uid uint32) (*EmailMessage, error) { + uidSet := imap.UIDSet{} + uidSet.AddNum(imap.UID(uid)) + + fetchCmd := c.client.Fetch(uidSet, &imap.FetchOptions{ + BodySection: []*imap.FetchItemBodySection{{}}, + }) + + msg := fetchCmd.Next() + if msg == nil { + fetchCmd.Close() + return nil, fmt.Errorf("message not found") + } + + // Iterate through fetch items to find body section + var rawEmail []byte + for { + item := msg.Next() + if item == nil { + break + } + + switch data := item.(type) { + case imapclient.FetchItemDataBodySection: + // Check if this is the full message (empty Part means full body) + if len(data.Section.Part) == 0 { + rawBytes, err := io.ReadAll(data.Literal) + if err != nil { + fetchCmd.Close() + return nil, fmt.Errorf("failed to read message body: %w", err) + } + rawEmail = rawBytes + } + } + 
// getUniqueFilepath returns a path that does not currently exist on disk.
//
// If basePath is free it is returned unchanged. Otherwise "_1", "_2", ... is
// inserted before the file extension until an unused name is found.
//
// Any os.Stat failure ends the search and returns the candidate being probed.
// "Not exist" means the name is free. Any other failure (invalid name,
// permission denied, a parent that is not a directory) is expected to repeat
// for every candidate, so probing further would loop forever; the caller's
// subsequent write reports the underlying problem instead.
//
// The check is not atomic: another process may create the returned path
// before the caller writes to it.
func getUniqueFilepath(basePath string) string {
	if _, err := os.Stat(basePath); err != nil {
		return basePath
	}

	ext := filepath.Ext(basePath)
	stem := strings.TrimSuffix(basePath, ext)

	for n := 1; ; n++ {
		candidate := fmt.Sprintf("%s_%d%s", stem, n, ext)
		if _, err := os.Stat(candidate); err != nil {
			return candidate
		}
	}
}
def decode_modified_utf7(s):
    """Decode an IMAP modified UTF-7 folder name into a regular string.

    Plain characters are copied through. ``&-`` decodes to a literal ``&``.
    ``&<data>-`` carries base64 data (using ``,`` in place of ``/`` and no
    padding) that encodes UTF-16BE text.

    Malformed input is passed through rather than rejected: an ``&`` with no
    closing ``-`` keeps the rest of the string verbatim, and a run that does
    not decode is kept in its encoded form.

    Args:
        s: The folder name as received from the server.

    Returns:
        The decoded folder name.
    """
    # Function-scope import: resolved once per call, not once per encoded run.
    import base64

    result = []
    i = 0
    while i < len(s):
        if s[i] != '&':
            result.append(s[i])
            i += 1
            continue

        if s.startswith('&-', i):
            result.append('&')
            i += 2
            continue

        end = s.find('-', i + 1)
        if end == -1:
            # Unterminated shift sequence: keep the remainder as-is.
            result.append(s[i:])
            break

        # The '&-' case is handled above, so this slice is never empty.
        encoded = s[i + 1:end].replace(',', '/')
        encoded += '=' * (-len(encoded) % 4)  # restore the stripped padding
        try:
            result.append(base64.b64decode(encoded).decode('utf-16-be'))
        except ValueError:
            # binascii.Error and UnicodeDecodeError are both ValueError subclasses.
            result.append(s[i:end + 1])
        i = end + 1
    return ''.join(result)
def sanitize_filename(name, max_length=50):
    """Turn an arbitrary string into a safe single path component.

    Characters in ``<>:"/\\|?*`` and control characters (0x00-0x1f) become
    ``_``; leading and trailing dots and spaces are removed; the result is cut
    to ``max_length`` characters.

    Args:
        name: The raw text (for example a subject line). May be empty or None.
        max_length: Maximum number of characters to keep.

    Returns:
        The sanitized name, or ``"untitled"`` when nothing usable remains.
    """
    fallback = "untitled"
    if not name:
        return fallback

    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', name).strip('. ')
    # Truncation can expose a new trailing dot or space, so trim once more.
    cleaned = cleaned[:max_length].strip('. ')
    return cleaned if cleaned else fallback
STATE_FILE = '.imapdown_state.json'


def load_state(base_dir):
    """Load the state file tracking the last downloaded email per folder.

    Args:
        base_dir: Directory that holds the state file.

    Returns:
        The JSON object stored in the state file, as a dict. An empty dict is
        returned when the file is missing, cannot be read, is not valid JSON,
        or holds a top-level value that is not an object (for example ``null``
        or a list), so callers can always treat the result as a mapping.
    """
    state_path = os.path.join(base_dir, STATE_FILE)
    try:
        with open(state_path, 'r') as f:
            state = json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file.
        # ValueError: malformed JSON or undecodable bytes.
        return {}
    return state if isinstance(state, dict) else {}


def save_state(base_dir, state):
    """Write ``state`` as indented JSON to the state file inside ``base_dir``.

    Args:
        base_dir: Directory that holds the state file; it must already exist.
        state: JSON-serializable mapping to persist.
    """
    state_path = os.path.join(base_dir, STATE_FILE)
    with open(state_path, 'w') as f:
        json.dump(state, f, indent=2)
conn.starttls() - return conn - - -def download_folder(conn, folder_name, base_dir, limit=None, total_so_far=0, update_mode=False, last_uid=None): - """Download all emails from a folder. Returns (downloaded_count, highest_uid).""" - local_path = os.path.join(base_dir, sanitize_folder_path(folder_name)) - os.makedirs(local_path, exist_ok=True) - - try: - status, _ = conn.select(f'"{folder_name}"', readonly=True) - if status != 'OK': - print(f" Could not select folder: {folder_name}") - return 0, last_uid - except Exception as e: - print(f" Error selecting folder {folder_name}: {e}") - return 0, last_uid - - if update_mode and last_uid is not None: - status, data = conn.uid('SEARCH', None, f'UID {last_uid + 1}:*') - else: - status, data = conn.uid('SEARCH', None, 'ALL') - - if status != 'OK': - print(f" Could not search folder: {folder_name}") - return 0, last_uid - - uid_list = data[0].split() - - # Filter out UIDs <= last_uid (some servers return highest UID even when searching for higher) - if update_mode and last_uid is not None: - uid_list = [uid for uid in uid_list if int(uid) > last_uid] - - if not uid_list: - print(f" {folder_name}: no new messages") - return 0, last_uid - - if limit is not None: - remaining = limit - total_so_far - if remaining <= 0: - return 0, last_uid - uid_list = uid_list[:remaining] - - print(f" {folder_name}: {len(uid_list)} messages to download") - downloaded = 0 - highest_uid = last_uid - - for uid in uid_list: - try: - uid_int = int(uid) - status, data = conn.uid('FETCH', uid, '(RFC822)') - if status != 'OK': - continue - - raw_email = None - for part in data: - if isinstance(part, tuple): - raw_email = part[1] - break - - if raw_email is None: - continue - - msg = email.message_from_bytes(raw_email) - date_str = get_message_date(msg) - subject = sanitize_filename(get_message_subject(msg)) - - filename = f"{uid_int}_{date_str}_{subject}.eml" - filepath = os.path.join(local_path, filename) - - counter = 1 - base_filepath = filepath - 
while os.path.exists(filepath): - name, ext = os.path.splitext(base_filepath) - filepath = f"{name}_{counter}{ext}" - counter += 1 - - with open(filepath, 'wb') as f: - f.write(raw_email) - - extract_attachments(msg, filepath) - downloaded += 1 - - if highest_uid is None or uid_int > highest_uid: - highest_uid = uid_int - - except Exception as e: - print(f" Error downloading UID {uid}: {e}") - - return downloaded, highest_uid - - -def main(): - args = parse_args() - - email_folder = sanitize_filename(args.email, max_length=100) - if args.output: - base_dir = os.path.join(args.output, email_folder) - else: - base_dir = os.path.join(os.getcwd(), 'download', email_folder) - os.makedirs(base_dir, exist_ok=True) - - if args.full: - has_emails = False - for root, dirs, files in os.walk(base_dir): - if any(f.endswith('.eml') for f in files): - has_emails = True - break - if has_emails: - print(f"Error: --full specified but {base_dir} already contains emails.", file=sys.stderr) - print("Delete the folder first to do a full re-download, or run without --full for incremental update.", file=sys.stderr) - sys.exit(1) - - try: - conn = connect_imap(args.server, args.port, args.ssl, args.starttls) - except Exception as e: - print(f"Connection failed: {e}", file=sys.stderr) - sys.exit(1) - - try: - status, _ = conn.login(args.user, args.password) - if status != 'OK': - print("Authentication failed", file=sys.stderr) - sys.exit(1) - print("Logged in successfully") - except Exception as e: - print(f"Authentication failed: {e}", file=sys.stderr) - sys.exit(1) - - try: - status, folder_data = conn.list() - if status != 'OK': - print("Could not list folders", file=sys.stderr) - sys.exit(1) - - folders = parse_folder_list(folder_data) - print(f"Found {len(folders)} folders") - - update_mode = not args.full - state = load_state(base_dir) if update_mode else {} - if args.full: - print("Full download mode: downloading all emails") - else: - print("Incremental mode: only downloading new 
emails (use --full to download all)") - - total_downloaded = 0 - for folder in folders: - last_uid = None - if update_mode and folder in state: - try: - last_uid = int(state[folder]) - except (ValueError, TypeError): - pass - - downloaded, highest_uid = download_folder( - conn, folder, base_dir, args.limit, total_downloaded, - update_mode=update_mode, last_uid=last_uid - ) - total_downloaded += downloaded - - if highest_uid is not None: - state[folder] = highest_uid - - if args.limit and total_downloaded >= args.limit: - print(f" Reached limit of {args.limit} emails") - break - - save_state(base_dir, state) - print(f"\nDownloaded {total_downloaded} emails to {base_dir}") - - finally: - try: - conn.logout() - except Exception: - pass - - -if __name__ == '__main__': - main() diff --git a/main.go b/main.go new file mode 100644 index 0000000..08dcb13 --- /dev/null +++ b/main.go @@ -0,0 +1,243 @@ +package main + +import ( + "flag" + "fmt" + "os" + "path/filepath" +) + +// Config holds all CLI arguments +type Config struct { + Server string + Email string + User string + Password string + UseSSL bool + UseSTARTTLS bool + Port int + Limit *int + Full bool + Output string +} + +func main() { + config := parseArgs() + + baseDir := setupBaseDirectory(config) + + if config.Full { + if err := checkFullModeSafety(baseDir); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + } + + // Connect to IMAP server + client, err := ConnectIMAP(config) + if err != nil { + fmt.Fprintf(os.Stderr, "Connection failed: %v\n", err) + os.Exit(1) + } + defer client.Logout() + + // Login + if err := client.Login(config.User, config.Password); err != nil { + fmt.Fprintf(os.Stderr, "Authentication failed: %v\n", err) + os.Exit(1) + } + + // List folders + folders, err := client.ListFolders() + if err != nil { + fmt.Fprintf(os.Stderr, "Could not list folders: %v\n", err) + os.Exit(1) + } + fmt.Printf("Found %d folders\n", len(folders)) + + // Load state + updateMode := 
!config.Full + state, err := LoadState(baseDir, config.Full) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: could not load state: %v\n", err) + state = make(State) + } + + if config.Full { + fmt.Println("Full download mode: downloading all emails") + } else { + fmt.Println("Incremental mode: only downloading new emails (use --full to download all)") + } + + // Download folders + stats := downloadAllFolders(client, folders, baseDir, config, updateMode, state) + + // Save state + if err := SaveState(baseDir, state); err != nil { + fmt.Fprintf(os.Stderr, "Warning: could not save state: %v\n", err) + } + + fmt.Printf("\nDownloaded %d emails to %s\n", stats.TotalDownloaded, baseDir) +} + +// parseArgs parses and validates command line arguments +func parseArgs() *Config { + config := &Config{} + + flag.StringVar(&config.Server, "server", "", "IMAP server hostname (required)") + flag.StringVar(&config.Email, "email", "", "Email address (required)") + flag.StringVar(&config.User, "user", "", "Username for authentication (required)") + flag.StringVar(&config.Password, "password", "", "Password for authentication (required)") + flag.BoolVar(&config.UseSSL, "ssl", false, "Use implicit SSL/TLS (default port 993)") + flag.BoolVar(&config.UseSTARTTLS, "starttls", false, "Use STARTTLS (default port 143)") + flag.IntVar(&config.Port, "port", 0, "Custom port (default: 993 for SSL, 143 otherwise)") + flag.BoolVar(&config.Full, "full", false, "Download all emails (default: only new emails since last run)") + flag.StringVar(&config.Output, "output", "", "Directory to store downloaded emails (default: ./{email})") + + var limit int + flag.IntVar(&limit, "limit", 0, "Limit number of emails to download (for debugging)") + + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: %s [options]\n\n", os.Args[0]) + fmt.Fprintf(os.Stderr, "Download all emails from an IMAP server to EML files\n\n") + fmt.Fprintf(os.Stderr, "Options:\n") + flag.PrintDefaults() + } + + flag.Parse() + + // 
Validate required arguments + if config.Server == "" { + fmt.Fprintf(os.Stderr, "Error: --server is required\n") + flag.Usage() + os.Exit(1) + } + if config.Email == "" { + fmt.Fprintf(os.Stderr, "Error: --email is required\n") + flag.Usage() + os.Exit(1) + } + if config.User == "" { + fmt.Fprintf(os.Stderr, "Error: --user is required\n") + flag.Usage() + os.Exit(1) + } + if config.Password == "" { + fmt.Fprintf(os.Stderr, "Error: --password is required\n") + flag.Usage() + os.Exit(1) + } + + // Check mutually exclusive flags + if config.UseSSL && config.UseSTARTTLS { + fmt.Fprintf(os.Stderr, "Error: --ssl and --starttls are mutually exclusive\n") + os.Exit(1) + } + + // Set default port + if config.Port == 0 { + if config.UseSSL { + config.Port = 993 + } else { + config.Port = 143 + } + } + + // Set limit pointer + if limit > 0 { + config.Limit = &limit + } + + return config +} + +// setupBaseDirectory creates and returns the base directory for downloads +func setupBaseDirectory(config *Config) string { + var baseDir string + if config.Output != "" { + // Use specified output directory directly + baseDir = config.Output + } else { + // Create email folder in current directory + emailFolder := SanitizeFilename(config.Email, 100) + cwd, _ := os.Getwd() + baseDir = filepath.Join(cwd, emailFolder) + } + + if err := os.MkdirAll(baseDir, 0755); err != nil { + fmt.Fprintf(os.Stderr, "Error creating directory: %v\n", err) + os.Exit(1) + } + + return baseDir +} + +// checkFullModeSafety verifies no existing .eml files in full mode +func checkFullModeSafety(baseDir string) error { + hasEmails := false + + err := filepath.Walk(baseDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() && filepath.Ext(path) == ".eml" { + hasEmails = true + return filepath.SkipAll + } + return nil + }) + + if err != nil { + return err + } + + if hasEmails { + return fmt.Errorf("--full specified but %s already contains emails.\nDelete 
// stateFileName is the name of the state file kept in the base directory.
const stateFileName = ".imapdown_state.json"

// State tracks the highest UID downloaded per folder.
type State map[string]uint32

// LoadState reads the state file from the base directory.
//
// The returned State is always non-nil and safe to write to. It is empty when
// fullMode is set (the file is not read at all), when the file does not exist,
// when it cannot be parsed, or when it contains JSON null. A read failure
// other than "not exist" returns an empty State together with the error.
func LoadState(baseDir string, fullMode bool) (State, error) {
	if fullMode {
		return make(State), nil
	}

	data, err := os.ReadFile(filepath.Join(baseDir, stateFileName))
	if err != nil {
		if os.IsNotExist(err) {
			return make(State), nil
		}
		return make(State), err
	}

	var state State
	// JSON null unmarshals without error but leaves the map nil; writing to a
	// nil map panics, so treat it like an unparsable file.
	if err := json.Unmarshal(data, &state); err != nil || state == nil {
		return make(State), nil
	}

	return state, nil
}

// SaveState writes the state file to the base directory as indented JSON.
func SaveState(baseDir string, state State) error {
	data, err := json.MarshalIndent(state, "", " ")
	if err != nil {
		return err
	}

	return os.WriteFile(filepath.Join(baseDir, stateFileName), data, 0644)
}

// UpdateFolder records uid for folder if the folder is new or uid is higher
// than the stored value; lower or equal values are ignored. The receiver must
// be non-nil.
func (s State) UpdateFolder(folder string, uid uint32) {
	if current, exists := s[folder]; !exists || uid > current {
		s[folder] = uid
	}
}

// GetLastUID returns the last UID for a folder, or 0 if not found.
func (s State) GetLastUID(folder string) uint32 {
	// A missing key (or a nil map) yields the zero value.
	return s[folder]
}