From 1a7be7fddf8e6448093e699230034e24cdc67758 Mon Sep 17 00:00:00 2001 From: Steve Cliff Date: Mon, 2 Feb 2026 22:00:08 +0000 Subject: [PATCH] Added ability to define output directory --- CLAUDE.md | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 6 ++++ imapdown.py | 6 +++- 3 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..79ff654 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,100 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +This is a single-file Python script (`imapdown.py`) that downloads all emails from an IMAP server into individual EML files, preserving the folder hierarchy. It uses only Python's standard library and has no external dependencies. + +## Development Environment + +- Python 3.6+ required +- Virtual environment is set up in `.venv` - activate it before running: + ```bash + source .venv/bin/activate + ``` + +## Running the Script + +Basic usage (incremental mode - only downloads new emails): +```bash +./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl +``` + +Full download (ignores previous state, requires empty target directory): +```bash +./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl --full +``` + +Testing/debugging with limited emails: +```bash +./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl --limit 10 +``` + +Custom storage directory: +```bash +./imapdown.py --server imap.example.com --email user@example.com --user user@example.com --password "password" --ssl --store /path/to/backup +``` + +## Architecture + +### Single-File Design +The entire application is contained in `imapdown.py` (13KB). 
This is intentional - no modules or packages. + +### State Tracking +- The script maintains a `.imapdown_state.json` file in each email account's download folder +- Tracks the highest UID (unique identifier) downloaded per IMAP folder +- Format: `{"INBOX": 19334, "INBOX.Archive": 1770, "Sent": 892}` +- Enables efficient incremental downloads (default mode) + +### Download Flow +1. Parse arguments +2. Connect to IMAP server (SSL, STARTTLS, or plain) +3. List all folders and decode modified UTF-7 folder names +4. For each folder: + - Load last downloaded UID from state file (if incremental mode) + - Search for new messages (UID > last_uid) + - Download each message as RFC822 + - Save as `.eml` file with naming: `{UID}_{date}_{subject}.eml` + - Extract attachments into `.zip` file (same base name) + - Update state with highest UID +5. Save state file + +### Key Implementation Details + +**Modified UTF-7 Decoding**: IMAP folder names use modified UTF-7 encoding (see `decode_modified_utf7()` at line 39). This is not standard base64 - it uses `,` instead of `/` and has special `&` handling. + +**Filename Sanitization**: Two-stage process: +- `sanitize_filename()`: Removes invalid filesystem characters, max 50 chars for subjects +- `sanitize_folder_path()`: Converts IMAP folder separators (`.` or `/`) to OS path separators + +**UID-Based Incremental Updates**: Uses IMAP UIDs (not sequence numbers) because UIDs are persistent. The search `UID {last_uid + 1}:*` fetches only new messages. In IMAP, a UID range ending in `*` always includes the message with the highest UID in the mailbox, so a server can return an already-downloaded message even when nothing newer exists; that's why there's additional filtering at line 251. + +**Full Mode Safety**: `--full` mode checks if the download folder already contains `.eml` files and refuses to run (line 325). This prevents accidental duplicates. Users must delete the folder first. 
+ +**Attachment Handling**: +- Walks message parts looking for `Content-Disposition: attachment` or `inline` +- Handles duplicate attachment filenames by appending `_{counter}` +- All attachments for one email go into a single `.zip` file + +## Output Structure + +``` +{store_dir}/ # default: ./download +└── {email_address}/ # sanitized email address + ├── .imapdown_state.json + ├── INBOX/ + │ ├── 123_20240115_Meeting_notes.eml + │ └── 124_20240116_Report.zip + └── Sent/ + └── 456_20240114_RE_Question.eml +``` + +## Testing + +No formal test suite exists. Manual testing approach: +- Use `--limit 10` to download a small batch for verification +- Test SSL vs STARTTLS connections +- Test incremental mode by running twice +- Verify `.eml` files open correctly in email clients +- Check that folders with special characters (non-ASCII) are handled correctly diff --git a/README.md b/README.md index 977c167..97d6566 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,9 @@ By default, the script only downloads new emails since the last run (incremental # Custom port ./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --port 12993 + +# Custom storage directory +./imapdown.py --server imap.example.com --email me@example.com --user me@example.com --password "secret" --ssl --store /path/to/backup ``` ### Full Download @@ -82,11 +85,14 @@ Limit the number of emails downloaded: | `--port` | No | Custom port (overrides defaults) | | `--limit` | No | Maximum number of emails to download | | `--full` | No | Download all emails (default: only new since last run) | +| `--store` | No | Directory to store downloaded emails (default: ./download) | Note: `--ssl` and `--starttls` are mutually exclusive. 
## Output Structure +The default output structure (when `--store` is not specified): + ``` ./download/ ├── user@example.com/ diff --git a/imapdown.py b/imapdown.py index c7a33fd..9482dfb 100755 --- a/imapdown.py +++ b/imapdown.py @@ -32,6 +32,7 @@ def parse_args(): parser.add_argument("--port", type=int, help="Custom port (default: 993 for SSL, 143 otherwise)") parser.add_argument("--limit", type=int, help="Limit number of emails to download (for debugging)") parser.add_argument("--full", action="store_true", help="Download all emails (default: only new emails since last run)") + parser.add_argument("--store", type=str, help="Directory to store downloaded emails (default: ./download)") return parser.parse_args() @@ -313,7 +314,10 @@ def main(): args = parse_args() email_folder = sanitize_filename(args.email, max_length=100) - base_dir = os.path.join(os.getcwd(), 'download', email_folder) + if args.store: + base_dir = os.path.join(args.store, email_folder) + else: + base_dir = os.path.join(os.getcwd(), 'download', email_folder) os.makedirs(base_dir, exist_ok=True) if args.full: