Last updated: March 21, 2026
Expense reports are manual friction that remote workers deal with monthly. The typical flow — photograph receipts, open a spreadsheet, copy in amounts, export to PDF, attach to email — takes 30-60 minutes per report. Automating it with scripts reduces that to minutes.
This guide builds an expense automation system: a folder watcher that extracts amounts from receipt images using OCR, generates a categorized CSV, and produces a PDF report ready to submit.
What Gets Automated
- Drop receipt images into
~/receipts/incoming/ - OCR script extracts amount, vendor, and date from each image
- Categorize expenses (meals, software, hardware, travel)
- Generate monthly CSV and PDF report
- Email report to your finance contact or manager
Install Dependencies
# macOS
brew install tesseract imagemagick python3
# Ubuntu/Debian
sudo apt install tesseract-ocr imagemagick python3-pip -y
# Python packages
pip3 install pytesseract Pillow pandas fpdf2 python-dotenv
# Verify Tesseract
tesseract --version
Folder Structure
~/expenses/
├── receipts/
│ ├── incoming/ # drop new receipts here
│ └── processed/ # moved here after extraction
├── ledger.csv # master expense log
├── reports/ # generated PDFs
├── extract.py # OCR extraction script
├── report.py # PDF report generator
└── .env # SMTP credentials
Receipt Extraction Script
# extract.py
import os
import re
import shutil
import csv
from datetime import datetime
from pathlib import Path
import pytesseract
from PIL import Image
INCOMING = Path.home() / "expenses/receipts/incoming"
PROCESSED = Path.home() / "expenses/receipts/processed"
LEDGER = Path.home() / "expenses/ledger.csv"
CATEGORIES = {
"amazon": "software",
"aws": "software",
"github": "software",
"figma": "software",
"notion": "software",
"zoom": "software",
"slack": "software",
"uber eats": "meals",
"doordash": "meals",
"grubhub": "meals",
"starbucks": "meals",
"delta": "travel",
"united": "travel",
"american airlines": "travel",
"marriott": "travel",
"hilton": "travel",
"best buy": "hardware",
"b&h": "hardware",
"apple": "hardware",
"logitech": "hardware",
}
def extract_amount(text):
"""Extract dollar amount from OCR text."""
patterns = [
r"total[:\s]+\$?([\d,]+\.\d{2})",
r"amount[:\s]+\$?([\d,]+\.\d{2})",
r"\$\s*([\d,]+\.\d{2})",
r"([\d,]+\.\d{2})",
]
for pattern in patterns:
match = re.search(pattern, text, re.IGNORECASE)
if match:
amount = match.group(1).replace(",", "")
return float(amount)
return None
def extract_vendor(text):
"""Match known vendors from OCR text."""
text_lower = text.lower()
for vendor in CATEGORIES:
if vendor in text_lower:
return vendor.title(), CATEGORIES[vendor]
# Fallback: first non-empty line as vendor
lines = [l.strip() for l in text.split("\n") if l.strip()]
return lines[0] if lines else "Unknown", "other"
def extract_date(text):
"""Extract date from OCR text, default to today."""
patterns = [
r"(\d{1,2}/\d{1,2}/\d{2,4})",
r"(\d{4}-\d{2}-\d{2})",
r"(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+\d{1,2},?\s+\d{4}",
]
for pattern in patterns:
match = re.search(pattern, text, re.IGNORECASE)
if match:
return match.group(0)
return datetime.today().strftime("%Y-%m-%d")
def process_receipts():
INCOMING.mkdir(parents=True, exist_ok=True)
PROCESSED.mkdir(parents=True, exist_ok=True)
images = list(INCOMING.glob("*.{jpg,jpeg,png,pdf}"))
if not images:
images = [p for ext in ["jpg","jpeg","png","pdf"] for p in INCOMING.glob(f"*.{ext}")]
new_rows = []
for img_path in images:
print(f"Processing: {img_path.name}")
img = Image.open(img_path)
text = pytesseract.image_to_string(img)
amount = extract_amount(text)
vendor, category = extract_vendor(text)
date = extract_date(text)
if amount:
row = {
"date": date,
"vendor": vendor,
"amount": amount,
"category": category,
"notes": "",
"receipt": img_path.name,
}
new_rows.append(row)
print(f" → {vendor}: ${amount} ({category})")
else:
print(f" → Could not extract amount — check manually")
# Move to processed
shutil.move(str(img_path), str(PROCESSED / img_path.name))
if new_rows:
write_header = not LEDGER.exists()
with open(LEDGER, "a", newline="") as f:
writer = csv.DictWriter(f, fieldnames=new_rows[0].keys())
if write_header:
writer.writeheader()
writer.writerows(new_rows)
print(f"\nAdded {len(new_rows)} expenses to ledger.")
if __name__ == "__main__":
process_receipts()
PDF Report Generator
# report.py
import csv
import os
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from fpdf import FPDF
LEDGER = Path.home() / "expenses/ledger.csv"
REPORTS_DIR = Path.home() / "expenses/reports"
def generate_report(month: str = None):
"""month format: YYYY-MM, defaults to current month"""
if not month:
month = datetime.today().strftime("%Y-%m")
rows = []
with open(LEDGER, newline="") as f:
for row in csv.DictReader(f):
if row["date"].startswith(month):
rows.append(row)
if not rows:
print(f"No expenses found for {month}")
return
total = sum(float(r["amount"]) for r in rows)
by_category = defaultdict(float)
for r in rows:
by_category[r["category"]] += float(r["amount"])
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
pdf_path = REPORTS_DIR / f"expense-report-{month}.pdf"
pdf = FPDF()
pdf.add_page()
pdf.set_font("Helvetica", "B", 16)
pdf.cell(0, 10, f"Expense Report — {month}", ln=True)
pdf.set_font("Helvetica", "", 10)
pdf.cell(0, 6, f"Generated: {datetime.today().strftime('%Y-%m-%d')}", ln=True)
pdf.ln(4)
# Category summary
pdf.set_font("Helvetica", "B", 12)
pdf.cell(0, 8, "Summary by Category", ln=True)
pdf.set_font("Helvetica", "", 10)
for cat, amt in sorted(by_category.items()):
pdf.cell(60, 6, cat.title())
pdf.cell(0, 6, f"${amt:.2f}", ln=True)
pdf.set_font("Helvetica", "B", 10)
pdf.cell(60, 6, "TOTAL")
pdf.cell(0, 6, f"${total:.2f}", ln=True)
pdf.ln(6)
# Line items
pdf.set_font("Helvetica", "B", 12)
pdf.cell(0, 8, "Line Items", ln=True)
pdf.set_font("Helvetica", "B", 9)
pdf.cell(30, 6, "Date")
pdf.cell(70, 6, "Vendor")
pdf.cell(30, 6, "Category")
pdf.cell(0, 6, "Amount", ln=True)
pdf.line(10, pdf.get_y(), 200, pdf.get_y())
pdf.set_font("Helvetica", "", 9)
for r in sorted(rows, key=lambda x: x["date"]):
pdf.cell(30, 6, r["date"])
pdf.cell(70, 6, r["vendor"][:30])
pdf.cell(30, 6, r["category"])
pdf.cell(0, 6, f"${float(r['amount']):.2f}", ln=True)
pdf.output(str(pdf_path))
print(f"Report saved: {pdf_path}")
return pdf_path
if __name__ == "__main__":
import sys
month = sys.argv[1] if len(sys.argv) > 1 else None
generate_report(month)
Email the Report
# .env
SMTP_HOST=smtp.gmail.com
SMTP_USER=you@gmail.com
SMTP_PASS=your-app-password
MANAGER_EMAIL=manager@company.com
YOUR_NAME=Your Name
# send-report.py
import smtplib
import os
import sys
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email import encoders
from datetime import datetime
from dotenv import load_dotenv
from report import generate_report
load_dotenv(Path.home() / "expenses/.env")
def send_report(month=None):
if not month:
month = datetime.today().strftime("%Y-%m")
pdf_path = generate_report(month)
if not pdf_path:
return
msg = MIMEMultipart()
msg["From"] = os.getenv("SMTP_USER")
msg["To"] = os.getenv("MANAGER_EMAIL")
msg["Subject"] = f"Expense Report {month} — {os.getenv('YOUR_NAME')}"
msg.attach(MIMEText(f"Hi,\n\nAttached is my expense report for {month}.\n\nBest,\n{os.getenv('YOUR_NAME')}"))
with open(pdf_path, "rb") as f:
part = MIMEBase("application", "octet-stream")
part.set_payload(f.read())
encoders.encode_base64(part)
part.add_header("Content-Disposition", f'attachment; filename="{pdf_path.name}"')
msg.attach(part)
with smtplib.SMTP(os.getenv("SMTP_HOST"), 587) as server:
server.starttls()
server.login(os.getenv("SMTP_USER"), os.getenv("SMTP_PASS"))
server.sendmail(msg["From"], msg["To"], msg.msg.as_string())
print(f"Report sent to {os.getenv('MANAGER_EMAIL')}")
if __name__ == "__main__":
send_report(sys.argv[1] if len(sys.argv) > 1 else None)
Run the Full Workflow
# 1. Drop receipt images into ~/expenses/receipts/incoming/
# 2. Extract amounts from all new receipts
python3 ~/expenses/extract.py
# 3. Review and fix the ledger if OCR missed anything
cat ~/expenses/ledger.csv
# 4. Generate report for current month
python3 ~/expenses/report.py
# 5. Send report
python3 ~/expenses/send-report.py
# Or send for a specific month
python3 ~/expenses/send-report.py 2026-02
Add a Manual Entry
For receipts that OCR cannot parse (printed text too faint, scanned PDFs):
# Append manually
echo "2026-03-15,Ikea,89.99,hardware,Standing desk riser,desk-riser-receipt.jpg" \
>> ~/expenses/ledger.csv
Related Reading
- Best Expense Management Platform for Remote Teams with Receipt Tracking
- Best Accounting Software for Freelancers 2026
- Automate Invoice Generation for Freelancers
- How to Set Up Remote Finance Team Approval Workflow
Related Articles
- Best Expense Management Platform for Remote Teams with Recei
- Remote Team Handbook Section Template for Writing Expense Re
- Best SIP Phone Software for Remote Workers: A Technical
- How to Set Up Remote Finance Team Approval Workflow
- How to Create Automated Client Progress Report for Remote Built by theluckystrike — More at zovo.one
Frequently Asked Questions
Who is this article written for?
This article is written for developers, technical professionals, and power users who want practical guidance. Whether you are evaluating options or implementing a solution, the information here focuses on real-world applicability rather than theoretical overviews.
How current is the information in this article?
We update articles regularly to reflect the latest changes. However, tools and platforms evolve quickly. Always verify specific feature availability and pricing directly on the official website before making purchasing decisions.
Are there free alternatives available?
Free alternatives exist for most tool categories, though they typically come with limitations on features, usage volume, or support. Open-source options can fill some gaps if you are willing to handle setup and maintenance yourself. Evaluate whether the time savings from a paid tool justify the cost for your situation.
How do I get my team to adopt a new tool?
Start with a small pilot group of willing early adopters. Let them use it for 2-3 weeks, then gather their honest feedback. Address concerns before rolling out to the full team. Forced adoption without buy-in almost always fails.
What is the learning curve like?
Most tools discussed here can be used productively within a few hours. Mastering advanced features takes 1-2 weeks of regular use. Focus on the 20% of features that cover 80% of your needs first, then explore advanced capabilities as specific needs arise.