#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
dedupe_feed.py — Remove duplicate <item>s in feed.xml based on <enclosure url>.
Usage:
    python dedupe_feed.py [--verbose]

Place this script in the SAME folder as feed.xml, e.g., D:\TTS Podcast\
"""

import os
import sys
import argparse
import xml.etree.ElementTree as ET

def _register_feed_namespaces(feed_path):
    """Register the namespace prefixes declared in the feed with ElementTree.

    ElementTree does not remember the prefixes used in a parsed document; on
    write it invents ``ns0``, ``ns1``, ... for every namespace that has not
    been registered. Registering the feed's own prefixes first keeps tags
    such as ``itunes:author`` spelled the same way in the rewritten file.

    Raises:
        ET.ParseError: if the file is not well-formed XML.
        OSError: if the file cannot be opened or read.
    """
    for _event, (prefix, uri) in ET.iterparse(feed_path, events=("start-ns",)):
        if not prefix:
            # A default namespace is not registered: the channel and items
            # this script handles are un-namespaced, and a registered empty
            # prefix would be declared on the root and wrongly capture them.
            continue
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # Prefixes of the form "ns<digits>" are reserved by ElementTree.
            continue


def main():
    """De-duplicate the <item> elements of a podcast feed, in place.

    Command-line options:
        --feed PATH   Feed file to rewrite (default: feed.xml in the current
                      directory).
        --verbose     Print what was found, removed and kept.

    Two items are duplicates when their <enclosure> elements carry the same
    non-empty ``url``; the first occurrence is kept and later ones are
    removed. Items without an enclosure URL cannot be compared and are always
    kept. All surviving children of <channel> stay in their original
    positions. The feed file is overwritten with the result.

    Exits with status 1 if the feed is missing, unreadable, not well-formed
    XML, or has no <channel> directly under the root element.
    """
    parser = argparse.ArgumentParser(description="De-duplicate podcast feed items by enclosure URL.")
    parser.add_argument("--feed", default="feed.xml", help="Path to feed.xml (default: feed.xml in current dir)")
    parser.add_argument("--verbose", action="store_true", help="Print actions taken")
    args = parser.parse_args()

    feed_path = os.path.abspath(args.feed)
    if not os.path.exists(feed_path):
        print(f"[ERROR] feed.xml not found at: {feed_path}")
        sys.exit(1)

    # Parse (namespaces are registered first so prefixes survive the rewrite)
    try:
        _register_feed_namespaces(feed_path)
        tree = ET.parse(feed_path)
    except ET.ParseError as e:
        print("[ERROR] XML parse error:", e)
        sys.exit(1)
    except OSError as e:
        print(f"[ERROR] Could not read {feed_path}: {e}")
        sys.exit(1)

    channel = tree.getroot().find("channel")
    if channel is None:
        print("[ERROR] <channel> not found in feed.xml")
        sys.exit(1)

    # findall() returns a separate list, so removing from channel while
    # looping over it is safe.
    items = channel.findall("item")
    if args.verbose:
        print(f"[INFO] Found {len(items)} <item>(s)")

    seen_urls = set()
    kept = 0
    for item in items:
        enclosure = item.find("enclosure")
        url = enclosure.get("url") if enclosure is not None else None
        if url and url in seen_urls:
            # Remove only the duplicate itself; every other child of
            # <channel> keeps its original position.
            channel.remove(item)
            if args.verbose:
                print("[INFO] Removing duplicate item with URL:", url)
            continue
        if url:
            seen_urls.add(url)
        kept += 1

    tree.write(feed_path, encoding="utf-8", xml_declaration=True)
    if args.verbose:
        print(f"[OK] De-duplication complete. Kept {kept} item(s).")

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
