"""Extract podcast episode metadata from an RSS feed and store it as JSON.

The script reads ``chompers.rss``, collects the publication date, title,
summary and media file name of every episode that has an enclosure, and
writes the result to ``episodes.json``.
"""

import json
import os
import xml.etree.ElementTree as ET
from datetime import datetime
from email.utils import parsedate_to_datetime
from urllib.parse import urlparse

# Advertising boilerplate that the feed appends to episode descriptions.
_AD_CHOICES_FOOTER = (
    "Learn more about your ad choices. Visit podcastchoices.com/adchoices"
)


def _clean_summary(text):
    """Return *text* without the trailing ad-choices footer and whitespace."""
    summary = (text or "").rstrip()
    # An exact suffix check is required here: str.rstrip(chars) treats its
    # argument as a *set of characters* and would also eat the end of the
    # real summary.
    if summary.endswith(_AD_CHOICES_FOOTER):
        summary = summary[: -len(_AD_CHOICES_FOOTER)].rstrip()
    return summary


def parseXML(xmlfile):
    """Parse an RSS feed and return one metadata dict per ``<item>``.

    Args:
        xmlfile: File name or file object accepted by ``ET.parse``.

    Returns:
        A list of dicts. Each dict holds whichever of these keys the item
        provided: ``download_url`` (enclosure ``url`` attribute), ``date``
        (``pubDate`` as an ISO 8601 string), ``title`` and ``summary``
        (description without the ad-choices footer).

    Raises:
        xml.etree.ElementTree.ParseError: If the document is not valid XML.
        ValueError, TypeError: If a non-empty ``pubDate`` cannot be parsed.
    """
    root = ET.parse(xmlfile).getroot()
    episodes = []
    for item in root.findall("./channel/item"):
        episode = {}
        for child in item:
            if child.tag == "enclosure":
                url = child.get("url")
                if url:
                    episode["download_url"] = url
            elif child.tag == "pubDate":
                # An empty <pubDate/> has text None; leave the key unset.
                raw_date = (child.text or "").strip()
                if raw_date:
                    episode["date"] = parsedate_to_datetime(raw_date).isoformat()
            elif child.tag == "title":
                episode["title"] = child.text
            elif child.tag == "description":
                episode["summary"] = _clean_summary(child.text)
        episodes.append(episode)
    return episodes


def save_json(episodes, path="episodes.json"):
    """Write *episodes* to *path* as ``{"episodes": [...]}`` JSON.

    Args:
        episodes: JSON-serialisable list of episode dicts.
        path: Destination file; defaults to ``episodes.json`` in the
            current working directory.
    """
    with open(path, mode="w", encoding="utf-8") as file:
        json.dump({"episodes": episodes}, file)


def _fetch_media(url, destination):
    """Download *url* and store the response body at *destination*."""
    # Imported lazily: requests is a third-party package that is only needed
    # when media files are actually downloaded.
    import requests

    response = requests.get(url, timeout=60)
    # Fail loudly instead of saving an HTTP error page as a media file.
    response.raise_for_status()
    os.makedirs(os.path.dirname(destination) or ".", exist_ok=True)
    with open(destination, mode="wb") as file:
        file.write(response.content)


def download_episodes(episodes, *, download_media=False, media_dir="media"):
    """Build the download record for every episode that has a media URL.

    Prints the date and file name of each processed episode. Episodes
    without a ``download_url`` are skipped because there is nothing to
    download for them.

    Args:
        episodes: Episode dicts as returned by ``parseXML``.
        download_media: When true, fetch each media file into *media_dir*.
            Defaults to false, in which case only the metadata is built.
        media_dir: Directory that receives the downloaded media files.

    Returns:
        A list of dicts with the keys ``date``, ``title``, ``summary`` and
        ``filename`` (the last path component of the media URL). Fields
        missing from the source episode are ``None``.
    """
    downloaded = []
    for episode in episodes:
        url = episode.get("download_url")
        if not url:
            continue
        filename = os.path.basename(urlparse(url).path)
        record = {
            "date": episode.get("date"),
            "title": episode.get("title"),
            "summary": episode.get("summary"),
            "filename": filename,
        }
        print(record["date"], record["filename"])
        if download_media:
            _fetch_media(url, os.path.join(media_dir, filename))
        downloaded.append(record)
    return downloaded


def main():
    """Convert ``chompers.rss`` into ``episodes.json``."""
    to_download = parseXML("chompers.rss")
    downloaded_episodes = download_episodes(to_download)
    save_json(downloaded_episodes)


if __name__ == "__main__":
    main()