chompers-rewind/download.py

82 lines
2.2 KiB
Python

import xml.etree.ElementTree as ET
from email.utils import parsedate_to_datetime
from datetime import datetime
import requests
import json
from urllib.parse import urlparse
import os
def parseXML(xmlfile):
    """Parse a podcast RSS feed into a list of episode dictionaries.

    Args:
        xmlfile: A filename or file object, as accepted by
            ``xml.etree.ElementTree.parse``.

    Returns:
        A list with one dict per ``./channel/item`` element, in document
        order. A key is present only when the matching child tag exists:

        * ``download_url`` -- the ``url`` attribute of ``<enclosure>``
        * ``date`` -- ``<pubDate>`` rendered as an ISO 8601 string
        * ``title`` -- the text of ``<title>``
        * ``summary`` -- the text of ``<description>`` with the ad-choices
          footer removed
    """
    ad_footer = "Learn more about your ad choices. Visit podcastchoices.com/adchoices"

    root = ET.parse(xmlfile).getroot()
    episodes = []
    for item in root.findall('./channel/item'):
        episode = {}
        for child in item:
            if child.tag == 'enclosure':
                episode['download_url'] = child.attrib['url']
            elif child.tag == 'pubDate':
                published = parsedate_to_datetime(child.text)
                stamp = published.isoformat()
                # A naive datetime has no offset in its ISO form, so the fixed
                # "-08:00" is appended. An aware datetime already carries its
                # own offset; appending a second one would produce an invalid
                # timestamp.
                if published.tzinfo is None:
                    stamp += "-08:00"
                episode['date'] = stamp
            elif child.tag == "title":
                episode['title'] = child.text
            elif child.tag == "description":
                # An empty <description/> has text None; treat it as "".
                summary = child.text or ""
                # str.rstrip(chars) strips a *set of characters*, not a
                # suffix, and would eat into the real description. Remove the
                # footer only when it is literally the tail of the text, then
                # drop the whitespace that separated it from the description.
                if summary.endswith(ad_footer):
                    summary = summary[:-len(ad_footer)].rstrip()
                episode['summary'] = summary
        episodes.append(episode)
    return episodes
def save_json(episodes):
    """Write *episodes* to ``episodes.json`` under a top-level ``episodes`` key.

    Args:
        episodes: A JSON-serializable value to store.

    The file is created (or overwritten) in the current working directory
    and contains the UTF-8 encoded JSON document.
    """
    payload = json.dumps({"episodes": episodes}).encode("UTF-8")
    with open("episodes.json", mode="wb") as out:
        out.write(payload)
def download_episodes(episodes):
    """Build a download record for each episode and print its date and filename.

    Args:
        episodes: Iterable of dicts with ``download_url``, ``date``,
            ``title`` and ``summary`` keys.

    Returns:
        A list of dicts with ``date``, ``title``, ``summary`` and
        ``filename`` keys, where ``filename`` is the last path component of
        the episode's download URL.

    Raises:
        KeyError: If an episode lacks one of the required keys.
    """
    records = []
    for entry in episodes:
        # Only the path part of the URL is used, so any query string or
        # fragment is excluded from the filename.
        media_path = urlparse(entry["download_url"]).path
        record = {
            "date": entry["date"],
            "title": entry["title"],
            "summary": entry["summary"],
            "filename": os.path.basename(media_path),
        }
        print(record["date"], record["filename"])
        # NOTE: the actual media fetch is currently disabled:
        # response = requests.get(entry["download_url"])
        # with open("media/"+record["filename"], mode="wb") as file:
        #     file.write(response.content)
        records.append(record)
    return records
def main():
    """Run the pipeline: parse ``chompers.rss``, build records, save them as JSON."""
    feed_episodes = parseXML('chompers.rss')
    records = download_episodes(feed_episodes)
    save_json(records)
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()