82 lines
2.2 KiB
Python
82 lines
2.2 KiB
Python
import xml.etree.ElementTree as ET
|
|
from email.utils import parsedate_to_datetime
|
|
from datetime import datetime
|
|
import requests
|
|
import json
|
|
from urllib.parse import urlparse
|
|
import os
|
|
|
|
|
|
def parseXML(xmlfile):
    """Parse a podcast RSS feed into a list of episode dictionaries.

    Every ``<item>`` found under ``./channel`` yields one dictionary. A key
    is only set when the matching child element is present in the item:

    * ``download_url`` -- the ``url`` attribute of ``<enclosure>``
    * ``date`` -- ``<pubDate>`` rendered as an ISO 8601 string with an offset
    * ``title`` -- text of ``<title>``
    * ``summary`` -- text of ``<description>`` with the trailing ad-choices
      boilerplate (and trailing whitespace) removed

    Args:
        xmlfile: File name or file object accepted by ``ElementTree.parse``.

    Returns:
        A list of dictionaries, one per feed item, in document order.

    Raises:
        xml.etree.ElementTree.ParseError: If the feed is not well-formed XML.
        KeyError: If an ``<enclosure>`` element has no ``url`` attribute.
        TypeError or ValueError: If a ``<pubDate>`` is empty or cannot be
            parsed as an RFC 2822 date (the type depends on the Python
            version).
    """
    ad_suffix = (
        "Learn more about your ad choices. "
        "Visit podcastchoices.com/adchoices"
    )
    # Offset attached to dates that arrive without usable zone information.
    default_offset = "-08:00"

    root = ET.parse(xmlfile).getroot()
    episodes = []

    for item in root.findall('./channel/item'):
        episode = {}

        for child in item:
            if child.tag == 'enclosure':
                episode['download_url'] = child.attrib['url']
            elif child.tag == 'pubDate':
                published = parsedate_to_datetime(child.text)
                stamp = published.isoformat()
                # An aware datetime already renders its own offset; appending
                # a second one would produce an invalid timestamp such as
                # "...+00:00-08:00". Only naive results (e.g. a "-0000" zone)
                # get the default offset.
                if published.tzinfo is None:
                    stamp += default_offset
                episode['date'] = stamp
            elif child.tag == "title":
                episode['title'] = child.text
            elif child.tag == "description":
                # child.text is None for an empty <description/> element.
                summary = (child.text or "").rstrip()
                # str.rstrip(chars) strips a *set of characters*, which would
                # eat the end of ordinary descriptions; remove the exact
                # suffix instead.
                if summary.endswith(ad_suffix):
                    summary = summary[:-len(ad_suffix)].rstrip()
                episode['summary'] = summary

        episodes.append(episode)

    return episodes
|
|
|
|
def save_json(episodes):
    """Write *episodes* to ``episodes.json`` in the working directory.

    The list is stored under a single top-level ``"episodes"`` key and the
    file is overwritten if it already exists.

    Args:
        episodes: JSON-serialisable list of episode records.
    """
    # Serialise before opening the file so a failure here cannot leave a
    # truncated episodes.json behind.
    payload = json.dumps({"episodes": episodes}).encode("UTF-8")

    with open("episodes.json", mode="wb") as out:
        out.write(payload)
|
|
|
|
|
|
def download_episodes(episodes):
    """Build the per-episode records that describe each media file.

    For every episode the local file name is taken from the last path
    segment of its ``download_url`` (query string ignored), and the date and
    file name are printed as a progress line. The network download itself is
    currently disabled; see the commented-out lines in the loop.

    Args:
        episodes: Iterable of dictionaries with the keys ``download_url``,
            ``date``, ``title`` and ``summary``.

    Returns:
        A list of dictionaries with the keys ``date``, ``title``,
        ``summary`` and ``filename``, in input order.

    Raises:
        KeyError: If an episode lacks one of the required keys.
    """
    records = []

    for entry in episodes:
        media_name = os.path.basename(urlparse(entry["download_url"]).path)
        record = {
            "date": entry["date"],
            "title": entry["title"],
            "summary": entry["summary"],
            "filename": media_name,
        }
        print(record["date"], record["filename"])

        # Download disabled:
        # response = requests.get(entry["download_url"])
        # with open("media/" + record["filename"], mode="wb") as file:
        #     file.write(response.content)

        records.append(record)

    return records
|
|
|
|
def main():
    """Run the pipeline: parse the feed, build records, write the JSON."""
    feed_entries = parseXML('chompers.rss')
    records = download_episodes(feed_entries)
    save_json(records)
|
|
|
|
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|
|
|