')
text = soup.find("div", attrs={"class":"articleBody"})
elif soup.find("div", attrs={"class":"post"}):
print('Text import from
')
text = soup.find("div", attrs={"class":"post"})
elif soup.find("div", attrs={"class":"post-content"}):
print('Text import from
')
text = soup.find("div", attrs={"class":"post-content"})
elif soup.find("div", attrs={"class":"article-content"}):
print('Text import from
')
text = soup.find("div", attrs={"class":"article-content"})
elif soup.find("div", attrs={"class":"story-content"}):
print('Text import from
')
text = soup.find("div", attrs={"class":"story-content"})
elif soup.find("div", attrs={"class":"content"}):
print('Text import from
')
text = soup.find("div", attrs={"class":"content"})
elif soup.find("article"):
print('Text import from from ')
text = soup.find("article")
elif soup.find("div", attrs={"id":"page"}):
print('Text import from
')
text = soup.find("div", attrs={"id":"page"})
else:
text = soup("body")[0]
strip_tags = True
if paragraphs == True:
for t in text('img'):
t['style'] = "max-width:600px;max-height:600px;"
try:
t['src'] = urljoin(url, t['src'])
except:
pass
for t in text("div"):
del(t['class'])
del(t['style'])
for t in text("iframe"):
del(t['height'])
del(t['width'])
t['style'] = "max-width:600px;max-height:600px;margin:0em auto;display:block;"
if strip_tags == True:
lines = (line.strip() for line in text.get_text().splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
output = '
'+'
def kill_list():
    """Return the list of element selectors that should be discarded.

    Each entry is a two-item list ``[tag_name, attrs]``: ``tag_name`` is an
    HTML tag name and ``attrs`` is a dict holding a single ``"id"`` or
    ``"class"`` value to match.

    NOTE(review): presumably each pair is passed to BeautifulSoup as
    ``soup.find_all(tag_name, attrs)`` so the matches can be removed before
    text extraction -- the consumer is not visible here; confirm at the
    call site.

    Returns:
        A newly built list on every call (inner lists and dicts are new
        objects too), so callers may mutate the result without affecting
        later calls.
    """
    # A single literal replaces the former chain of append() calls and avoids
    # rebinding the function's own name to a local variable.
    return [
        ["div", {"id": "comments"}],
        ["div", {"class": "video"}],
        ["div", {"class": "m-linkset"}],
        ["div", {"class": "m-feature__intro"}],
        ["div", {"class": "m-share-buttons"}],
        ["p", {"class": "m-entry__byline"}],
        ["div", {"class": "social"}],
        ["div", {"id": "follow-bar"}],
        ["section", {"class": "m-rail-component"}],
    ]