34 lines
738 B
Python
34 lines
738 B
Python
import os
import re
import sys

import bs4
import requests
|
|
|
|
# Scrape every numbered ASCII-art page linked from the site's index page and
# save the art from each page to its own file under ``out_dir``.

base_url = "http://www.asciiartfarts.com/"
hub_url = "http://www.asciiartfarts.com/number.html"
out_dir = "out/"

# Seconds to wait on a single HTTP request before giving up, so a stalled
# connection cannot hang the whole run.
request_timeout = 30


def _fetch_soup(url):
    """Download *url* and return it parsed with the 'html.parser' backend.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status (so error pages are never parsed as art).
    """
    response = requests.get(url, timeout=request_timeout)
    response.raise_for_status()
    return bs4.BeautifulSoup(response.text, 'html.parser')


def _art_filename(heading):
    """Build the output file name from the text of a page's <h1>.

    The segment between the first and second ':' is used as the title (the
    whole heading when it contains no ':'). Surrounding whitespace is
    stripped, spaces and '/' are replaced by '_', and '.ascii' is appended.
    """
    parts = heading.split(':')
    title = parts[1] if len(parts) > 1 else parts[0]
    return title.strip().replace(' ', '_').replace('/', '_') + '.ascii'


hub = _fetch_soup(hub_url)

# The page links are read from the first <li> of the first <ul> on the index.
first_list = hub.find('ul')
first_item = first_list.find('li') if first_list is not None else None
if first_item is None:
    raise RuntimeError("no <ul>/<li> link list found on {}".format(hub_url))

# Keep only links of the exact form "<digits>.html"; href=True skips anchors
# that have no href attribute at all. A set drops duplicate links.
href_list = {
    a['href']
    for a in first_item.find_all('a', href=True)
    if re.fullmatch(r'\d+\.html', a['href'])
}

# Make sure the destination exists before the first write.
os.makedirs(out_dir, exist_ok=True)

# Sorted so that runs are reproducible (set iteration order is arbitrary).
for h in sorted(href_list):
    try:
        page = _fetch_soup(base_url + h)
    except requests.RequestException as err:
        # One bad page should not abort the whole scrape.
        print("skipping {}: {}".format(h, err), file=sys.stderr)
        continue

    heading = page.find('h1')
    pre_blocks = page.find_all('pre')
    # The art is taken from the second <pre>; skip pages that lack it or
    # that have no heading to name the file after.
    if heading is None or len(pre_blocks) < 2:
        print("skipping {}: unexpected page layout".format(h), file=sys.stderr)
        continue

    name = _art_filename(heading.text)
    ascii_art = pre_blocks[1].text
    print(name)
    with open(os.path.join(out_dir, name), 'w', encoding='utf-8') as f:
        f.write(ascii_art)
|