Added 'Python/asciifarts_scrapper.py'
This commit is contained in:
33
Python/asciifarts_scrapper.py
Normal file
33
Python/asciifarts_scrapper.py
Normal file
@ -0,0 +1,33 @@
|
||||
import os
import re

import bs4
import requests
|
||||
|
||||
# Scraper configuration: the site root, the index ("hub") page whose links
# are followed, and the directory the downloaded art is written to.
base_url = "http://www.asciiartfarts.com/"
hub_url = "http://www.asciiartfarts.com/number.html"
out_dir = "out/"

# Seconds to wait on a single HTTP request, so a stalled connection cannot
# hang the whole scrape.
request_timeout = 30

# Links to individual art pages are relative hrefs of the form "<digits>.html".
# The dot is escaped and the pattern is applied with fullmatch() so that
# look-alikes such as "12xhtml" or "12.html?x" are rejected.
page_href_pattern = re.compile(r'\d+\.html')


def _fetch_soup(url):
    """Download *url* and return it parsed with Python's built-in HTML parser.

    Raises:
        requests.RequestException: on a connection failure, a timeout, or an
            HTTP error status (raised by ``raise_for_status``).
    """
    response = requests.get(url, timeout=request_timeout)
    response.raise_for_status()
    return bs4.BeautifulSoup(response.text, 'html.parser')


# A failure here is fatal on purpose: without the hub page there is nothing
# to scrape.
hub = _fetch_soup(hub_url)

# Collect the art-page links found inside the first <li> of the first <ul>
# of the hub page. A set is used so duplicate links are fetched only once.
# NOTE(review): the traversal is kept exactly as before; it assumes that
# element holds the page links -- confirm against the live markup.
href_list = set()
for a in hub.find('ul').find('li').find_all('a', href=True):
    # href=True already skips anchors without an href attribute.
    href = a['href']
    if page_href_pattern.fullmatch(href):
        href_list.add(href)

# open() does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

# Sorted so that runs are reproducible (set iteration order is arbitrary).
for h in sorted(href_list):
    try:
        page = _fetch_soup(base_url + h)
    except requests.RequestException as err:
        # One unreachable page should not abort the rest of the scrape.
        print('skipping {}: {}'.format(h, err))
        continue

    heading = page.find('h1')
    pre_blocks = page.find_all('pre')
    title_parts = heading.text.split(':') if heading is not None else []
    if len(title_parts) < 2 or len(pre_blocks) < 2:
        # The file name comes from the text after the first colon of the <h1>
        # and the art from the second <pre>; skip pages lacking either.
        print('skipping {}: unexpected page layout'.format(h))
        continue

    # Build a file name from the title segment; spaces and path separators
    # are replaced so the name stays a single path component.
    name = (
        title_parts[1]
        .strip()
        .replace(' ', '_')
        .replace('/', '_')
        + '.ascii'
    )
    ascii_art = pre_blocks[1].text
    print(name)
    # Explicit encoding: the platform default may be unable to encode
    # non-ASCII characters present in the scraped text.
    with open(os.path.join(out_dir, name), 'w', encoding='utf-8') as f:
        f.write(ascii_art)
|
Reference in New Issue
Block a user