Added 'Misc./girl_vs_pig_scrapper.py'
This commit is contained in:
parent
85b1cf6e02
commit
185a2fec14
37
Misc./girl_vs_pig_scrapper.py
Executable file
37
Misc./girl_vs_pig_scrapper.py
Executable file
@ -0,0 +1,37 @@
|
||||
#!/bin/python3
|
||||
from os import mkdir
|
||||
from os.path import basename
|
||||
import requests
|
||||
import re
|
||||
from bs4 import *
|
||||
|
||||
base_url = "https://girlvspig.com"
|
||||
index_url = base_url + "/archive"
|
||||
strip_url = "/images/her{n}{letter}.gif"
|
||||
out_dir = "out/"
|
||||
|
||||
def get_comic_list():
|
||||
r = []
|
||||
index = BeautifulSoup(requests.get(index_url).text, 'html.parser')
|
||||
for i in index.find('article').find_all('li'):
|
||||
i = i.find('a')
|
||||
name = i.text.replace(' ', '_')
|
||||
n = re.search(r'\d+', name).group()
|
||||
r.append({'name' : name, 'n' : n})
|
||||
return r
|
||||
|
||||
def download_page(c):
|
||||
page_dir = out_dir + c['name']
|
||||
|
||||
try: mkdir(page_dir)
|
||||
except: pass
|
||||
|
||||
for l in [i for i in ['a', 'b', 'c', 'd']]:
|
||||
full_strip_url = base_url + strip_url.format(n=c['n'], letter=l)
|
||||
print(full_strip_url)
|
||||
response = requests.get(full_strip_url)
|
||||
with open(page_dir + "/" + basename(full_strip_url), 'wb') as f:
|
||||
f.write(response.content)
|
||||
|
||||
|
||||
for c in get_comic_list(): download_page(c)
|
Loading…
x
Reference in New Issue
Block a user