init
This commit is contained in:
commit
a034dea430
4
Makefile
Normal file
4
Makefile
Normal file
@ -0,0 +1,4 @@
|
||||
# One-time setup: install the HTML parsing dependency and make the scripts
# executable.
#
# `init` names an action, not a file. Declaring it phony keeps the recipe
# running even if a file called `init` ever appears in this directory.
.PHONY: init

# pip is invoked through python3 so the package is installed for the same
# interpreter that OSFS_parser.py's shebang asks for.
init:
	python3 -m pip install --user beautifulsoup4
	chmod +x OSFS_download.sh OSFS_parser.py
|
16
OSFS_download.sh
Executable file
16
OSFS_download.sh
Executable file
@ -0,0 +1,16 @@
|
||||
#!/bin/bash
#
# Download the paginated "past grants" listing page by page and append the
# raw HTML of every page to a dump file.
#
# Usage: OSFS_download.sh [DUMPFILE]
#   DUMPFILE  file the HTML is written to (default: dump.log)

URL="https://www.opensocietyfoundations.org/grants/past?page="

# Fall back to dump.log when no (or an empty) first argument is given.
DUMPFILE="${1:-dump.log}"

# Start from a fresh dump file containing a single empty line.
echo '' > "$DUMPFILE"

I=0
while true; do
    echo "${URL}${I}:"
    # Save the page, then stop at the first page that contains no grant item
    # (this also ends the loop when curl returns nothing).
    # grep's output is discarded instead of using `grep -q`: -q exits on the
    # first match, which could kill tee with SIGPIPE before the whole page
    # has been written to the dump file.
    curl "${URL}${I}" | tee --append "$DUMPFILE" | grep '<li class="m-grantsDatabase__item">' > /dev/null 2>&1 || break
    I=$((I + 1))
done
|
48
OSFS_parser.py
Executable file
48
OSFS_parser.py
Executable file
@ -0,0 +1,48 @@
|
||||
#!/bin/python3
|
||||
|
||||
from sys import argv
|
||||
from contextlib import suppress
|
||||
import json
|
||||
import bs4
|
||||
|
||||
def usage():
    """Print the command-line usage message and exit with status 1.

    Raises:
        SystemExit: always, with exit code 1.
    """
    # Imported locally because the module only does `from sys import argv`.
    import sys

    print("Incorrect invocation. Usage:")
    # "\t" indents the example command line with a real tab character.
    print("\t" + argv[0] + " [FILE]")
    # sys.exit is always available, unlike the `exit` helper added by `site`.
    sys.exit(1)
|
||||
|
||||
# A dump file path is required as the first command-line argument.
if len(argv) == 1:
    usage()

# Read the downloaded HTML and parse it.
# The file is decoded explicitly as UTF-8 instead of with the locale's default
# encoding, so the result does not depend on the machine the script runs on.
# NOTE(review): assumes the site serves UTF-8 — confirm. Undecodable bytes are
# replaced rather than aborting the whole run.
with open(argv[1], encoding="utf-8", errors="replace") as f:
    b = bs4.BeautifulSoup(f.read(), 'html.parser')

# Every element carrying this class is treated as one grant entry.
entries = b.find_all(class_="m-grantsDatabase__item")
|
||||
|
||||
class donation:
    """Plain attribute container for one grant entry.

    Instances are created empty; the code that fills them in attaches the
    fields as ordinary instance attributes.
    """
|
||||
|
||||
def assignAttempt(o, attr, cmd):
    """Evaluate *cmd* and store the result on *o* under the name *attr*.

    *cmd* is a string holding a Python expression. If evaluating it raises
    AttributeError, an empty string is stored instead. Any other exception
    propagates to the caller.
    """
    # Default used when the expression cannot be evaluated. It is bound before
    # eval() runs so the expression sees the same local names as it always has.
    buf = ""
    try:
        buf = eval(cmd)
    except AttributeError:
        # Deliberate best effort: keep the empty-string default.
        pass
    setattr(o, attr, buf)
|
||||
|
||||
# Build one donation object per grant entry found in the parsed HTML.
donations = []

# The loop variable must stay named `i`: the expressions handed to
# assignAttempt below are strings that refer to the current entry by that name.
for i in entries:
    d = donation()

    # Fields read unconditionally.
    d.to = i.find(class_="a-grantsDatabase__title").get_text()
    d.date = i.find(class_="a-grantsDatabase__cell--1").get_text()
    d.amount = i.find(class_="a-grantsDatabase__cell--2").get_text()
    d.desc = (
        i.find(class_="a-grantsDatabase__cell--6")
        .find(class_="a-grantsDatabase__text")
        .get_text()
    )

    # Fields located through their label text: (attribute name, label).
    # assignAttempt stores "" when the lookup raises AttributeError.
    for _attr, _label in (
        ('theme', 'Theme'),
        ('ref_prog', 'Referring Program'),
        ('term', 'Term'),
        ('region', 'Region'),
        ('funder', 'Funder'),
    ):
        assignAttempt(
            d,
            _attr,
            'i.find(string="{}").parent.next_sibling.next_sibling.get_text()'.format(_label),
        )

    donations.append(d)
|
||||
|
||||
|
||||
|
||||
# Trim surrounding spaces, tabs and newlines from every collected field.
for i in donations:
    # list() snapshots the attribute names before the values are rewritten.
    # getattr/setattr address each attribute directly instead of building
    # source code for exec().
    for h in list(vars(i)):
        setattr(i, h, getattr(i, h).strip(" \t\n"))

# Emit all donations as a single JSON array on standard output.
print(json.dumps([i.__dict__ for i in donations]))
|
7
main.sh
Executable file
7
main.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
#
# Run the whole pipeline: download the grant listing pages into a dump file,
# then parse that dump and print the result.
#
# Usage: main.sh [DUMPFILE]
#   DUMPFILE  intermediate HTML dump (default: dump.log)

# Stop as soon as a step fails instead of parsing a missing or stale dump.
set -e

# Resolve the helper scripts relative to this file so the pipeline can be
# started from any working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

DUMPFILE="${1:-dump.log}"

"$SCRIPT_DIR/OSFS_download.sh" "$DUMPFILE"
"$SCRIPT_DIR/OSFS_parser.py" "$DUMPFILE"
|
Loading…
x
Reference in New Issue
Block a user