init
This commit is contained in:
commit
a034dea430
4
Makefile
Normal file
4
Makefile
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# `init` names an action, not a file: declare it phony so the recipe still
# runs when a file or directory called "init" exists next to the Makefile.
.PHONY: init

# One-time setup: install the HTML parsing library for the current user and
# mark both scripts as executable.
init:
	pip install --user beautifulsoup4
	chmod +x OSFS_download.sh
	chmod +x OSFS_parser.py
|
16
OSFS_download.sh
Executable file
16
OSFS_download.sh
Executable file
@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Download the paginated "past grants" listing page by page and collect the
# raw HTML of every page in a single dump file.
#
# Usage: OSFS_download.sh [DUMPFILE]
#   DUMPFILE  file to write the HTML to (default: dump.log)

URL="https://www.opensocietyfoundations.org/grants/past?page="

# Fall back to dump.log when no (or an empty) first argument is given.
DUMPFILE="${1:-dump.log}"

# Start every run from a fresh dump file.
echo '' > "$DUMPFILE"

I=0
while true; do
    echo "${URL}${I}:"

    # Fetch the complete page before doing anything else, so that what ends
    # up in the dump never depends on how much of a pipeline a downstream
    # reader happens to consume.
    PAGE=$(curl "${URL}${I}")

    # Quote the file name: it may contain spaces or glob characters.
    printf '%s\n' "$PAGE" >> "$DUMPFILE"

    # A page without a single grant item marks the end of the listing
    # (an empty page from a failed download stops the loop as well).
    grep -q '<li class="m-grantsDatabase__item">' <<< "$PAGE" || break

    I=$((I + 1))
done
|
48
OSFS_parser.py
Executable file
48
OSFS_parser.py
Executable file
@ -0,0 +1,48 @@
|
|||||||
|
#!/bin/python3
|
||||||
|
|
||||||
|
from sys import argv
|
||||||
|
from contextlib import suppress
|
||||||
|
import json
|
||||||
|
import bs4
|
||||||
|
|
||||||
|
def usage():
    """Print the command-line synopsis and terminate with exit status 1.

    Raises:
        SystemExit: always, with code 1.
    """
    print("Incorrect invocation. Usage:")
    # Indent the synopsis with a real tab character.
    print("\t" + argv[0] + " [FILE]")
    # Raise SystemExit directly instead of calling the interactive `exit`
    # helper, which only exists when the `site` module has been loaded.
    raise SystemExit(1)
|
||||||
|
|
||||||
|
# The FILE argument is mandatory; usage() prints the synopsis and exits.
if len(argv) < 2:
    usage()

# Decode explicitly rather than with the locale's default encoding, and do
# not abort on a stray undecodable byte in the dump.
# NOTE(review): assumes the downloaded pages are UTF-8 - confirm.
with open(argv[1], encoding="utf-8", errors="replace") as f:
    b = bs4.BeautifulSoup(f.read(), 'html.parser')

# Collect every element carrying the grant-item class; each one is turned
# into a single record further down.
entries = b.find_all(class_="m-grantsDatabase__item")
|
||||||
|
|
||||||
|
class donation:
    """Plain attribute container for one grant record.

    The class declares no fields of its own; attributes are attached to
    each instance dynamically by the code that fills it in.
    """
|
||||||
|
|
||||||
|
def assignAttempt(o, attr, cmd):
    """Set ``o.<attr>`` to the value produced by ``cmd``, or to ``""``.

    Args:
        o: object that receives the attribute.
        attr: name of the attribute to set.
        cmd: either a zero-argument callable, or a string containing a
            Python expression. A string is evaluated with ``eval`` in this
            function's scope, so it can refer to module-level names.

    If producing the value raises ``AttributeError`` (for example because
    a lookup returned ``None``), the attribute is set to the empty string.
    Any other exception propagates to the caller.
    """
    buf = ""
    with suppress(AttributeError):
        # SECURITY: the string form goes through eval(); only ever pass
        # trusted, hard-coded expressions. Prefer the callable form.
        buf = cmd() if callable(cmd) else eval(cmd)
    setattr(o, attr, buf)
|
||||||
|
|
||||||
|
# Build one `donation` record per grant element found in the dump.
donations = []

# NOTE: the loop variable must be called `i`; the expression strings handed
# to assignAttempt are evaluated later and look that name up.
for i in entries:
    d = donation()

    # Mandatory cells: a missing element raises AttributeError here.
    title_node = i.find(class_="a-grantsDatabase__title")
    d.to = title_node.get_text()
    date_node = i.find(class_="a-grantsDatabase__cell--1")
    d.date = date_node.get_text()
    amount_node = i.find(class_="a-grantsDatabase__cell--2")
    d.amount = amount_node.get_text()
    desc_cell = i.find(class_="a-grantsDatabase__cell--6")
    d.desc = desc_cell.find(class_="a-grantsDatabase__text").get_text()

    # Optional labelled values: the value is read from the second sibling
    # after the label's parent; absent ones become "" via assignAttempt.
    for attr_name, label in (
        ('theme', 'Theme'),
        ('ref_prog', 'Referring Program'),
        ('term', 'Term'),
        ('region', 'Region'),
        ('funder', 'Funder'),
    ):
        assignAttempt(
            d,
            attr_name,
            'i.find(string="' + label + '").parent.next_sibling.next_sibling.get_text()',
        )

    donations.append(d)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Trim surrounding spaces, tabs and newlines from every field of every
# record. Plain attribute access replaces building and exec()-ing source
# text for each field.
for entry in donations:
    # Snapshot the items so the attribute dict is not mutated while it is
    # being iterated.
    for name, value in list(vars(entry).items()):
        setattr(entry, name, value.strip(" \t\n"))

# Emit all records as one JSON array of objects on stdout.
print(json.dumps([vars(entry) for entry in donations]))
|
7
main.sh
Executable file
7
main.sh
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/bash
# Run the whole pipeline: download all listing pages into a dump file, then
# parse that dump and print the extracted records as JSON.

# Locate the helper scripts next to this file, so the pipeline also works
# when it is started from a different working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

DUMPFILE="dump.log"

"$SCRIPT_DIR/OSFS_download.sh" "$DUMPFILE"
# Debug alternative: parse a local sample instead of the fresh dump.
#"$SCRIPT_DIR/OSFS_parser.py" "test.txt"
"$SCRIPT_DIR/OSFS_parser.py" "$DUMPFILE"
|
Loading…
x
Reference in New Issue
Block a user