This commit is contained in:
anon 2023-03-10 14:08:43 +01:00
commit a034dea430
6 changed files with 8425 additions and 0 deletions

4
Makefile Normal file
View File

@ -0,0 +1,4 @@
# `init` is a command target, not a file: declare it phony so that a file
# named "init" in this directory can never make the target look up to date.
.PHONY: init

# One-time setup: install the HTML parsing dependency for the current user
# and make both scripts executable.
init:
	pip install --user beautifulsoup4
	chmod +x OSFS_download.sh
	chmod +x OSFS_parser.py

16
OSFS_download.sh Executable file
View File

@ -0,0 +1,16 @@
#!/bin/bash
# Download every page of the Open Society Foundations "past grants" listing
# and append the raw HTML of each page to a dump file.
#
# Usage: OSFS_download.sh [DUMPFILE]
#   DUMPFILE  file the pages are written to (default: dump.log)
#
# Pages are requested starting at page 0. The loop ends at the first response
# that contains no grant list item; this is also what happens when curl fails
# or the server answers with an error page instead of a listing.
URL="https://www.opensocietyfoundations.org/grants/past?page="
DUMPFILE="${1:-dump.log}"

# Reset the dump file; it starts out holding a single empty line.
echo '' > "$DUMPFILE"

I=0
while true; do
  # Progress marker: the URL that is about to be fetched.
  echo "${URL}${I}:"
  # tee stores the page while grep checks whether it holds at least one grant.
  # grep's output is discarded rather than using -q so that grep consumes the
  # whole page and tee is not cut short by its reader exiting early.
  curl "${URL}${I}" \
    | tee --append -- "$DUMPFILE" \
    | grep '<li class="m-grantsDatabase__item">' &> /dev/null \
    || break
  I=$((I + 1))
done

48
OSFS_parser.py Executable file
View File

@ -0,0 +1,48 @@
#!/bin/python3
from sys import argv
from contextlib import suppress
import json
import bs4
def usage():
    """Print the command-line synopsis to stdout and exit with status 1."""
    print("Incorrect invocation. Usage:")
    # Tab-indent the synopsis line; argv[0] is the name the script was run as.
    print("\t" + argv[0] + " [FILE]")
    # SystemExit is used directly: the bare `exit` helper is only installed by
    # the `site` module and is not guaranteed to exist.
    raise SystemExit(1)
# No file argument given: show the synopsis and stop.
if len(argv) == 1:
    usage()

# Read the whole dump into memory and parse it with the built-in HTML parser.
with open(argv[1]) as f:
    markup = f.read()
    b = bs4.BeautifulSoup(markup, 'html.parser')

# Every element carrying this class is treated as one grant record.
entries = b.find_all(class_="m-grantsDatabase__item")
class donation:
    """Plain attribute container for one grant.

    Instances start empty; fields are attached dynamically and the instance
    ``__dict__`` is what ends up being serialized.
    """
def assignAttempt(o, attr, cmd):
    """Set ``o.<attr>`` to the value produced by ``cmd``, or to "" on failure.

    Parameters:
        o:    object that receives the attribute.
        attr: name of the attribute to set.
        cmd:  either a zero-argument callable, or a string holding a Python
              expression. A string is passed to ``eval`` inside this function,
              so any name it uses must be resolvable from here (module
              globals). Only pass trusted, hard-coded expressions as strings.

    An ``AttributeError`` raised while producing the value is swallowed and
    the attribute is set to the empty string instead; any other exception
    propagates to the caller.
    """
    buf = ""
    with suppress(AttributeError):
        # Prefer callables; strings remain supported for existing callers.
        buf = cmd() if callable(cmd) else eval(cmd)
    setattr(o, attr, buf)
# Build one `donation` record per grant item found in the dump.
donations = []

# Optional detail rows: (attribute name on the record, label text in the HTML).
_optional_fields = (
    ('theme', 'Theme'),
    ('ref_prog', 'Referring Program'),
    ('term', 'Term'),
    ('region', 'Region'),
    ('funder', 'Funder'),
)

# NOTE: this loop variable must stay named `i` at module level. The expression
# strings handed to assignAttempt() are evaluated there and refer to the
# current item by that global name.
for i in entries:
    d = donation()
    # Core fields are read directly (not through assignAttempt), so an item
    # lacking one of these elements is not tolerated.
    d.to = i.find(class_="a-grantsDatabase__title").get_text()
    d.date = i.find(class_="a-grantsDatabase__cell--1").get_text()
    d.amount = i.find(class_="a-grantsDatabase__cell--2").get_text()
    d.desc = i.find(class_="a-grantsDatabase__cell--6").find(class_="a-grantsDatabase__text").get_text()
    # Optional fields: locate the label text, then take the text of the second
    # sibling after the label's parent. assignAttempt stores "" when any step
    # of that lookup hits an AttributeError.
    for _attr, _label in _optional_fields:
        assignAttempt(
            d,
            _attr,
            'i.find(string="{}").parent.next_sibling.next_sibling.get_text()'.format(_label),
        )
    donations.append(d)

# Trim surrounding spaces, tabs and newlines from every collected field.
# The items are snapshotted with list() because attributes are reassigned
# while walking them.
for rec in donations:
    for name, value in list(vars(rec).items()):
        setattr(rec, name, value.strip(" \t\n"))

# Emit all records as a single JSON array of objects on stdout.
print(json.dumps([rec.__dict__ for rec in donations]))

2
dump.log Normal file
View File

@ -0,0 +1,2 @@
error code: 1015

7
main.sh Executable file
View File

@ -0,0 +1,7 @@
#!/bin/bash
# Run the whole pipeline: download all grant pages into a dump file in the
# current directory, then run the parser on that file.
DUMPFILE="dump.log"

# Resolve the directory holding this script so the helper scripts are found
# even when the pipeline is started from a different working directory.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || exit 1

# Do not parse anything if the download step could not be run.
"${SCRIPT_DIR}/OSFS_download.sh" "$DUMPFILE" || exit 1

# Alternative input for offline runs: parse test.txt instead of a fresh dump.
#"${SCRIPT_DIR}/OSFS_parser.py" "test.txt"
"${SCRIPT_DIR}/OSFS_parser.py" "$DUMPFILE"

8348
test.txt Normal file

File diff suppressed because it is too large Load Diff