This commit is contained in:
anon
2024-01-25 18:37:18 +01:00
commit ec30c7d15d
52 changed files with 1908 additions and 0 deletions

9
.gitignore vendored Normal file

@ -0,0 +1,9 @@
**/__pycache__/
scrapper/venv
scrapper/service/scrapper.lock
vichan_archive.tar.gz
vichan_archive_data.tar.gz
db/data.sqlite
db/data.sqlite.bak
db/files/*
db/files.bak

52
Makefile Normal file

@ -0,0 +1,52 @@
# Default target: harmless no-op so a bare `make` never destroys anything.
pass:
	@echo pass
# ----
include config.mk
# NOTE(review): APACHE_CONFIG_DIR in config.mk already ends in vhosts.d/,
# so this expands to .../vhosts.d/vhosts.d/ — confirm the intended path.
VHOSTS_D := $(APACHE_CONFIG_DIR)/vhosts.d/
# bash is required: the init_python recipe uses `source`.
SHELL := /bin/bash

# None of these targets produce a file with the target's name; declare them
# phony so a same-named file in the tree can never mask them.
.PHONY: pass clone init_db init_python init server service scrap repair restore

# Build the distributable tarballs (data and source) into front_end/.
clone:
	tar -I 'gzip --best' -c db/data.sqlite db/files/ -f front_end/vichan_archive_data.tar.gz
	git archive --output=front_end/vichan_archive.tar.gz master

# Re-initialize the database, keeping a .bak copy of the old data.
# Leading `-`: ignore errors so a fresh checkout (nothing to back up) works.
init_db:
	-mv db/data.sqlite db/data.sqlite.bak
	-rm -r db/files.bak
	-mv db/files db/files.bak
	-mkdir db/files; touch db/files/.placeholder
	cd db/; cat init.sql | sqlite3

# Create the scrapper's Python virtualenv and install its requirements.
init_python:
	cd scrapper/; \
	python -m venv venv; \
	source venv/bin/activate; \
	pip install -r requirements.txt

init: init_db init_python

# Install the Apache vhost config (uses APACHE_CONFIG_DIR from config.mk).
server:
	-mkdir $(VHOSTS_D)
	m4 srv/archive.apache2.vhost.conf.m4 > $(VHOSTS_D)/archive.conf

# Install the cron entry that schedules the scrapper.
service:
	cd scrapper/service/; \
	m4 cron.m4 > /etc/cron.d/fc_scrapper

# Run the scrapper once, in the foreground.
scrap:
	cd scrapper/; \
	./run.sh

# Run the scrapper in repair mode (re-fetch corrupted posts' files).
repair:
	cd scrapper/; \
	./run.sh -r

# Roll the database back to the last .bak made by init_db.
restore:
	-rm db/data.sqlite
	-cp db/data.sqlite.bak db/data.sqlite

60
README.md Normal file

@ -0,0 +1,60 @@
# Vichan Scrapper
> scrapper for archiving data from vichan instances with a minimalistic frontend included to ease local viewing
### Demo
![frontend\_index\_demo](docs/demo1.png)
![frontend\_board\_demo](docs/demo2.png)
### Disclaimer
The scrapper worked well on the specific instance it was built around and tested on;
however, it may fail on a different (unknown) version
and/or configuration.
### Requirements
+ Python3
+ Sqlite3
+ Apache2 or PHP (see Installation/Server)
### Installation
1. Meet the requirements
#### Base
```sh
$ make init # initialize database and python environment
$ make clone # create tarballs for distribution
```
##### Server
###### For yourself
+ If you don't need the front end to be publicly facing, it's easiest to use the PHP interpreter's built-in server.
1. Navigate to the front end's directory
```sh
$ cd <vichan_scrapper>/front_end/
```
2. Deploy the PHP server
```sh
$ php -S localhost:8000
```
3. Access it through your browser on the address:
localhost:8000
###### In production
1. Run apache
2. Add configs
```sh
$ make server
```
##### Schedule scrapper
```sh
$ make service
```
+ the default is to scrape every hour, and to attempt to fetch missing files every 3.5 hours
+ for personalization see scrapper/service/cron.m4
### Configuration
#### Scrapper
#### Front end
+ front\_end/config.php
- posts\_per\_page : int
- search\_enabled : boolean
#### Recommendations
+ use XFS

1
config.mk Normal file

@ -0,0 +1 @@
# Base directory of the Apache configuration; the Makefile's `server` target
# installs the vhost under $(APACHE_CONFIG_DIR)/vhosts.d/.
# NOTE(review): this value already ends in vhosts.d/, and the Makefile appends
# another vhosts.d/ — confirm whether this should be just /etc/apache2/.
APACHE_CONFIG_DIR := /etc/apache2/vhosts.d/

50
db/init.sql Normal file

@ -0,0 +1,50 @@
.open data.sqlite
-- --------------------------------------------------------
--
-- Table structure for table `boards`
--
-- One row per scraped board; `name` is stored with slashes, e.g. '/b/'
-- (the front end matches it with '/'.$s.'/').
-- NOTE(review): `desc` is an SQL keyword; SQLite tolerates it here, but
-- quoting it ("desc") would be safer and more portable.
DROP TABLE IF EXISTS boards;
CREATE TABLE boards (name VARCHAR(10) PRIMARY KEY,
desc VARCHAR(24)
);
-- --------------------------------------------------------
--
-- Table structure for table `posts`
--
-- Post numbers restart per board, hence the composite (id, board) key.
-- `thread` is NULL for thread openers and holds the opener's id for replies.
-- NOTE(review): `board` is declared INTEGER but the scrapper and front end
-- store/compare text like '/b/' (works via SQLite type affinity); TEXT would
-- match actual usage.
DROP TABLE IF EXISTS posts;
CREATE TABLE posts (
id INTEGER,
board INTEGER,
thread INT(11) DEFAULT NULL,
subject VARCHAR(100) DEFAULT NULL,
email VARCHAR(30) DEFAULT NULL,
name VARCHAR(35) DEFAULT NULL,
trip VARCHAR(15) DEFAULT NULL,
capcode VARCHAR(50) DEFAULT NULL,
body text,
time VARCHAR(30),
num_files INT(11) DEFAULT 0, -- Used for integrity checks, NOT redundant
--`filehash` text CHARACTER SET ascii
PRIMARY KEY (id, board)
);
-- --------------------------------------------------------
--
-- Table structure for table `files`
--
-- One row per downloaded attachment; (post, board) references posts.
DROP TABLE IF EXISTS files;
CREATE TABLE files (
id INTEGER PRIMARY KEY,
name TEXT,
post INTEGER NOT NULL,
board INTEGER NOT NULL,
path text
);

File diff suppressed because one or more lines are too long

BIN
docs/demo1.png Normal file

Binary file not shown.

After

(image error) Size: 2.6 MiB

BIN
docs/demo2.png Normal file

Binary file not shown.

After

(image error) Size: 2.0 MiB

27
front_end/404.php Normal file

@ -0,0 +1,27 @@
<!DOCTYPE html>
<html>
<head>
<title>Example Archive - 404</title>
<link rel="stylesheet" href="global.css">
<meta charset="utf-8">
<style>
#_404_container {
display: flex;
justify-content: center;
}
#_404_container img {
width: 60%;
}
</style>
</head>
<body>
<div id=body_main>
<div id=index_header>
<p>Vichan Archive - 404</p>
</div>
<div id=_404_container>
<img id="404" src="media/apu404.png" alt="apu404"></img>
</div>
</div>
</body>
</html>

Binary file not shown.

After

(image error) Size: 24 KiB

Binary file not shown.

After

(image error) Size: 135 KiB

Binary file not shown.

After

(image error) Size: 22 KiB

110
front_end/board.php Normal file

@ -0,0 +1,110 @@
<!DOCTYPE html>
<html>
<?php
require_once('global.php');
require_once('config.php');
# Query validation
if(!isset($_GET['page'])){
$page = 1;
}else{
$page = intval($_GET['page']);
}
if(validate_board_name($_GET['board'])){
$board = $_GET['board'];
}else{
header('Location: /404.php');
die();
}
?>
<head>
<title>Examplechan - Archive /<?=$board?>/</title>
<link rel="stylesheet" href="global.css">
<meta charset="utf-8">
<style>
span {
color: #06df20;
}
.thread {
box-sizing: border-box;
padding: 20px;
}
.thread:hover {
background: teal;
cursor: pointer;
}
.thread img {
width: 200px;
}
.file {
max-height: 400px;
overflow-y: hidden;
}
.page_list {
text-align: center;
font-size: 2rem;
color: yellow;
font-weight: bold;
}
.page_list a:link {
color: lime;
font-weight: normal;
}
.page_list a:visited {
color: lightgreen;
}
</style>
</head>
<body>
<div id=body_main>
<div id=index_header>
<p>Examplechan Archive - /<?=$board?>/</p>
<a href="/">
<img id=plant src="media/plant.png" alt="fc_logo"></img>
</a>
</div>
<div class=page_list>
[
<?php
// Use the validated $board (checked against the boards table above), never
// raw $_GET['board'] — the old code concatenated the raw query parameter
// into the SQL string, which is an SQL injection vector.
$post_count = $db->querySingle('SELECT COUNT(*) count FROM posts WHERE board = \'/' . $board . '/\' and thread is NULL;');
$page_count = ceil($post_count / $config['posts_per_page']);
for($i = 0; $i < $page_count; $i++):
?>
<a href="/board.php?board=<?=$board?>&page=<?=$i+1?>"><?=$i+1?></a>
<?php
endfor;
?>
]
</div>
<hr>
<hr>
<?php
$query = 'SELECT * FROM posts WHERE ' .
'board = \'/' . $board . '/\' ' .
'AND ' .
'thread IS NULL ' .
'ORDER BY id DESC ' .
'LIMIT ' . $config['posts_per_page'] . ' ' .
'OFFSET ' . ($config['posts_per_page']*($page-1)) . ';';
$results = $db->query($query);
while($row = $results->fetchArray()):
?>
<div class="thread" onclick="window.location='/post.php?board=<?=$board?>&post=<?=$row['id']?>';">
<div>
<?=print_post_head($row)?>
<div class='files'>
<?=print_files($row['id'], $row['board'])?>
</div>
<div class='post_body'>
<?=$row['body']?>
</div>
</div>
</div>
<hr>
<?php endwhile; ?>
<script id=page_list_duplicator type="text/javascript" src="js/duplicate_page_list.js"></script>
</div>
</body>
</html>

31
front_end/color_hash.php Normal file

@ -0,0 +1,31 @@
<?php
define('COLORS', [
["#ff0000", "white"], /* Red */
["#ffa500", "black"], /* Orange */
["#ffff00", "black"], /* Yellow */
["#00ff00", "black"], /* Lime */
["#008000", "white"], /* Green */
["#00ffff", "black"], /* Aquamarine */
["#00bfff", "white"], /* Cyan */
["#0000ff", "white"], /* Blue */
["#4b0082", "white"], /* Indigo */
["#ffc0cb", "black"], /* Pink */
["#ff00ff", "black"], /* Magenta */
["#ff7f50", "black"], /* Coral */
["#fa8072", "white"], /* Salmon */
["#ff6347", "white"], /* Tomato */
["#ffd700", "black"], /* Gold */
["#f0e68c", "black"], /* Khaki */
["#d2b48c", "white"], /* Tan */
["#d2691e", "white"], /* Chocolate */
["#a0522d", "white"], /* Sienna */
["#800000", "white"], /* Maroon */
["#808080", "white"], /* Gray */
["#000000", "white"], /* Black */
["#ffffff", "black"] /* White */
]);
function ids2color($id){
return $id == 'ONION' ? ["#800080", "white"] /* Purple */ : COLORS[intval(crc32($id)) % 23];
}
?>

4
front_end/config.php Normal file

@ -0,0 +1,4 @@
<?php
$config['posts_per_page'] = 10;
$config['search_enabled'] = false;
?>

1
front_end/data.sqlite Symbolic link

@ -0,0 +1 @@
../scrapper/data.sqlite

65
front_end/downloads.php Normal file

@ -0,0 +1,65 @@
<!DOCTYPE html>
<html>
<?php
require_once('config.php');
?>
<head>
<title>ViChan - Archive</title>
<link rel="stylesheet" href="global.css">
<meta charset="utf-8">
<style>
button {
float: left;
height: 100%;
width: 100%;
color: green;
font-weight: bold;
font-size: 2.4rem;
}
a {
display: inline-block;
height: 100%;
width: 100%;
}
.bdiv {
height: 100px;
width: 300px;
}
#mid {
display: flex;
justify-content: space-evenly;
padding-top: 40px;
}
</style>
</head>
<body>
<div id=body_main>
<div id=index_header>
<p>Vichan Archive - Memetic core</p>
</div>
<hr>
<hr>
<div id=mid>
<div class=bdiv>
<a href="vichan_archive_data.tar.gz" download>
<button class=flashy_button>
Database + Files
</button>
</a>
</div>
<div class=bdiv>
<a href="vichan_archive.tar.gz" download>
<button class=flashy_button>
Scrapper + Front end
</button>
</a>
</div>
</div>
</div>
<script>
</script>
</body>
</html>

BIN
front_end/favicon-16x16.png Normal file

Binary file not shown.

After

(image error) Size: 566 B

BIN
front_end/favicon-32x32.png Normal file

Binary file not shown.

After

(image error) Size: 1.4 KiB

BIN
front_end/favicon.ico Normal file

Binary file not shown.

After

Width: 48px  |  Height: 48px  |  Size: 15 KiB

1
front_end/files Symbolic link

@ -0,0 +1 @@
../db/files

74
front_end/global.css Normal file

@ -0,0 +1,74 @@
:root {
--std-border: solid green 5px;
}
body {
margin: 0;
background-image: url("media/background.jpg");
background-repeat: no-repeat;
background-size: cover;
background-attachment: fixed;
color: teal;
}
button {
cursor: pointer;
}
hr {
color: rgba(0, 204, 0, 1);
}
#body_main {
margin: auto;
margin-top: 70px;
width: 70%;
border: var(--std-border);
padding: 10px 30px 50px 30px;
background: rgba(0, 0, 0, 0.5);
}
#index_header {
text-align: center;
}
#index_header p {
font-size: 2rem;
color: lime;
}
.file {
vertical-align: top;
display: inline-block;
}
.post_head {
color: lime;
}
.post_body {
color: white;
display: inline-block;
vertical-align: top;
margin-top: 10px;
}
.subject {
font-weight: bold;
color: #39ff14;
}
.name {
color: #39ff14;
}
.poster_id {
font-weight: bold;
border: 3px;
border-radius: 5px;
padding: 2px 4px 1px 4px;
}
.quote {
color: #55d02e;
}
#plant {
height: 256px;
}
.flashy_button {
background: rgba(45, 226, 230, 1);
border: rgba(45, 226, 230, 1);
border-radius: 10px;
}

60
front_end/global.php Normal file

@ -0,0 +1,60 @@
<?php
require_once('color_hash.php');
$db = new SQLite3('data.sqlite', SQLITE3_OPEN_READONLY);
function print_post_head($p){
    // Render a post's header line: subject, poster name, post number,
    // colored poster-ID badge (color derived from the capcode via
    // ids2color) and timestamp. $p is a row from the `posts` table.
    // NOTE(review): values are echoed without htmlspecialchars() — this
    // assumes the scrapper stored sanitized HTML; confirm before exposing
    // the archive publicly.
    $c = ids2color($p['capcode']);
?>
    <div class="post_head">
        <span class='subject'><?=$p['subject']?></span>
        <span class='name'><?=$p['name']?></span>
        <a class='post_no'>No. <?=$p['id']?></a>
        <span class='poster_id' style="background: <?=$c[0]?>; color: <?=$c[1]?>">ID <?=strtoupper($p['capcode'])?></span>
        <span class='date'><?=$p['time']?></span>
    </div>
<?php
}
function print_file($f){
    // Render one attachment ($f = row from `files`): the file name, then an
    // <img> or <video> element chosen by the MIME type sniffed from the file
    // on disk; any other MIME type gets the name only.
    $mt = mime_content_type($f['path']);
?>
    <div class='file'>
        <div><?=$f['name']?></div>
<?php
    if(strpos($mt, 'image/') === 0):
?>
        <img src='<?=$f['path']?>'></img>
<?php
    elseif(strpos($mt, 'video/') === 0):
?>
        <video src='<?=$f['path']?>'></video>
<?php
    endif;
?>
    </div>
<?php
}
function print_files($no, $board){
    // Print every file attached to post $no on board $board, ordered by id.
    // Uses a prepared statement with bound values instead of string-built
    // SQL: $board/$no come from DB rows of scraped content, so quotes or
    // crafted values in them must not be able to alter the query.
    global $db;
    $stmt = $db->prepare('SELECT * FROM files WHERE post = :post AND board = :board ORDER BY id;');
    $stmt->bindValue(':post', intval($no), SQLITE3_INTEGER);
    $stmt->bindValue(':board', $board, SQLITE3_TEXT);
    $files = $stmt->execute();
    if($files){
        while($f = $files->fetchArray()){
            print_file($f);
        }
    }
}
function validate_board_name($s){
    // A board name is valid iff '/<name>/' appears in the boards table.
    global $db;
    $needle = '/' . $s . '/';
    $result = $db->query('SELECT name FROM boards;');
    while($row = $result->fetchArray()){
        if($row['name'] == $needle){
            return true;
        }
    }
    return false;
}
?>

98
front_end/index.php Normal file

@ -0,0 +1,98 @@
<!DOCTYPE html>
<html>
<?php
require_once('global.php');
?>
<head>
<title>ExampleChan - Archive</title>
<link rel="stylesheet" href="global.css">
<meta charset="utf-8">
<style>
#menu {
display: flex;
justify-content: space-between;
gap: 0.5%;
}
#menu * {
width: 100%;
height: 30px;
box-sizing: border-box
}
/* ------- */
table {
color: white;
border: var(--std-border);
width: 100%;
border-collapse: collapse;
}
.hr {
background: rgba(0, 128, 128, 0.5);
}
.hr:hover {
background: forestgreen;
cursor: pointer;
/*font-weight: bold;*/
}
th, td {
text-align: left;
}
th {
color: lime;
border: solid green 1px;
border-collapse: collapse;
}
td {
padding-left: 1%;
}
</style>
</head>
<body>
<div id=body_main>
<div id=index_header>
<p>Examplechan Archive</p>
<img id="plant" src="media/plant.png" alt="fc_logo"></img>
</div>
<div id=menu>
<a href="/downloads.php">
<button class=flashy_button>
Get a copy
</button>
</a>
<a href="/search.php">
<button class=flashy_button>
Advanced search
</button>
</a>
</div>
<hr>
<table id=board_list>
<thead>
<tr class="hr">
<th>Board</th>
<th>Threads</th>
<th>Files</th>
<th>Posts</th>
</tr>
</thead>
<tbody>
<?php
$results = $db->query('SELECT * FROM boards;');
while($row = $results->fetchArray()):
?>
<tr class="hr" onclick="window.location='<?='/board.php?board='.trim($row['name'], '/')?>';">
<td><?=$row['name']?> - <?=$row['desc']?></td>
<td><?=$db->querySingle('SELECT COUNT(*) count FROM posts WHERE board = \'' . $row['name'] . '\' AND thread IS NULL;')?></td>
<td><?=$db->querySingle('SELECT COUNT(*) FROM posts INNER JOIN files ON posts.id = files.post and posts.board = files.board WHERE posts.board = \'' . $row['name'] . '\';')?></td>
<td><?=$db->querySingle('SELECT COUNT(*) count FROM posts WHERE board = \'' . $row['name'] . '\';')?></td>
</tr>
<?php endwhile; ?>
</tbody>
</table>
</div>
<script>
</script>
</body>
</html>

@ -0,0 +1,2 @@
var pl2 = document.getElementsByClassName('page_list')[0].cloneNode(true);
document.getElementById('page_list_duplicator').replaceWith(pl2);

5
front_end/js/jquery.min.js vendored Normal file

File diff suppressed because one or more lines are too long

170
front_end/js/post-hover.js Normal file

@ -0,0 +1,170 @@
/*
* post-hover.js
* https://github.com/savetheinternet/Tinyboard/blob/master/js/post-hover.js
*
* Released under the MIT license
* Copyright (c) 2012 Michael Save <savetheinternet@tinyboard.org>
* Copyright (c) 2013-2014 Marcin Łabanowski <marcin@6irc.net>
* Copyright (c) 2013 Macil Tech <maciltech@gmail.com>
*
* Usage:
* $config['additional_javascript'][] = 'js/jquery.min.js';
* $config['additional_javascript'][] = 'js/post-hover.js';
*
*/
onready(function(){
var dont_fetch_again = [];
init_hover = function() {
var $link = $(this);
var id;
var matches;
if ($link.is('[data-thread]')) {
id = $link.attr('data-thread');
}
else if(matches = $link.text().match(/^>>(?:>\/([^\/]+)\/)?(\d+)$/)) {
id = matches[2];
}
else {
return;
}
var board = $(this);
while (board.data('board') === undefined) {
board = board.parent();
}
var threadid;
if ($link.is('[data-thread]')) threadid = 0;
else threadid = board.attr('id').replace("thread_", "");
board = board.data('board');
var parentboard = board;
if ($link.is('[data-thread]')) parentboard = $('form[name="post"] input[name="board"]').val();
else if (matches[1] !== undefined) board = matches[1];
var $post = false;
var hovering = false;
var hovered_at;
$link.hover(function(e) {
hovering = true;
hovered_at = {'x': e.pageX, 'y': e.pageY};
var start_hover = function($link) {
if ($post.is(':visible') &&
$post.offset().top >= $(window).scrollTop() &&
$post.offset().top + $post.height() <= $(window).scrollTop() + $(window).height()) {
// post is in view
$post.addClass('highlighted');
} else {
var $newPost = $post.clone();
$newPost.find('>.reply, >br').remove();
$newPost.find('span.mentioned').remove();
$newPost.find('a.post_anchor').remove();
$newPost
.attr('id', 'post-hover-' + id)
.attr('data-board', board)
.addClass('post-hover')
.css('border-style', 'solid')
.css('box-shadow', '1px 1px 1px #999')
.css('display', 'block')
.css('position', 'absolute')
.css('font-style', 'normal')
.css('z-index', '100')
.addClass('reply').addClass('post')
.insertAfter($link.parent())
$link.trigger('mousemove');
}
};
$post = $('[data-board="' + board + '"] div.post#reply_' + id + ', [data-board="' + board + '"]div#thread_' + id);
if($post.length > 0) {
start_hover($(this));
} else {
var url = $link.attr('href').replace(/#.*$/, '');
if($.inArray(url, dont_fetch_again) != -1) {
return;
}
dont_fetch_again.push(url);
$.ajax({
url: url,
context: document.body,
success: function(data) {
var mythreadid = $(data).find('div[id^="thread_"]').attr('id').replace("thread_", "");
if (mythreadid == threadid && parentboard == board) {
$(data).find('div.post.reply').each(function() {
if($('[data-board="' + board + '"] #' + $(this).attr('id')).length == 0) {
$('[data-board="' + board + '"]#thread_' + threadid + " .post.reply:first").before($(this).hide().addClass('hidden'));
}
});
}
else if ($('[data-board="' + board + '"]#thread_'+mythreadid).length > 0) {
$(data).find('div.post.reply').each(function() {
if($('[data-board="' + board + '"] #' + $(this).attr('id')).length == 0) {
$('[data-board="' + board + '"]#thread_' + mythreadid + " .post.reply:first").before($(this).hide().addClass('hidden'));
}
});
}
else {
$(data).find('div[id^="thread_"]').hide().attr('data-cached', 'yes').prependTo('form[name="postcontrols"]');
}
$post = $('[data-board="' + board + '"] div.post#reply_' + id + ', [data-board="' + board + '"]div#thread_' + id);
if(hovering && $post.length > 0) {
start_hover($link);
}
}
});
}
}, function() {
hovering = false;
if(!$post)
return;
$post.removeClass('highlighted');
if($post.hasClass('hidden') || $post.data('cached') == 'yes')
$post.css('display', 'none');
$('.post-hover').remove();
}).mousemove(function(e) {
if(!$post)
return;
var $hover = $('#post-hover-' + id + '[data-board="' + board + '"]');
if($hover.length == 0)
return;
var scrollTop = $(window).scrollTop();
if ($link.is("[data-thread]")) scrollTop = 0;
var epy = e.pageY;
if ($link.is("[data-thread]")) epy -= $(window).scrollTop();
var top = (epy ? epy : hovered_at['y']) - 10;
if(epy < scrollTop + 15) {
top = scrollTop;
} else if(epy > scrollTop + $(window).height() - $hover.height() - 15) {
top = scrollTop + $(window).height() - $hover.height() - 15;
}
$hover.css('left', (e.pageX ? e.pageX : hovered_at['x'])).css('top', top);
});
};
$('div.body a:not([rel="nofollow"])').each(init_hover);
// allow to work with auto-reload.js, etc.
$(document).on('new_post', function(e, post) {
$(post).find('div.body a:not([rel="nofollow"])').each(init_hover);
});
});

@ -0,0 +1,61 @@
/*
* show-backlinks.js
* https://github.com/savetheinternet/Tinyboard/blob/master/js/show-backlinks.js
*
* Released under the MIT license
* Copyright (c) 2012 Michael Save <savetheinternet@tinyboard.org>
* Copyright (c) 2013-2014 Marcin Łabanowski <marcin@6irc.net>
*
* Usage:
* $config['additional_javascript'][] = 'js/jquery.min.js';
* // $config['additional_javascript'][] = 'js/post-hover'; (optional; must come first)
* $config['additional_javascript'][] = 'js/show-backlinks.js';
*
*/
$(document).ready(function(){
var showBackLinks = function() {
var reply_id = $(this).attr('id').replace(/(^reply_)|(^op_)/, '');
$(this).find('div.body a:not([rel="nofollow"])').each(function() {
var id, post, $mentioned;
if(id = $(this).text().match(/^>>(\d+)$/))
id = id[1];
else
return;
$post = $('#reply_' + id);
if($post.length == 0){
$post = $('#op_' + id);
if($post.length == 0)
return;
}
$mentioned = $post.find('p.intro span.mentioned');
if($mentioned.length == 0)
$mentioned = $('<span class="mentioned unimportant"></span>').appendTo($post.find('p.intro'));
if ($mentioned.find('a.mentioned-' + reply_id).length != 0)
return;
var $link = $('<a class="mentioned-' + reply_id + '" onclick="highlightReply(\'' + reply_id + '\');" href="#' + reply_id + '">&gt;&gt;' +
reply_id + '</a>');
$link.appendTo($mentioned)
if (window.init_hover) {
$link.each(init_hover);
}
});
};
$('div.post.reply').each(showBackLinks);
$('div.post.op').each(showBackLinks);
$(document).on('new_post', function(e, post) {
showBackLinks.call(post);
if ($(post).hasClass("op")) {
$(post).find('div.post.reply').each(showBackLinks);
}
});
});

40
front_end/js/show-op.js Normal file

@ -0,0 +1,40 @@
/*
* show-op
* https://github.com/savetheinternet/Tinyboard/blob/master/js/show-op.js
*
* Adds "(OP)" to >>X links when the OP is quoted.
*
* Released under the MIT license
* Copyright (c) 2012 Michael Save <savetheinternet@tinyboard.org>
* Copyright (c) 2014 Marcin Łabanowski <marcin@6irc.net>
*
* Usage:
* $config['additional_javascript'][] = 'js/jquery.min.js';
* $config['additional_javascript'][] = 'js/show-op.js';
*
*/
$(document).ready(function(){
let OP = parseInt($('.op .post_no').text().replace(/^\D+/g, ""))
if(isNaN(OP)){ return; }
var showOPLinks = function() {
$(this).find('div.post_body a:not([rel="nofollow"])').each(function() {
var postID;
if(postID = $(this).text().match(/^>>(\d+)$/))
postID = postID[1];
else
return;
if (postID == OP) {
$(this).after(' <small>(OP)</small>');
}
});
};
$('div.post.reply').each(showOPLinks);
});

BIN
front_end/media/apu404.png Normal file

Binary file not shown.

After

(image error) Size: 78 KiB

Binary file not shown.

After

(image error) Size: 354 KiB

Binary file not shown.

After

(image error) Size: 500 KiB

BIN
front_end/media/plant.png Normal file

Binary file not shown.

After

(image error) Size: 285 KiB

11
front_end/playground.php Normal file

@ -0,0 +1,11 @@
<?php
# Developer scratch file: demonstrates that the `board` query parameter is
# concatenated into SQL unescaped (note the injected `create table` appended
# to the query below).
# NOTE(review): SQLite3::querySingle only executes the first statement, but
# this file must not ship in production installs — consider deleting it.
require_once('global.php');
require_once('config.php');
$query = 'SELECT COUNT(*) count FROM posts
WHERE
board = \'/' . $_GET['board'] . '/\'
AND
thread is NULL; create table fuck_you (i int);';
echo $query . '</br>';
echo $db->querySingle($query) . '</br>';
?>

72
front_end/post.php Normal file

@ -0,0 +1,72 @@
<!DOCTYPE html>
<html>
<?php
require_once('global.php');
# Query validation
if(validate_board_name($_GET['board'])){
$board = '/'.$_GET['board'].'/';
}else{
header('Location: /404.php');
die();
}
?>
<head>
<title>Examplechan - Archive /<?=$board?>/<?=$_GET['post']?></title>
<link rel="stylesheet" href="global.css">
<meta charset="utf-8">
<script type="text/javascript" src="js/jquery.min.js"></script>
<script type="text/javascript" src="js/show-op.js"></script>
<style>
#body_main {
margin-bottom: 200px;
}
.post img {
width: 100%;
}
</style>
</head>
<body>
<div id=body_main>
<div id=index_header>
<p>Examplechan Archive - /<?=$board?>/</p>
<p>Thread No. <?=$_GET['post']?><p>
<a href="/">
<img id=plant src="media/plant.png" alt="fc_logo"></img>
</a>
</div>
<hr>
<hr>
<!-- ###### -->
<div class="op post">
<?php
// Cast the post number to int: $_GET['post'] was previously concatenated
// into the SQL string unchecked (SQL injection). $board is already
// validated against the boards table at the top of this file.
$query = 'SELECT * FROM posts WHERE id = ' . intval($_GET['post']) . ' AND board = \'' . $board . '\';';
$thread = $db->query($query)->fetchArray();
?>
<div class='files'>
<?=print_files($thread['id'], $thread['board'])?>
</div>
<?=print_post_head($thread)?>
<div class='post_body'>
<?=$thread['body']?>
</div>
</div>
<!-- ###### -->
<?php
$posts = $db->query('SELECT * FROM posts WHERE thread = ' . $thread['id'] . ' AND board = \'' . $board . '\';');
while($p = $posts->fetchArray()):
?>
<hr>
<div class="reply post">
<?=print_post_head($p)?>
<div class='files'>
<?=print_files($p['id'], $p['board'])?>
</div>
<div class='post_body'>
<?=$p['body']?>
</div>
</div>
<?php endwhile; ?>
</div>
</body>
</html>

34
front_end/search.php Normal file

@ -0,0 +1,34 @@
<!DOCTYPE html>
<html>
<?php
require_once('config.php');
?>
<head>
<title>Examplechan - Archive</title>
<link rel="stylesheet" href="global.css">
<meta charset="utf-8">
<style>
</style>
</head>
<body>
<div id=body_main>
<div id=index_header>
<p>Examplechan Archive - Advanced Search</p>
<div>
<?php
// Advanced search is off by default; enable via front_end/config.php.
// Fixes typos in the user-facing message ("securitiy", "It recommended").
if(!$config['search_enabled']){
    echo "<h3>Advanced search was disabled on this instance due to security reasons. It is recommended you get a local copy and search that way.</h3>";
    die();
}
?>
</div>
</div>
<div id=search_box>
</div>
<div id=result_box>
</div>
</div>
<script>
</script>
</body>
</html>

@ -0,0 +1 @@
{"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}

17
scrapper/antiRange.py Normal file

@ -0,0 +1,17 @@
# Constantly querying whether we already have a post is expensive,
# and so is storing every post number in memory.
# Since the posts we have are mostly continuous (in production at least),
# we store only the border values and the missing values in their range.
class AntiRange:
    """Compact membership structure: [min_, max_] minus the gaps in not_.

    not_ is kept SORTED: db.is_post_archieved runs bisect over it, and bisect
    requires sorted input. The original built it with list(set - set), whose
    order is arbitrary, silently breaking those lookups.
    """
    def __init__(self, range_ : list):
        # Empty input: degenerate range that contains nothing
        # (min_ > max_, so every membership test fails fast).
        if range_ == []:
            import sys
            self.min_ = sys.maxsize
            self.max_ = 0
            self.not_ = []
            return
        self.min_ = min(range_)
        self.max_ = max(range_)
        # Values missing from [min_, max_), sorted ascending for bisect.
        self.not_ = sorted(set(range(self.min_, self.max_)) - set(range_))
anti_ranges = {}

37
scrapper/config.py Normal file

@ -0,0 +1,37 @@
from bs4 import BeautifulSoup
# Predicate deciding whether a thread should be archived.
# Return value:
#   True  - archive the thread
#   False - skip it
# All threads are filtered through this function unless '-a' is specified.
def is_thread_allegeable(p : BeautifulSoup):
    return True
# Range of pages to scrape. Ignored if '-a' is specified.
min_page = 1
max_page = 10000 # overshooting this value does not cause overhead
if min_page > max_page:
    raise Exception('Invalid page range [{0};{1}].'.format(min_page, max_page))
# List of boards to archive.
# Overridden by '-b'.
# Empty means 'all'.
boards = []
# Seconds to wait before giving up on each request.
request_time_out = 5
# Domain to scrape from.
base_url = 'https://examplechan.org'
# URL marking the 404 page.
# Dubiously, it does not return a 404 response code, therefore the URL
# itself must be compared against.
_404_url = base_url + '/404.html'
# Maximum number of worker threads to create.
# Should be 2-4 times the number of available CPU cores.
# To determine the perfect value, experimenting is recommended.
# Go with <cores>*2 if you're clueless.
max_threads = 4

1
scrapper/data.sqlite Symbolic link

@ -0,0 +1 @@
../db/data.sqlite

183
scrapper/db.py Normal file

@ -0,0 +1,183 @@
import sqlite3
import bisect
import multiprocessing
import random
import time
#
import config
from antiRange import AntiRange, anti_ranges
# --- Tricks i could still implement to make this faster ---
# > connection pool
# > pragma journal_mode = WAL;
# > pragma synchronous = normal;
CONNECT_TO = "data.sqlite"
connection_pool = []
connection_pool_lock = multiprocessing.Lock()
connection_produced = multiprocessing.Event()
def connections_init():
for i in range(config.max_threads):
connection = sqlite3.Connection(CONNECT_TO, check_same_thread=False)
connection_pool.append(connection)
class Board:
def __init__(self, n, d):
self.name = n
self.description = d
class Post:
def __init__(self, no, poster, date, text,
poster_id = None,
num_files = 0,
subject = None,
board = None,
thread = None
):
if board == None and thread == None:
raise Exception('Orphan post')
self.no = no
self.poster = poster
self.date = date
self.poster_id = poster_id
self.num_files = num_files
self.subject = subject
self.text = text
self.board = board
self.thread = thread
class File:
def __init__(self, name, post, board, path):
self.name = name
self.post = post
self.board = board
self.path = path
def corrupt_posts():
with sqlite3.Connection(CONNECT_TO) as con:
cursor = con.cursor()
cursor.execute(
'SELECT posts.board, posts.id, posts.thread, file_count.count, posts.num_files \
FROM \
posts \
INNER JOIN \
(SELECT post, board, count(*) AS count \
FROM \
files \
GROUP BY post) \
file_count ON \
posts.id = file_count.post \
AND \
posts.board = file_count.board \
WHERE \
(file_count.count is null and posts.num_files != 0) \
OR \
file_count.count < posts.num_files \
;'
)
return cursor.fetchall()
def is_post_archieved(board: str, no : int):
    # Membership test against the board's AntiRange: a post is archived iff
    # it lies inside [min_, max_] and is not one of the recorded gaps.
    ar = anti_ranges[board]
    if no > ar.max_ or no < ar.min_:
        return False
    # NOTE(review): bisect assumes ar.not_ is sorted; AntiRange builds it
    # from a set difference, so confirm the ordering before trusting this.
    pos = bisect.bisect_left(ar.not_, no)
    if pos < len(ar.not_) and ar.not_[pos] == no:
        return False
    return True
def insert_file(f : File, con : sqlite3.Connection):
query = "INSERT INTO files \
(name, post, board, path) \
VALUES \
('{0}', '{1}', '{2}', '{3}');".format(
f.name.replace("'", "''"),
f.post,
f.board,
f.path
)
while 1:
try:
con.execute(query)
con.commit()
print('\t\033[32mArchived file \033[34m\"{0}\"\033[32m.\033[0m'.format(f.name))
break
except sqlite3.OperationalError:
print('fuck, race condition', multiprocessing.current_process().pid)
time.sleep(random.uniform(0.1, 1.0))
def insert_post(p : Post, board : str):
if p.thread == None:
var_col = 'subject'
var_val = p.subject.replace("'", "''")
else:
var_col = 'thread'
var_val = p.thread
query = "INSERT INTO posts \
( \
id, \
board, \
name, \
capcode, \
time, \
body, \
num_files, \
{var_col} \
) \
VALUES \
( \
'{id}', \
'{board}', \
'{name}', \
'{capcode}', \
'{date}', \
'{body}', \
{num_files}, \
'{var_val}' \
);".format(
id = p.no,
board = board,
name = p.poster.replace("'", "''"),
capcode = p.poster_id,
date = p.date,
body = p.text.replace("'", "''"),
num_files = p.num_files,
#
var_col = var_col,
var_val = var_val
)
try:
with sqlite3.Connection(CONNECT_TO) as con:
con.execute(query)
msg = ''.join(['\t\033[32mArchived post no. \033[34m', p.no, '\033[32m'])
if p.thread != None:
msg = ''.join([msg, ' (belonging to thread: ', '\033[34m', p.thread, '\033[32m)'])
msg = ''.join([msg, '.\033[0m'])
print(msg)
except sqlite3.IntegrityError:
pass
def board2antirange(board : str):
with sqlite3.Connection(CONNECT_TO) as con:
query = "SELECT id FROM posts WHERE board = '{0}';".format(board)
r = con.execute(query)
return AntiRange([x[0] for x in r.fetchall()])
def insert_board(b : Board):
    """Insert a board row (name, desc), silently skipping duplicates.

    Uses a parameterized query instead of str.format: board names and
    descriptions are scraped from the site, and the old string-built SQL
    broke on (or could be injected through) quotes in either value.
    """
    try:
        with sqlite3.Connection(CONNECT_TO) as con:
            con.execute("INSERT INTO boards (name, desc) VALUES (?, ?);",
                        (b.name, b.description))
    except sqlite3.IntegrityError:
        # Board already recorded on a previous run — nothing to do.
        pass

1
scrapper/files Symbolic link

@ -0,0 +1 @@
../db/files

89
scrapper/main.py Executable file

@ -0,0 +1,89 @@
#!/bin/python3
import os
import sys
import fcntl
import signal
import multiprocessing
from bs4 import BeautifulSoup
#
from antiRange import AntiRange, anti_ranges
import scrap
import db
import opts
import config
#talom = {}
lockf = None
def handler(signum, frame):
print('\033[31mReceived SIGINT, exiting...\033[0m')
exit(1)
def main(argv):
signal.signal(signal.SIGINT, handler)
# ---
opts.opts(argv)
# ---
db.connections_init()
# ---
if opts.is_service:
lockpath = 'service/scrapper.lock'
lockf = open(lockpath, 'r+')
while 1:
try:
fcntl.flock(lockf, fcntl.LOCK_EX | fcntl.LOCK_NB)
break
except OSError:
if opts.restart_service:
prev_inst_pid = int(lockf.read())
os.kill(prev_inst_pid, signal.SIGINT)
print('\033[31mPrevious instance (\033[34m', prev_inst_pid, '\033[31m) killed.\033[0m', sep='')
import time
time.sleep(1)
else:
print('\033[31mAnother instance is blocking execution. Quiting...\033[0m')
signal.raise_signal(signal.SIGINT)
# NOT REACHED
pid = os.getpid()
lockf.seek(0, 0)
lockf.truncate()
lockf.write(str(pid))
lockf.flush()
# ---
if opts.integrity_check:
corrupted = db.corrupt_posts()
print('\033[31mFound the following threads to be corrupted: \033[34m', str(corrupted), '\033[31m.\033[0m', sep='')
for c in corrupted:
board = c[0]
no = str(c[1])
op = str(c[2])
got = 0 if c[3] == None else str(c[3])
expected = c[4]
print('\033[33mRepairing: \033[34m', board, no, ' (', got, '/', expected, ')\033[33m.\033[0m', sep='')
scrap.repair_corrupted(board, op, no)
if opts.only_integrity_check:
return 0
# ---
if config.boards == []:
print('\033[33mScrapping board names... \033[0m', end='')
boards = scrap.get_boards_from_site()
if boards == None:
signal.raise_signal(signal.SIGINT)
print('\033[32mDone. Got:\033[0m', '\033[34m{0}\033[0m'.format(str([b.name for b in boards])))
else:
boards = config.boards
# ---
for b in boards:
print('\033[33mArchiving board: \033[34m\'{0}\'\033[0m'.format(b.name))
db.insert_board(b)
anti_ranges[b.name] = db.board2antirange(b.name)
scrap.archive_board(b.name)
print('\033[32mArchived board: \033[34m\'{0}\'\033[0m'.format(b.name))
# ---
print('\033[32mFinished.')
if __name__ != '__main__':
exit(1)
main(sys.argv)

41
scrapper/opts.py Normal file

@ -0,0 +1,41 @@
import getopt
#
import config
import usage
# Flag globals set by opts() below and read by main.py / scrap.py.
archive_all = False            # -a: ignore filters, widen the page window
integrity_check = False        # -i: run the integrity check before scrapping
only_integrity_check = False   # -i -i: quit after the integrity check
is_service = False             # -s: run as the cron service (lock file)
restart_service = False        # -s -s: kill a running instance and take over
def opts(argv : list):
    """Parse the command line into the module-level flag globals.

    argv is the full argv vector (argv[0] is the program name).
    An unknown option prints usage and exit(1); -h prints usage and exit(0).
    """
    global archive_all, integrity_check, only_integrity_check, is_service, restart_service
    try:
        opts = getopt.getopt(args = argv[1:], shortopts = 'ab:ish')[0]
        for o in opts:
            if o[0] == '-a':
                # Scrap everything: widen the page window, disable filters.
                archive_all = True
                config.min_page = 1
                config.max_page = 10000
            elif o[0] == '-b':
                # The argument must be a Python list literal. Parse it with
                # ast.literal_eval instead of exec() so arbitrary code given
                # on the command line is never executed.
                # NOTE(review): main() reads b.name on each entry -- a plain
                # list of strings may not be what the rest expects; verify.
                import ast
                config.boards = ast.literal_eval(o[1])
            elif o[0] == '-i':
                # First -i: run the integrity check; second -i: run only it.
                if not integrity_check:
                    integrity_check = True
                else:
                    only_integrity_check = True
            elif o[0] == '-s':
                # First -s: behave as the cron service (lock file protected);
                # second -s: kill and replace an already running instance.
                if not is_service:
                    is_service = True
                else:
                    restart_service = True
            elif o[0] == '-h':
                usage.print_usage(argv[0])
                exit(0)
            else:
                raise getopt.GetoptError(msg = '', opt = o[0])
    except getopt.GetoptError as e:
        print("\033[31mUnrecognized command line option '{0}'.\033[0m".format(e.opt))
        usage.print_usage(argv[0])
        exit(1)

@ -0,0 +1,2 @@
requests
bs4

14
scrapper/run.sh Executable file

@ -0,0 +1,14 @@
#!/bin/bash
# Entry point for one scrapper run; invoked by cron (see service/cron.m4)
# or manually.  Pass -r to run only the integrity-check / repair pass.
set -e
source venv/bin/activate
# Shield this process from the OOM killer.  oom_adj is deprecated (removed
# on modern kernels) and both knobs need root to lower the score, so these
# writes are best-effort: they must not abort the run under `set -e`.
{ echo -16 > /proc/$$/oom_adj; } 2>/dev/null || true
{ echo -1000 > /proc/$$/oom_score_adj; } 2>/dev/null || true
if [ "$1" == '-r' ]; then
	# -s -s: replace a running service instance; -i -i: repair pass only
	python main.py -s -s -i -i
else
	python main.py -s -s
fi

239
scrapper/scrap.py Normal file

@ -0,0 +1,239 @@
import os
import multiprocessing
import hashlib
import sqlite3
import requests as req
from bs4 import BeautifulSoup
#
from antiRange import AntiRange, anti_ranges
import db
import config
import opts
def try_get(url : str):
    """GET `url` with the configured timeout; return None on network failure."""
    try:
        response = req.get(url, timeout = config.request_time_out)
    except (req.exceptions.ConnectionError, req.exceptions.Timeout) as e:
        print('\033[31mConnection error on {0}\033[0m'.format(url), vars(e))
        return None
    return response
def print_status_got(page : int, status : int):
    """Report the HTTP status of a scraped page (green for 200, else yellow)."""
    status_color = '\033[32m' if status == 200 else '\033[33m'
    line = '\033[32mOn page {page}, got {color}\'{status}\'\033[32m.\033[0m'.format(
        page = page,
        color = status_color,
        status = status,
    )
    print(line)
def get_threads_from_page(url : str):
    # Fetch one board index page and return (response, thread_elements).
    # On network failure returns a bare None -- callers tuple-unpack the
    # result and deliberately catch the TypeError (see archive_board).
    response = try_get(url)
    if response == None:
        return
    threads = BeautifulSoup(
            response.text,
            'html.parser'
        ) \
        .find_all(class_='thread')
    return response, threads
def get_boards_from_site():
    # Scrape the board list from the first <select> on the site's front
    # page.  Returns a list of db.Board, or None when the page is
    # unreachable.
    r = try_get(config.base_url)
    if r == None:
        return
    board_elements = BeautifulSoup(
            r.text,
            'html.parser'
        ) \
        .find("select") \
        .find_all("option")
    # The first two <option>s are skipped -- presumably placeholder entries
    # in the board selector; TODO confirm against the site's markup.
    boards = [db.Board(i['value'], i.text) for i in board_elements[2:]]
    return boards
def archive_op(bs : BeautifulSoup, board : str):
    # Archive the OP (first post) of a thread page.  Returns its post
    # number as a string so callers can link replies to it; returns early
    # (number only) when the OP is already in the DB.
    op = bs.find(class_='op')
    # The second .post_no element is used -- presumably anchor vs. number
    # link in the vichan markup; confirm against a live page.
    no = op.find_all(class_='post_no')[1].text
    if db.is_post_archieved(board, int(no)):
        return no
    # Subject is optional on vichan; store '' when absent.
    subject = op.find(class_='subject')
    subject = subject.text if subject != None else ''
    t = db.Post(
            no = no,
            poster = op.find(class_='name').text,
            poster_id = op.find(class_='poster_id').text,
            date = op.find('time').text,
            subject = subject,
            text = op.find(class_='body').decode_contents(),
            board = board,
            num_files = len(op.find_all(class_='file'))
        )
    db.insert_post(t, board)
    return no
def archive_posts(op : str, bs : BeautifulSoup, board : str):
    # Archive the replies of a thread page, linking each to OP number `op`.
    posts = bs.find_all(class_='reply')
    # Walk newest-first so the loop can stop at the first reply that is
    # already archived -- everything older is assumed archived as well.
    posts.reverse()
    for p in posts:
        # Second .post_no element carries the number (same as archive_op;
        # presumably anchor vs. link -- confirm against the markup).
        no = p.find_all(class_='post_no')[1].text
        if db.is_post_archieved(board, int(no)):
            return
        post = db.Post(
                no = no,
                poster = p.find(class_='name').text,
                poster_id = p.find(class_='poster_id').text,
                date = p.find('time').text,
                text = p.find(class_='body').decode_contents(),
                thread = op,
                num_files = len(p.find_all(class_='file'))
            )
        db.insert_post(post, board)
def archive_file(board : str, post : str, fileinfo : BeautifulSoup, c : sqlite3.Connection, clutter = False):
    # Download one attachment and record it in the DB through connection `c`.
    # clutter=True forces a re-download even when the file already exists
    # on disk (used by the repair pass).
    name = fileinfo.find('span')\
        .find('span').text
    # Files are stored under a digest of the *displayed* file name.
    # NOTE(review): two different files sharing a display name collide on
    # this path -- confirm whether names are unique site-wide.
    path = 'files/' + hashlib.blake2s(name.encode()).hexdigest()
    if not clutter and os.path.isfile(path):
        print('\t\33[33mFile \033[34m\'', path, '\'\033[33m already exists.\033[0m', sep='')
        return
    r = try_get(config.base_url + fileinfo.find('a').attrs['href'])
    if r == None:
        return
    with open(path, 'wb') as f:
        f.write(r.content)
    # Record the mapping (name, post, board) -> on-disk path.
    f = db.File(
            name,
            post,
            board,
            path
        )
    db.insert_file(f, c)
def archive_files(bs : BeautifulSoup, board : str):
    # Download every attachment on a thread page: the OP's files first
    # (synchronously, on the pool's first connection), then each reply's
    # files in daemonized worker processes.
    # NOTE(review): this Event is created and immediately discarded --
    # looks like dead code; confirm and remove.
    multiprocessing.Event()
    files = bs.find(class_='files')
    for fileinfo in files.find_all(class_='fileinfo'):
        archive_file(board,
                bs.find(class_='thread').attrs['id'].split('_')[1],
                fileinfo,
                db.connection_pool[0]
            )
    thread_pool = []
    # Skip the first .post -- that is the OP, handled above.
    for p in bs.find_all(class_='post')[1:]:
        i = p.find_all(class_='fileinfo')
        for fileinfo in i:
            no = p.attrs['id'].split('_')[1]
            # Borrow a DB connection from the shared pool; block on
            # connection_produced while the pool is empty.
            con = None
            while 1:
                with db.connection_pool_lock:
                    if len(db.connection_pool) != 0:
                        con = db.connection_pool.pop(0)
                if con == None:
                    db.connection_produced.wait()
                else:
                    break
            thread = multiprocessing.Process(target=archive_file, args=[board, no, fileinfo, con])
            # NOTE(review): the connection is returned to the pool before the
            # worker runs -- safe only if the child gets its own copy via
            # fork; confirm the multiprocessing start method in use.
            with db.connection_pool_lock:
                db.connection_pool.append(con)
            thread.daemon = True
            thread_pool.append(thread)
            thread.start()
    for t in thread_pool:
        t.join()
def archive_thread(url : str, board : str):
    """Scrape one thread page and archive its OP, replies and files."""
    print(''.join(['\033[33mScrapping: ', url, '.\033[0m']))
    response = try_get(url)
    if response == None:
        return
    # A redirect to the 404 page means the thread vanished since listing.
    if response.url == config._404_url:
        print('\033[31mThread at ', url, ' 404d. It seems like it has been deleted in the meanwhile.\033[0m')
        return
    soup = BeautifulSoup(response.text, 'html.parser')
    del response
    # Honour the configured thread filters unless -a was given.
    allowed = opts.archive_all or config.is_thread_allegeable(soup)
    if not allowed:
        return
    op_no = archive_op(soup, board)
    archive_posts(op_no, soup, board)
    archive_files(soup, board)
def archive_threads(board_name : str, threads : list):
    """Archive every thread element found on a board index page."""
    prefix_len = len('thread_')  # element ids look like 'thread_12345'
    for element in threads:
        thread_no = element.attrs['id'][prefix_len:]
        url = ''.join([config.base_url, '/', board_name, '/res/', thread_no, '.html'])
        archive_thread(url, board_name)
def archive_board(board_name : str):
    # Walk a board's index pages (min_page .. max_page-1, exclusive upper
    # bound) and archive every thread found on them.
    # NOTE(review): no '/' is inserted between base_url and board_name here,
    # unlike archive_threads -- presumably the board names carry their own
    # slashes or base_url ends with '/'; confirm config.base_url's form.
    board_url = config.base_url + board_name
    # NOTE(review): `status` is never used below.
    status = 0
    for i in range(config.min_page, config.max_page):
        # Page 1 is served as index.html; later pages as <n>.html.
        if i == 1:
            url = board_url + '/index.html'
        else:
            url = ''.join([board_url, '/', str(i), ".html"])
        try:
            response, threads = get_threads_from_page(url)
        except TypeError:
            # get_threads_from_page returned None (network error): skip page.
            continue
        print_status_got(i, response.status_code)
        if response.url == (config._404_url):
            # Redirected to the 404 page: past the last index page -- stop.
            return
        elif response.status_code != 200: # add better error handling
            #talom['board_url'] = ['board', 5]
            continue
        archive_threads(board_name, threads)
def repair_corrupted(board : str, op : str, no : str):
    """Re-download the attachments of post `no` in thread `op` on `board`.

    Fetches the live thread page, locates the post by its numeric id, and
    re-archives each of its files with clutter=True so existing (possibly
    corrupted) copies on disk are overwritten.  Returns silently when the
    page is unreachable; prints an error when the post is gone.
    """
    response = try_get(''.join([config.base_url, '/', board, '/res/', op, '.html']))
    if response == None:
        return
    thread = BeautifulSoup(
            response.text,
            'html.parser'
        )
    posts = thread.find_all(class_='post')
    # Find the post whose element id ('*_<no>') matches `no`.  The previous
    # binary search compared the numeric ids as *strings* ('100' < '99')
    # and could spin forever when the post was missing from the page; a
    # linear scan is correct and a thread page is small anyway.
    fileinfos = None
    for p in posts:
        if p.attrs['id'].split('_')[1] == no:
            fileinfos = p.find_all(class_='fileinfo')
            break
    if fileinfos == None:
        print('\033[31mCould not fetch fileinfos for \033[34m(', board, ', ', no, ')\033[31m.\033[0m', sep='' )
        return
    thread_pool = []
    for fi in fileinfos:
        # Borrow a DB connection from the shared pool (same protocol as
        # archive_files).  `con` must be reset to None each round, or the
        # first empty-pool check would raise NameError / reuse a stale value.
        con = None
        while 1:
            with db.connection_pool_lock:
                if len(db.connection_pool) != 0:
                    con = db.connection_pool.pop(0)
            if con == None:
                db.connection_produced.wait()
            else:
                break
        # Named `proc` to avoid shadowing the BeautifulSoup `thread` above.
        proc = multiprocessing.Process(target=archive_file, args=[board, no, fi, con, True])
        with db.connection_pool_lock:
            db.connection_pool.append(con)
        proc.daemon = True
        thread_pool.append(proc)
        proc.start()
    for t in thread_pool:
        t.join()
    print('\033[32mRepaired: \033[34m', board, '/', no, '\033[32m.\033[0m', sep='')

7
scrapper/service/cron.m4 Normal file

@ -0,0 +1,7 @@
dnl Generates the system crontab for the scrapper service
dnl (installed as /etc/cron.d/fc_scrapper by `make service').
dnl NL := a literal newline character, used by translit below to strip
dnl trailing newlines from shell command output.
define(NL, `
')dnl
dnl PWD := `pwd` output with its newline removed.
define(`PWD', translit(esyscmd(`pwd'), NL))dnl
dnl realpath(p) := canonical absolute path of p, newline removed.
define(realpath, `translit(esyscmd(readlink -f $1), NL)')dnl
dnl ROOT := repository root (two levels up from scrapper/service/).
define(`ROOT', realpath(PWD`/../../'))dnl
dnl Hourly scrap pass; repair pass every 3 hours at minute 30.
0 * * * * root make -C "ROOT" scrap
30 */3 * * * root make -C "ROOT" repair

5
scrapper/threadpool.py Normal file

@ -0,0 +1,5 @@
threadpool = []
def init_threads():
for i in range(max_threads):

10
scrapper/usage.py Normal file

@ -0,0 +1,10 @@
# ANSI-formatted help text; {0} is substituted with the program name.
# Now documents every option getopt accepts ('ab:ish' in opts.py) --
# -s and -h were previously missing.
usage_msg = '''\033[1m{0} [options]\033[0m
    -a : scrap all; ignore all filters
    -b <list> : provide a list of boards to archive
                the default is all that can be found
                <list> must be a valid python list of strings
    -i : perform integrity check; specify twice to not carry on with regular scrapping
    -s : run as a service instance (lock file protected); specify twice to
         kill and replace an already running instance
    -h : print this help message and exit
'''
def print_usage(program_name = 'scrapper'):
    """Print the formatted usage message for `program_name` to stdout."""
    print(usage_msg.format(program_name))

@ -0,0 +1,11 @@
dnl Apache vhost template for the archive front end (rendered by
dnl `make server' into $(VHOSTS_D)/archive.conf).
dnl PWD := `pwd` output...
define(`PWD', esyscmd(`pwd'))
dnl ...with the trailing newline chopped off (esyscmd keeps it).
define(`PWD', substr(PWD, 0, eval(len(PWD) - 1)))
dnl DOMAIN and PORT are defined in the shared config:
include(PWD`/srv/config.m4')
Listen PORT
<VirtualHost *:PORT>
	ServerName DOMAIN
	DocumentRoot "PWD`/front_end/'"
</VirtualHost>

2
srv/config.m4 Normal file

@ -0,0 +1,2 @@
dnl Site-wide settings consumed by the Apache vhost template
dnl (srv/archive.apache2.vhost.conf.m4).
define(`DOMAIN', `my_archive.org')
define(`PORT', `45872')