This commit is contained in:
Izalia Mae 2020-01-13 04:59:27 -05:00
parent 30f9057dcc
commit dada122e43
13 changed files with 114 additions and 16 deletions

View file

@ -1,12 +1,12 @@
# Hide Every Collection, Comrade! (HECC)
# Protection Against Web Scrapers (PAWS)
Web proxy for Mastodon that puts public profiles behind an auth layer.
## How it works
HECC sits between Mastodon and your front-facing web proxy to intercept incoming requests. If a profile, a toot, or any related json is requested, it will be blocked unless authenticated.
PAWS sits between Mastodon and your front-facing web proxy to intercept incoming requests. If a profile, toot, or any related JSON is requested, it will be blocked unless authenticated.
Note: Still very much a WIP. Currently it's just simple http auth, but I plan on adding the ability to auth via a local Mastodon account or oauth
Note: Still very much a WIP. Currently it's just simple HTTP auth, but I plan on adding the ability to log in via OAuth.
## Installation

View file

View file

@ -9,11 +9,11 @@ from .routes import main
if 'install' in sys.argv:
from .config import mastodir, logging
script = f'{mastodir}/hecc.sh'
script = f'{mastodir}/paws.sh'
start_script = f'''#!/bin/sh
export MASTODIR={mastodir}
(cd MASTODIR && python -m hecc)'''
(cd MASTODIR && python -m paws)'''
with open(script, 'w') as sh:
sh.write(start_script)

View file

@ -11,7 +11,7 @@ from .functions import bool_check
VERSION = '0.1'
mastodir = env.get('MASTODIR', os.getcwd())
stor_path = abspath(f'{mastodir}/hecc-data')
stor_path = abspath(f'{mastodir}/paws-data')
if not isdir(stor_path):

96
paws/database.py Normal file
View file

@ -0,0 +1,96 @@
import sys
from DBUtils.PooledPg import PooledPg as DB
from datetime import datetime
from tinydb import TinyDB, Query
from tinydb_smartcache import SmartCacheTable
from tinyrecord import transaction as trans
from tldextract import extract
from json.decoder import JSONDecodeError
from .config import stor_path, logging, MASTOCONFIG as mdb
def jsondb():
    '''Open the TinyDB JSON store and return its tables.

    The store lives at ``{stor_path}/db.json`` and is written with tab
    indentation.

    Returns:
        dict mapping each table name ('bans', 'follows', 'users', 'tokens')
        to the corresponding table object.

    Exits the process with status 1 if the file cannot be decoded as JSON.
    '''
    try:
        db = TinyDB(f'{stor_path}/db.json', indent='\t')

    except JSONDecodeError as e:
        logging.critical(f'Failed to load DB: {e}. Exiting...')
        # A bare sys.exit() would report success (status 0) to the caller
        sys.exit(1)

    # Must be set before the tables below are created so they are built
    # from SmartCacheTable rather than the default table class
    db.table_class = SmartCacheTable

    table_names = ('bans', 'follows', 'users', 'tokens')
    return {name: db.table(name) for name in table_names}
def pgdb():
    '''Create the pooled PostgreSQL handle for Mastodon's database.

    Connection settings are read from MASTOCONFIG (imported as ``mdb``).
    The password is only passed along when it is configured as a string.

    Returns:
        The ``PooledPg`` instance (imported as ``DB``).

    Exits the process with status 1 if a setting is missing or the pool
    cannot be created.
    '''
    try:
        params = {
            'dbname': mdb['dbname'],
            'host': mdb['dbhost'],
            'port': mdb['dbport'],
            'user': mdb['dbuser'],
        }

        # The original tested an undefined name `dbpass`, which raised a
        # NameError that the handler below turned into an unconditional exit
        dbpass = mdb.get('dbpass')

        if isinstance(dbpass, str):
            params['passwd'] = dbpass

        return DB(**params)

    except Exception as e:
        logging.critical(f'Failed to connect to DB: {e}. Exiting...')
        sys.exit(1)
def _pg_bool(value):
    '''Coerce a PostgreSQL boolean column value to a Python bool.

    Accepts real booleans as well as the textual forms ('t', 'true', ...)
    that some driver configurations return.
    '''
    if isinstance(value, str):
        return value.strip().lower() in ('t', 'true', 'y', 'yes', 'on', '1')

    return bool(value)


def get_bans():
    '''Fetch Mastodon's domain blocks from PostgreSQL.

    Returns:
        dict keyed by blocked domain. Each value is a dict with the keys
        'severity', 'media', 'reports', 'private', 'public' and 'updated',
        taken from the matching columns of ``public.domain_blocks``.
    '''
    # `mastodb` is a connection pool: borrow a connection for the query
    # and hand it back afterwards instead of calling query() on the pool
    conn = mastodb.connection()

    try:
        rows = conn.query('SELECT * FROM public.domain_blocks;').dictresult()

    finally:
        conn.close()

    return {
        row['domain']: {
            'severity': row['severity'],
            'media': _pg_bool(row['reject_media']),
            'reports': _pg_bool(row['reject_reports']),
            'private': row['private_comment'],
            'public': row['public_comment'],
            'updated': row['updated_at'],
        }
        for row in rows
    }
def update_bans():
    '''Placeholder: updating bans is not implemented yet.'''
    return None
def update_bancache():
    '''Merge the current domain blocks into the in-memory ban cache.

    A domain is (re)written when it is not cached yet or when the database
    row has a newer 'updated' timestamp than the cached entry.
    '''
    bans = get_bans()
    banlist = cache.get('bans') or {}

    for domain, ban in bans.items():
        cached = banlist.get(domain)

        if cached is None or ban['updated'] > cached['updated']:
            banlist[domain] = ban

    # NOTE(review): domains removed from the database stay cached; confirm
    # whether unblocked domains should be dropped here.
    # Assigning to the result of cache.get(...) is a syntax error, so the
    # merged list is stored by key.
    cache['bans'] = banlist
    logging.debug('Updated ban cache')
def ban_check(url):
    '''Check whether a URL or bare domain belongs to a blocked instance.

    Args:
        url: full http(s) URL (e.g. an actor id) or a bare domain name.

    Returns:
        True if the host, or its registered domain as reported by
        tldextract, is a key of the cached ban list; False otherwise.
    '''
    # Local import: this module never imported urlparse at the top level
    from urllib.parse import urlparse

    if not url:
        return False

    is_url = url.startswith(('http://', 'https://'))
    instance = urlparse(url).netloc if is_url else url

    domain = extract(url)
    parsed = f'{domain.domain}.{domain.suffix}'

    # The cache key is 'bans'; the original looked up 'ban' and got None
    banlist = cache.get('bans') or {}

    if instance in banlist or parsed in banlist:
        return True

    logging.debug(f'{parsed} not in blocklist')
    return False
# Module-level state, created once at import time.

# TinyDB tables keyed by name ('bans', 'follows', 'users', 'tokens')
pawsdb = jsondb()
# Shared TinyDB query builder
query = Query()
# PostgreSQL handle; must exist before `cache` because get_bans() reads it
mastodb = pgdb()
# In-memory cache, seeded with the domain blocks fetched by get_bans()
cache = {'bans': get_bans()}

View file

@ -13,6 +13,7 @@ from aiohttp.client_exceptions import *
from .signature import validate, pass_hash
from .functions import json_error, user_check
from .config import MASTOCONFIG, script_path
from . import database as db
# I'm a little teapot :3
@ -123,7 +124,7 @@ async def http_signatures(app, handler):
else:
auth_username = 'admin'
auth_password = 'heck'
auth_password = 'doubleheck'
auth_realm = 'Nope'
auth_header = request.headers.get(aiohttp.hdrs.AUTHORIZATION)
@ -154,9 +155,15 @@ async def http_signatures(app, handler):
async def http_filter(app, handler):
    '''Middleware factory that rejects blocked user agents and banned actors.

    Args:
        app: the aiohttp application (unused here).
        handler: the next request handler in the chain.

    Returns:
        A coroutine that raises HTTPTeapot for blocked user agents, raises
        a 403 JSON error when the body's 'actor' belongs to a banned
        instance, and otherwise awaits and returns ``handler(request)``.
    '''
    async def http_filter_handler(request):
        user_agent = request.headers.get('User-Agent', '').lower()

        if any(agent in user_agent for agent in blocked_agents):
            raise HTTPTeapot(body='418 This teapot kills fascists', content_type='text/plain')

        # Requests without a JSON body (e.g. a plain GET) must not fail
        # here; they simply carry no actor to check
        try:
            data = await request.json()

        except ValueError:
            data = None

        actor = data.get('actor') if isinstance(data, dict) else None

        # NOTE(review): assumes json_error() returns an exception instance
        # that can be raised - confirm against .functions
        if isinstance(actor, str) and db.ban_check(actor):
            raise json_error(403, 'Forbidden')

        return (await handler(request))

    return http_filter_handler

View file

@ -23,13 +23,6 @@ def webserver():
middleware.http_redirect
])
#app = aiohttp.web.Application(middlewares=[
# http_filter_middleware,
# http_trailing_slash,
# http_signatures_middleware,
# http_auth_middleware
#])
async def global_vars(request):
return {
'VERSION': VERSION,
@ -56,8 +49,10 @@ def webserver():
aiohttp.web.route('*', '/', views.heck),
aiohttp.web.route('*', '/@{user}', views.heck),
aiohttp.web.route('*', '/@{user}/{post}', views.heck),
aiohttp.web.route('*', '/@{user}/{post}/activity', views.heck),
aiohttp.web.route('*', '/users/{user}', views.heck),
aiohttp.web.route('*', '/users/{user}/{post}', views.heck)
aiohttp.web.route('*', '/users/{user}/{post}', views.heck),
aiohttp.web.route('*', '/users/{user}/{post}/activity', views.heck)
])
return web

View file

@ -1,5 +1,5 @@
#!/usr/bin/env python3
from hecc.routes import main
from paws.routes import main
if __name__ == '__main__':
main()