diff --git a/README.md b/README.md
index 2c6d220..2ea1208 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
-# Hide Every Collection, Comrade! (HECC)
+# Protection Against Web Scrapers (PAWS)
 
 Web proxy for Mastodon that puts public profiles behind an auth layer.
 
 ## How it works
 
-HECC sits between Mastodon and your front-facing web proxy to intercept incoming requests. If a profile, a toot, or any related json is requested, it will be blocked unless authenticated.
+PAWS sits between Mastodon and your front-facing web proxy to intercept incoming requests. If a profile, toot, or any related json is requested, it will be blocked unless authenticated.
 
-Note: Still very much a WIP. Currently it's just simple http auth, but I plan on adding the ability to auth via a local Mastodon account or oauth
+Note: Still very much a WIP. Currently it's just simple http auth, but I plan on adding the ability to log in via OAuth
 
 ## Installation
 
diff --git a/hecc/database.py b/hecc/database.py
deleted file mode 100644
index e69de29..0000000
diff --git a/hecc/__init__.py b/paws/__init__.py
similarity index 100%
rename from hecc/__init__.py
rename to paws/__init__.py
diff --git a/hecc/__main__.py b/paws/__main__.py
similarity index 87%
rename from hecc/__main__.py
rename to paws/__main__.py
index 9dada21..c0cffec 100644
--- a/hecc/__main__.py
+++ b/paws/__main__.py
@@ -9,11 +9,11 @@ from .routes import main
 if 'install' in sys.argv:
     from .config import mastodir, logging
 
-    script = f'{mastodir}/hecc.sh'
+    script = f'{mastodir}/paws.sh'
     start_script = f'''#!/bin/sh
 export MASTODIR={mastodir}
 
-(cd MASTODIR && python -m hecc)'''
+(cd "$MASTODIR" && python -m paws)'''
 
     with open(script, 'w') as sh:
         sh.write(start_script)
diff --git a/hecc/config.py b/paws/config.py
similarity index 97%
rename from hecc/config.py
rename to paws/config.py
index 261549b..36322f6 100644
--- a/hecc/config.py
+++ b/paws/config.py
@@ -11,7 +11,7 @@ from .functions import bool_check
 VERSION = '0.1'
 
 mastodir = env.get('MASTODIR', os.getcwd())
-stor_path = abspath(f'{mastodir}/hecc-data')
+stor_path = abspath(f'{mastodir}/paws-data')
 
 
 if not isdir(stor_path):
diff --git a/paws/database.py b/paws/database.py
new file mode 100644
index 0000000..8669015
--- /dev/null
+++ b/paws/database.py
@@ -0,0 +1,126 @@
+import sys
+from DBUtils.PooledPg import PooledPg as DB
+from datetime import datetime
+from tinydb import TinyDB, Query
+from tinydb_smartcache import SmartCacheTable
+from tinyrecord import transaction as trans
+from tldextract import extract
+from json.decoder import JSONDecodeError
+from urllib.parse import urlparse
+
+from .config import stor_path, logging, MASTOCONFIG as mdb
+
+
+def jsondb():
+    '''Open the TinyDB store in stor_path and return its tables by name'''
+    try:
+        db = TinyDB(f'{stor_path}/db.json', indent='\t')
+
+    except JSONDecodeError as e:
+        logging.critical(f'Failed to load DB: {e}. Exiting...')
+        sys.exit(1)
+
+    db.table_class = SmartCacheTable
+
+    tables = {
+        'bans': db.table('bans'),
+        'follows': db.table('follows'),
+        'users': db.table('users'),
+        'tokens': db.table('tokens')
+    }
+
+    return tables
+
+
+def pgdb():
+    '''Create the connection pool for the Mastodon PostgreSQL database'''
+    try:
+        options = {
+            'dbname': mdb['dbname'],
+            'host': mdb['dbhost'],
+            'port': mdb['dbport'],
+            'user': mdb['dbuser']
+        }
+        dbpass = mdb['dbpass']
+
+        # Only pass a password along when one is configured
+        if isinstance(dbpass, str):
+            options['passwd'] = dbpass
+
+        return DB(**options)
+
+    except Exception as e:
+        logging.critical(f'Failed to connect to DB: {e}. Exiting...')
+        sys.exit(1)
+
+
+def get_bans():
+    '''Fetch the Mastodon domain blocks as a dict keyed by domain'''
+    # mastodb is a pool, so borrow a connection just for this query
+    conn = mastodb.connection()
+
+    try:
+        domains = conn.query('SELECT * FROM public.domain_blocks;').dictresult()
+
+    finally:
+        # Closing hands the connection back to the pool
+        conn.close()
+
+    banlist = {}
+
+    for domain in domains:
+        instance = domain['domain']
+
+        banlist[instance] = {
+            'severity': domain['severity'],
+            'media': bool(domain['reject_media']),
+            'reports': bool(domain['reject_reports']),
+            'private': domain['private_comment'],
+            'public': domain['public_comment'],
+            'updated': domain['updated_at']
+        }
+
+    return banlist
+
+
+def update_bans():
+    '''I'll implement this later'''
+    pass
+
+
+def update_bancache():
+    '''Merge new or changed domain blocks from Mastodon into the ban cache'''
+    bans = get_bans()
+    banlist = cache.setdefault('bans', {})
+
+    # NOTE: domains unblocked in Mastodon are not removed from the cache here
+    for domain, ban in bans.items():
+        if domain not in banlist or ban['updated'] > banlist[domain]['updated']:
+            banlist[domain] = ban
+
+    logging.debug('Updated ban cache')
+
+
+def ban_check(url):
+    '''Return True if the host of a url (or a bare domain) is blocked'''
+    if not url:
+        return False
+
+    host = urlparse(url).hostname if '://' in url else url
+    instance = (host or url).lower()
+    domain = extract(url)
+    parsed = f'{domain.domain}.{domain.suffix}'.lower()
+    bans = cache.get('bans', {})
+
+    # Match the exact host as well as its registered domain
+    if instance in bans or parsed in bans:
+        return True
+
+    logging.debug(f'{parsed} not in blocklist')
+    return False
+
+
+pawsdb = jsondb()
+query = Query()
+mastodb = pgdb()
+cache = {'bans': get_bans()}
diff --git a/hecc/functions.py b/paws/functions.py
similarity index 100%
rename from hecc/functions.py
rename to paws/functions.py
diff --git a/hecc/middleware.py b/paws/middleware.py
similarity index 96%
rename from hecc/middleware.py
rename to paws/middleware.py
index 7546c41..ab594b9 100644
--- a/hecc/middleware.py
+++ b/paws/middleware.py
@@ -13,6 +13,7 @@ from aiohttp.client_exceptions import *
 from .signature import validate, pass_hash
 from .functions import json_error, user_check
 from .config import MASTOCONFIG, script_path
+from . import database as db
 
 
 # I'm a little teapot :3
@@ -123,7 +124,7 @@ async def http_signatures(app, handler):
 
         else:
             auth_username = 'admin'
-            auth_password = 'heck'
+            auth_password = 'doubleheck'
             auth_realm = 'Nope'
 
             auth_header = request.headers.get(aiohttp.hdrs.AUTHORIZATION)
@@ -154,9 +155,25 @@ async def http_signatures(app, handler):
 
 async def http_filter(app, handler):
     async def http_filter_handler(request):
         if [agent for agent in blocked_agents if agent in request.headers.get('User-Agent', '').lower()]:
             raise HTTPTeapot(body='418 This teapot kills fascists', content_type='text/plain')
 
+        # Only requests with a body (e.g. inbox POSTs) can carry an actor
+        actor = None
+
+        if request.can_read_body:
+            try:
+                data = await request.json()
+
+            except ValueError:
+                data = None
+
+            if isinstance(data, dict):
+                actor = data.get('actor')
+
+        if isinstance(actor, str) and db.ban_check(actor):
+            raise json_error(403, 'Forbidden')
+
         return (await handler(request))
     return http_filter_handler
 
diff --git a/hecc/routes.py b/paws/routes.py
similarity index 91%
rename from hecc/routes.py
rename to paws/routes.py
index 570be36..38efc76 100644
--- a/hecc/routes.py
+++ b/paws/routes.py
@@ -23,13 +23,6 @@ def webserver():
         middleware.http_redirect
     ])
 
-    #app = aiohttp.web.Application(middlewares=[
-    #    http_filter_middleware,
-    #    http_trailing_slash,
-    #    http_signatures_middleware,
-    #    http_auth_middleware
-    #])
-
     async def global_vars(request):
         return {
             'VERSION': VERSION,
@@ -56,8 +49,10 @@ def webserver():
         aiohttp.web.route('*', '/', views.heck),
         aiohttp.web.route('*', '/@{user}', views.heck),
         aiohttp.web.route('*', '/@{user}/{post}', views.heck),
+        aiohttp.web.route('*', '/@{user}/{post}/activity', views.heck),
         aiohttp.web.route('*', '/users/{user}', views.heck),
-        aiohttp.web.route('*', '/users/{user}/{post}', views.heck)
+        aiohttp.web.route('*', '/users/{user}/{post}', views.heck),
+        aiohttp.web.route('*', '/users/{user}/{post}/activity', views.heck)
     ])
 
     return web
diff --git a/hecc/signature.py b/paws/signature.py
similarity index 100%
rename from hecc/signature.py
rename to paws/signature.py
diff --git a/hecc/templates/unauthorized.html b/paws/templates/unauthorized.html
similarity index 100%
rename from hecc/templates/unauthorized.html
rename to paws/templates/unauthorized.html
diff --git a/hecc/views.py b/paws/views.py
similarity index 100%
rename from hecc/views.py
rename to paws/views.py
diff --git a/server.py b/server.py
index 7cb8a8a..77aa837 100755
--- a/server.py
+++ b/server.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-from hecc.routes import main
+from paws.routes import main
 
 if __name__ == '__main__':
     main()