diff --git a/.gitignore b/.gitignore
index f295602..fd723dc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
-hecc-data
-hecc.sh
+data
 build*
 
 # ---> Python
diff --git a/paws/cache.py b/paws/cache.py
new file mode 100644
index 0000000..7c353b3
--- /dev/null
+++ b/paws/cache.py
@@ -0,0 +1,118 @@
+import re
+
+from datetime import datetime
+from collections import OrderedDict
+
+from .config import logging
+
+
+def parse_ttl(ttl):
+    '''Convert a TTL such as "30s", "15m" or "1h" to a number of seconds.
+
+    A bare number is read as seconds; an invalid value falls back to 1h.
+    '''
+    m = re.match(r'^(\d+)([smhdw]?)$', str(ttl))
+
+    if not m:
+        logging.warning(f'Invalid TTL: {ttl}. Setting to default: 1h')
+        amount = 1
+        unit = 'h'
+
+    else:
+        amount = int(m.group(1))
+        unit = m.group(2)
+
+    units = {
+        's': 1,
+        'm': 60,
+        'h': 60 * 60,
+        'd': 24 * 60 * 60,
+        'w': 7 * 24 * 60 * 60,
+    }
+
+    # An empty unit means the amount is already in seconds
+    return units.get(unit, 1) * amount
+
+
+class TTLCache:
+    '''Size-limited cache whose entries expire `ttl` after their last use.'''
+
+    def __init__(self, ttl='1h', maxsize=1024):
+        self.items = OrderedDict()
+        self.ttl = parse_ttl(ttl)
+        self.maxsize = maxsize
+
+
+    def invalidate(self, key):
+        '''Remove `key` from the cache if it is present.'''
+        self.items.pop(key, None)
+
+
+    def store(self, key, value):
+        '''Insert or replace `key` and restart its expiry timer.'''
+        # Drop any old entry first so it is neither counted nor left stale
+        self.items.pop(key, None)
+
+        while len(self.items) >= self.maxsize and self.maxsize != 0:
+            self.items.popitem(last=False)
+
+        expires = int(datetime.timestamp(datetime.now())) + self.ttl
+        self.items[key] = {'data': value, 'timestamp': expires}
+
+
+    def fetch(self, key):
+        '''Return the value for `key`, or None when missing or expired.'''
+        item = self.items.get(key)
+
+        if item is None:
+            return None
+
+        timestamp = int(datetime.timestamp(datetime.now()))
+
+        # 'timestamp' holds the expiry time, not the insertion time
+        if timestamp >= item['timestamp']:
+            del self.items[key]
+            return None
+
+        # A hit refreshes both the expiry time and the eviction order
+        item['timestamp'] = timestamp + self.ttl
+        self.items.move_to_end(key)
+        return item['data']
+
+
+class LRUCache:
+    '''Size-limited cache that evicts the least recently used entry.'''
+
+    def __init__(self, maxsize=1024):
+        self.items = OrderedDict()
+        self.maxsize = maxsize
+
+
+    def invalidate(self, key):
+        '''Remove `key`; return True if it was cached, otherwise False.'''
+        if key in self.items:
+            del self.items[key]
+            return True
+
+        return False
+
+
+    def store(self, key, value):
+        '''Insert or replace `key` as the most recently used entry.'''
+        # Drop any old entry first so it is neither counted nor left stale
+        self.items.pop(key, None)
+
+        while len(self.items) >= self.maxsize and self.maxsize != 0:
+            self.items.popitem(last=False)
+
+        self.items[key] = value
+
+
+    def fetch(self, key):
+        '''Return the value for `key` and mark it recently used, or None.'''
+        if key in self.items:
+            self.items.move_to_end(key)
+            return self.items[key]
+
+        return None
+
diff --git a/paws/config.py b/paws/config.py
index 36322f6..e12d1d6 100644
--- a/paws/config.py
+++ b/paws/config.py
@@ -10,8 +10,11 @@ from envbash import load_envbash
 from .functions import bool_check
 
 VERSION = '0.1'
-mastodir = env.get('MASTODIR', os.getcwd())
-stor_path = abspath(f'{mastodir}/paws-data')
+
+full_path = abspath(sys.executable) if getattr(sys, 'frozen', False) else abspath(__file__)
+script_path = getattr(sys, '_MEIPASS', dirname(abspath(__file__)))
+script_name = basename(full_path)
+stor_path = abspath(f'{script_path}/../data')
 
 
 if not isdir(stor_path):
@@ -41,39 +44,33 @@ console.formatter = logger.Formatter(log_format)
 logging.addHandler(console)
 
 
-full_path = abspath(sys.executable) if getattr(sys, 'frozen', False) else abspath(__file__)
-script_path = getattr(sys, '_MEIPASS', dirname(abspath(__file__)))
-script_name = basename(full_path)
-
-
-if not isfile(f'{mastodir}/.env.production'):
-    logging.error(f'Mastodon environment file doesn\'t exist: {mastodir}/.env.production')
-
-else:
-    load_envbash(f'{mastodir}/.env.production')
-
 if not isfile(f'{stor_path}/production.env'):
-    logging.error(f'HECC environment file doesn\'t exist: {stor_path}/production.env')
+    logging.error(f'PAWS environment file doesn\'t exist: {stor_path}/production.env')
 
 else:
     load_envbash(f'{stor_path}/production.env')
 
+PAWSCONFIG = {
+    'host': env.get('PAWS_HOST', '127.0.0.1'),
+    'port': int(env.get('PAWS_PORT', 3001)),
+    'mastopath': env.get('MASTOPATH', os.getcwd())
+}
+
+
+masto_path = PAWSCONFIG['mastopath']
+
+if not isfile(f'{masto_path}/.env.production'):
+    logging.error(f'Mastodon environment file doesn\'t exist: {masto_path}/.env.production')
+
+else:
+    load_envbash(f'{masto_path}/.env.production')
+
 MASTOCONFIG={
     'domain': env.get('WEB_DOMAIN', env.get('LOCAL_DOMAIN', 'localhost:3000')),
     'dbhost': env.get('DB_HOST', '/var/run/postgresql'),
-    'dbport': env.get('DB_PORT', 5432),
+    'dbport': int(env.get('DB_PORT', 5432)),
     'dbname': env.get('DB_NAME', 'mastodon_production'),
     'dbuser': env.get('DB_USER', env.get('USER')),
     'dbpass': env.get('DB_PASS')
 }
 
-HECCCONFIG = {
-    'host': env.get('HECC_HOST', '127.0.0.1'),
-    'port': env.get('HECC_PORT', 3001),
-    'dbhost': env.get('HECC_DBHOST', MASTOCONFIG['dbhost']),
-    'dbport': env.get('HECC_DBPORT', MASTOCONFIG['dbport']),
-    'dbname': env.get('HECC_DBNAME', 'hecc'),
-    'dbuser': env.get('HECC_DBUSER', MASTOCONFIG['dbuser']),
-    'dbpass': env.get('HECC_DBPASS', MASTOCONFIG['dbpass'])
-}
-
diff --git a/paws/database.py b/paws/database.py
index 8669015..d8d7584 100644
--- a/paws/database.py
+++ b/paws/database.py
@@ -1,18 +1,21 @@
 import sys
+
 from DBUtils.PooledPg import PooledPg as DB
 from datetime import datetime
 from tinydb import TinyDB, Query
 from tinydb_smartcache import SmartCacheTable
 from tinyrecord import transaction as trans
 from tldextract import extract
+from urllib.parse import urlparse
 from json.decoder import JSONDecodeError
 
 from .config import stor_path, logging, MASTOCONFIG as mdb
+from .functions import bool_check
 
 
 def jsondb():
     try:
-        db = TinyDB(f'{stor_path}/db.json', indent='\t')
+        db = TinyDB(f'{stor_path}/db.json', indent='\t')
 
     except JSONDecodeError as e:
         logging.critical(f'Failed to load DB: {e}. Exiting...')
@@ -32,11 +35,11 @@ def jsondb():
 
 def pgdb():
     try:
-        if type(dbpass) == str:
-            return DB(dbname=mdb['dbname'], host=mdb['dbhost'], port=mdb['dbport'], user=mdb['dbuser'], passwd=mdb['dbpass'])
+        if mdb['dbpass']:
+            return DB(dbname=mdb['dbname'], host=mdb['dbhost'], port=mdb['dbport'], user=mdb['dbuser'], passwd=mdb['dbpass']).connection()
 
         else:
-            return DB(dbname=mdb['dbname'], host=mdb['dbhost'], port=mdb['dbport'], user=mdb['dbuser'])
+            return DB(dbname=mdb['dbname'], host=mdb['dbhost'], port=mdb['dbport'], user=mdb['dbuser']).connection()
 
     except Exception as e:
         logging.critical(f'Failed to connect to DB: {e}. Exiting...')
@@ -52,8 +55,8 @@ def get_bans():
 
         banlist[instance] = {
             'severity': domain['severity'],
-            'media': boolean(domain['reject_media']),
-            'reports': boolean(domain['reject_reports']),
+            'media': bool_check(domain['reject_media']),
+            'reports': bool_check(domain['reject_reports']),
             'private': domain['private_comment'],
             'public': domain['public_comment'],
             'updated': domain['updated_at']
@@ -75,17 +78,17 @@ def update_bancache():
         if domain not in banlist or bans[domain]['updated'] > banlist[domain]['updated']:
             banlist[domain] = bans[domain]
 
-    cache.get('bans') = banlist
+    cache['bans'] = banlist
     logging.debug('Updated ban cache')
 
 
 def ban_check(url):
-    instance = urlparse(url).netloc if url.startswith('https') else url
+    instance = urlparse(url).netloc if url.startswith('http') else url
     domain = extract(url)
     parsed = f'{domain.domain}.{domain.suffix}'
 
-    for ban in cache.get('ban'):
-        if ban in [url, parsed]:
+    for ban in get_bans():
+        if ban in [instance, parsed]:
             return True
 
     logging.debug(f'{parsed} not in blocklist')
@@ -93,4 +96,4 @@ def ban_check(url):
 pawsdb = jsondb()
 query = Query()
 mastodb = pgdb()
-cache = {'bans': get_bans()}
+
diff --git a/paws/functions.py b/paws/functions.py
index ac24eec..d39d663 100644
--- a/paws/functions.py
+++ b/paws/functions.py
@@ -18,10 +18,10 @@ error_codes = {
 
 
 def bool_check(value):
-    if value.lower() in ['yes', 'true', 'enable', True]:
+    if value == True or str(value).lower() in ['yes', 'true', 'enable']:
         return True
 
-    elif value.lower() in ['no', 'false', 'disable', '', None, False]:
+    elif value in [None, False] or str(value).lower() in ['no', 'false', 'disable', '']:
         return False
 
     else:
diff --git a/paws/middleware.py b/paws/middleware.py
index ab594b9..93f0507 100644
--- a/paws/middleware.py
+++ b/paws/middleware.py
@@ -39,6 +39,31 @@ auth_paths = [
 ]
 
 
+def parse_sig(signature):
+    '''Return the actor URL from the keyId field of a Signature header.'''
+    for line in signature.split(','):
+        if 'keyId' in line:
+            # Split on the first '=' only, the key URL may contain '=' itself
+            actor = line.split('=', 1)[1].split('#')[0].replace('"', '')
+            return actor
+
+
+def parse_ua(agent):
+    '''Return the host that follows '+https://' in a User-Agent, or None.'''
+    if not agent:
+        return
+
+    ua1 = agent.split('+https://')
+
+    if len(ua1) < 2:
+        return
+
+    ua2 = ua1[1].split('/')
+
+    if len(ua2) > 1:
+        return ua2[0]
+
+
 async def raise_auth_error(request, auth_realm):
     raise aiohttp.web.HTTPUnauthorized(
         headers={aiohttp.hdrs.WWW_AUTHENTICATE: f'Basic realm={auth_realm}'},
@@ -54,22 +79,22 @@ async def passthrough(path, headers, post=None, query=None):
 
     try:
         async with aiohttp.request(reqtype, f'https://{MASTOCONFIG["domain"]}/{path}{query}', headers=headers, data=post) as resp:
+            data = await resp.read()
+
             if resp.status not in [200, 202]:
+                print(data)
                 logging.warning(f'Recieved error {resp.status} from Mastodon')
-                json_error(504, f'Failed to forward request. Recieved error {resp.status} from Mastodon')
+                raise json_error(504, f'Failed to forward request. Recieved error {resp.status} from Mastodon')
 
-            data = await resp.read()
-
             raise aiohttp.web.HTTPOk(body=data, content_type=resp.content_type)
 
     except ClientConnectorError:
+        traceback.print_exc()
         return json_error(504, f'Failed to connect to Mastodon')
 
 
 async def http_redirect(app, handler):
     async def redirect_handler(request):
-        headers = {'Host': MASTOCONFIG["domain"]}
-        json_req = request.headers.get('Accept') == 'application/json'
         querydata = request.query
         rawquery = '?'
 
@@ -84,9 +109,6 @@ async def http_redirect(app, handler):
 
         query = rawquery if rawquery != '' else None
 
-        if json_req:
-            headers.update({'Accept': 'application/json'})
-
         try:
             data = await request.json()
 
@@ -94,7 +116,7 @@ async def http_redirect(app, handler):
             #logging.warning(f'failed to grab data: {e}')
             data = None
 
-        await passthrough(request.path, headers, post=data, query=query)
+        await passthrough(request.path, request.headers, post=data, query=query)
         return (await handler(request))
 
     return redirect_handler
@@ -103,25 +125,44 @@ async def http_redirect(app, handler):
 async def http_signatures(app, handler):
     async def http_signatures_handler(request):
         request['validated'] = False
-        json_req = request.headers.get('Accept') == 'application/json'
+        json_req = 'json' in request.headers.get('Accept', '')
 
-        if any(map(request.path.startswith, auth_paths)) and not user_check(request.path):
-            if json_req or request.path.endswith('.json'):
-                if 'signature' in request.headers:
-                    data = await request.json()
-                    print(json.dumps(data, indent=' '))
+        if request.method == 'POST':
+            if 'signature' in request.headers:
+                data = await request.json()
 
-                    if 'actor' not in data:
-                        raise json_error(401, 'signature check failed, no actor in message')
+                #print(json.dumps(data, indent=' '))
 
-                    actor = data["actor"]
-                    if not (await validate(actor, request)):
-                        logging.info(f'Signature validation failed for: {actor}')
-                        raise json_error(401, 'signature check failed, signature did not match key')
+                if 'actor' not in data:
+                    logging.info('signature check failed, no actor in message')
+                    raise json_error(401, 'signature check failed, no actor in message')
 
-                else:
+                actor = data["actor"]
+                if not (await validate(actor, request)):
+                    logging.info(f'Signature validation failed for: {actor}')
+                    raise json_error(401, 'signature check failed, signature did not match key')
+
+            else:
+                logging.info('missing signature')
+                raise json_error(401, 'Missing signature')
+
+        if any(map(request.path.startswith, auth_paths)) and request.method != 'POST':
+            if user_check(request.path):
+                logging.info('allowing passthrough of user')
+
+            elif json_req or request.path.endswith('.json'):
+                signature = request.headers.get('signature', '')
+
+                if not signature:
+                    logging.info('missing signature')
                     raise json_error(401, 'Missing signature')
 
+                actor = parse_sig(signature)
+
+                if not (await validate(actor, request)):
+                    logging.info(f'Signature validation failed for: {actor}')
+                    raise json_error(401, 'signature check failed, signature did not match key')
+
             else:
                 auth_username = 'admin'
                 auth_password = 'doubleheck'
@@ -155,13 +196,17 @@ async def http_signatures(app, handler):
 
 async def http_filter(app, handler):
     async def http_filter_handler(request):
-        data = await request.json()
-        actor = data.get('actor')
+        domain = parse_ua(request.headers.get('user-agent'))
+
+        if not domain:
+            raise json_error(401, 'Missing User-Agent')
 
         if [agent for agent in blocked_agents if agent in request.headers.get('User-Agent', '').lower()]:
+            logging.info(f'Blocked garbage: {domain}')
             raise HTTPTeapot(body='418 This teapot kills fascists', content_type='text/plain')
 
-        if db.ban_check(actor)
+        if db.ban_check(domain):
+            logging.info(f'Blocked instance: {domain}')
             raise json_error(403, 'Forbidden')
 
         return (await handler(request))
diff --git a/paws/routes.py b/paws/routes.py
index 38efc76..d99ad73 100644
--- a/paws/routes.py
+++ b/paws/routes.py
@@ -9,7 +9,7 @@ from jinja2 import select_autoescape, FileSystemLoader
 from ipaddress import ip_address as address
 from urllib.parse import urlparse
 
-from .config import HECCCONFIG, VERSION, script_path, logging
+from .config import PAWSCONFIG, VERSION, script_path, logging
 from .functions import color
 from . import middleware
 
@@ -63,8 +63,8 @@ async def start_webserver():
     runner = aiohttp.web.AppRunner(app, access_log_format='%{X-Real-Ip}i "%r" %s %b "%{User-Agent}i"')
     await runner.setup()
 
-    listen = HECCCONFIG['host']
-    port = HECCCONFIG['port']
+    listen = PAWSCONFIG['host']
+    port = PAWSCONFIG['port']
 
     if listen.startswith('unix:'):
         if sys.platform != 'win32':
diff --git a/paws/signature.py b/paws/signature.py
index d707ded..b7ef1fa 100644
--- a/paws/signature.py
+++ b/paws/signature.py
@@ -10,7 +10,15 @@ from Crypto.PublicKey import RSA
 from Crypto.Hash import SHA, SHA256, SHA512
 from Crypto.Signature import PKCS1_v1_5
 
-from .config import MASTOCONFIG
+from .config import MASTOCONFIG, VERSION
+
+
+class cache:
+    from .cache import LRUCache, TTLCache
+    messages = LRUCache()
+    actors = TTLCache()
+    keys = LRUCache()
+    sigstrings = LRUCache()
 
 
 def pass_hash():
@@ -76,16 +84,14 @@ async def fetch_actor(uri, force=False):
 
     try:
         headers = {
-            '(request-target)': uri,
             'Accept': 'application/activity+json',
             'User-Agent': f'MAW/{VERSION}; https://{domain}'
         }
-        headers['signature'] = sign_headers(headers, PRIVKEY, f'https://{domain}/actor#main-key')
-        headers.pop('(request-target)')
 
-        async with aiohttp.ClientSession(trace_configs=[http_debug()]) as session:
+        async with aiohttp.ClientSession() as session:
             async with session.get(uri, headers=headers) as resp:
                 if resp.status != 200:
+                    print(await resp.read())
                     return
 
                 data = await resp.json(encoding='utf-8')
@@ -102,12 +108,15 @@ async def fetch_actor_key(actor):
     actor_data = await fetch_actor(actor)
 
     if not actor_data:
+        logging.debug('Failed to fetch actor')
         return None
 
     if 'publicKey' not in actor_data:
+        logging.debug('publicKey not in actor')
         return None
 
     if 'publicKeyPem' not in actor_data['publicKey']:
+        logging.debug('Missing pubkey in actor')
         return None
 
     cache.keys.store(actor, actor_data['publicKey']['publicKeyPem'])
@@ -118,6 +127,7 @@ async def fetch_actor_key(actor):
 async def validate(actor, request):
     pubkey = await fetch_actor_key(actor)
     if not pubkey:
+        logging.debug(f'Failed to fetch pubkey for actor: {actor}')
         return False
 
     logging.debug(f'actor key: {pubkey}')
@@ -143,5 +153,5 @@ async def validate(actor, request):
 
     request['validated'] = result
 
-    logging.debug('validates? {result}')
+    logging.debug(f'validates? {result}')
     return result
diff --git a/reload.cfg b/reload.cfg
index 1645ac4..7ccc00b 100644
--- a/reload.cfg
+++ b/reload.cfg
@@ -1,4 +1,4 @@
-exec = python3 -m hecc
+exec = python3 -m paws
 watch_ext = py, env
 ignore_dirs = build, data
 ignore_files = reload.py, test.py