From: rs <> Date: Mon, 22 Dec 2025 19:29:25 +0000 (-0600) Subject: Refactor X-Git-Url: https://git.the-white-hart.net/?a=commitdiff_plain;p=gemini%2Fcbs-server.git Refactor --- diff --git a/cbs-srv.py b/cbs-srv.py index 1940529..5aae776 100755 --- a/cbs-srv.py +++ b/cbs-srv.py @@ -4,13 +4,14 @@ import select import socket from OpenSSL import SSL from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat -from urllib.parse import urlparse, unquote +from urllib.parse import urlsplit, urlunsplit, unquote, ParseResult import re from os import path, environ import subprocess import mimetypes +import traceback import logging import yaml import time @@ -24,6 +25,7 @@ mimetypes.add_type('text/gemini', '.gemini') def accept_client_cert(conn, cert, err_num, err_depth, ret_code): + # TODO: validate cert format, dates, signature, etc. return True @@ -34,87 +36,96 @@ class CBSException(Exception): self.logdata = logdata -def recv_req(conn: SSL.Connection, timeout=.5): +def recv_request(conn: SSL.Connection, timeout=.5) -> bytes: data = b'' start = time.time() while True: - # This prevents "slow loris" types of timeouts - if time.time() > start + timeout: - raise CBSException(59, 'Timeout while waiting for URL') + if time.time() > start + timeout: # Slow loris timeout + raise CBSException(59, 'Timeout while waiting for request') ready = select.select([conn], [], [], timeout) if ready[0]: data += conn.recv(1024) if b'\r\n' in data: lines = data.splitlines() - if len(lines) > 1: - logging.warning(f'Discarding data after URL line of request: {data}') - if len(lines[0]) > 1024: - raise CBSException(59, 'URL too long', lines[0]) - try: - req = lines[0].decode('ascii') - except UnicodeDecodeError: - raise CBSException(59, 'Non-ascii URL', lines[0]) - return req + return lines[0] else: - raise CBSException(59, 'Timeout while waiting for URL') + raise CBSException(59, 'Timeout while waiting for request') -def translate_path(url_path: str, base_path: str, check_existence=True, allow_extra=True): - # Build path one element at a time until we find a file - trans_path = base_path - path_len = 0 - for part in url_path.split('/'): - path_len += len(part) + 1 - # RFC 3986 says path components may have parameters, so look for any - # reserved delimiter characters and discard everything after one. - # Although the Gemini spec says not all of the components of generic URI - # syntax are supported, and disallowing path parameters seems in the - # spirit of the protocol, path parameters are not specifically mentioned - # so I try to do what feels safest and expect that they may show up. - part = unquote(re.split('[!$&\'()*+,;=]', part)[0]) - trans_path = path.join(trans_path, part) - if check_existence and path.isfile(trans_path): - break - else: - if check_existence: - if path.isdir(trans_path): - trans_path = path.join(trans_path, 'index.gmi') - if not path.isfile(trans_path): - raise CBSException(51, 'URL not found', trans_path) - else: - raise CBSException(51, 'URL not found', trans_path) +def check_request(request: bytes) -> ParseResult: + # Gemini protocol specifies max 1024-byte URI + if len(request) > 1024: + raise CBSException(59, 'Request URI too long') + + # The gemini protocol trades in UTF-8, but URIs can only contain ASCII + try: + request = request.decode('ascii') + except UnicodeDecodeError: + raise CBSException(59, 'Non-ASCII URI') - # Make sure the path didn't escape the base path. - trans_path = path.realpath(trans_path) - if path.commonpath([base_path, trans_path]) != base_path: - raise CBSException(59, 'Naughty directory traversal', trans_path) + # Parse URI and do some sanity checks + try: + parsed = urlsplit(request) # May raise ValueError + uri_port = parsed.port # Invalid port number raises ValueError on access + except ValueError: + raise CBSException(59, 'Invalid URI') + if parsed.scheme != 'gemini': + raise CBSException(59, 'Non-gemini scheme') + if parsed.username is not None: + raise CBSException(59, 'Username in URI disallowed') + if parsed.password is not None: + raise CBSException(59, 'Password in URI disallowed') + if parsed.fragment != '': + raise CBSException(59, 'Fragment in URI disallowed') + if any(delim in parsed.path for delim in ':?#[]@!$&\'(),;=*'): + raise CBSException(59, 'Invalid URI path') + + return parsed + + +def lookup_request(url_path: str, docroot: str): + # Build a resource path (and extra path for CGI) + translated = docroot + extra = '' + found = False + for part in url_path.split('/'): + unquoted = unquote(part) + if '/' in unquoted: # Don't want to deal with escaped path delimiters + raise CBSException(59, 'Invalid URI path') + if not found: + translated = path.join(translated, unquoted) + if path.isfile(translated): + found = True + else: + extra += '/' + unquoted + + # Look for an index if the path is a directory + if not found: + if path.isdir(translated): + translated = path.join(translated, 'index.gmi') + if not path.isfile(translated): + raise CBSException(51, 'URL not found') + else: + raise CBSException(51, 'URL not found') - # Grab all the leftovers verbatim for CGI scripts. - extra_path = url_path[max(path_len-1, 0):] - if extra_path and not allow_extra: - raise CBSException(59, 'Extra unexpected path information', extra_path) + # Make sure path doesn't escape the document root + abs_path = path.realpath(translated) + if path.commonpath([abs_path, docroot]) != docroot: + raise CBSException(59, 'Invalid URI path') - return trans_path, extra_path + return abs_path, translated, extra # ------------------------------------------------------------------------------ # Serving -def serve_req(conn: SSL.Connection, addr, url: str, conf: dict): - # Attempt to parse the url and do basic validation - logging.info('Serving URL "{}"'.format(url)) - try: - url_parsed = urlparse(url) - except ValueError: - raise CBSException(59, 'Could not parse URL', url) - if url_parsed.scheme != 'gemini': - raise CBSException(59, 'Non-gemini scheme', url_parsed.scheme) - if url_parsed.netloc == '': - raise CBSException(59, 'Netloc unspecified', url) - +def serve_req(conn: SSL.Connection, addr, url_parsed, conf: dict, absolute, relative, extra): # Parse the path information into a system path - req_path, extra_path = translate_path(url_parsed.path, conf['servedir']) + url = urlunsplit(url_parsed) + req_path = absolute + extra_path = extra + logging.info('Serving URL "{}"'.format(url)) # If the path is in the cgi directory then do some special CGI stuff. if conf['cgidir'] is not None and path.commonpath([conf['cgidir'], req_path]) == conf['cgidir']: @@ -130,7 +141,8 @@ def serve_req(conn: SSL.Connection, addr, url: str, conf: dict): def serve_cgi(conn: SSL.Connection, addr, req_path, extra_path, url, conf: dict): cert = conn.get_peer_certificate() - extra_trans, _ = translate_path(extra_path, conf['servedir'], check_existence=False, allow_extra=False) + #extra_trans, _ = translate_path(extra_path, conf['servedir'], check_existence=False, allow_extra=False) + extra_trans = path.join(conf['servedir'], extra_path) # TODO: properly escape characters in DNs, see RFC 2253 if cert is None: @@ -257,8 +269,10 @@ def main(): conn, addr = ssock.accept() conn.do_handshake() logging.info('Connection from {}'.format(addr)) - req = recv_req(conn) - serve_req(conn, addr, req, conf) + req = recv_request(conn) + url = check_request(req) + absolute, relative, extra = lookup_request(url.path, conf['servedir']) + serve_req(conn, addr, url, conf, absolute, relative, extra) conn.shutdown() conn.sock_shutdown(socket.SHUT_RDWR) except SSL.SysCallError as x: @@ -271,6 +285,7 @@ def main(): conn.sock_shutdown(socket.SHUT_RDWR) except Exception as x: logging.error('Exception: {}'.format(x)) + logging.error(traceback.format_exc()) conn.sendall('40 Server error\r\n'.encode('utf-8')) conn.shutdown() conn.sock_shutdown(socket.SHUT_RDWR)