- # Make sure the path didn't escape the base path.
- trans_path = path.realpath(trans_path)
- if path.commonpath([base_path, trans_path]) != base_path:
- raise CBSException(59, 'Naughty directory traversal', trans_path)
+ # Parse URI and do some sanity checks
+ try:
+ parsed = urlsplit(request) # May raise ValueError
+ uri_port = parsed.port # Invalid port number raises ValueError on access
+ except ValueError:
+ raise CBSException(59, 'Invalid URI')
+ if parsed.scheme != 'gemini':
+ raise CBSException(59, 'Non-gemini scheme')
+ if parsed.username is not None:
+ raise CBSException(59, 'Username in URI disallowed')
+ if parsed.password is not None:
+ raise CBSException(59, 'Password in URI disallowed')
+ if parsed.fragment != '':
+ raise CBSException(59, 'Fragment in URI disallowed')
+ if any(delim in parsed.path for delim in ':?#[]@!$&\'(),;=*'):
+ raise CBSException(59, 'Invalid URI path')
+
+ return parsed
+
+
+def lookup_request(url_path: str, docroot: str):
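+ # Walk the URL path segment by segment to build the filesystem path; any segments after the first regular file are collected as the extra path (for CGI)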
+ translated = docroot
+ extra = ''
+ found = False
+ for part in url_path.split('/'):
+ unquoted = unquote(part)
+ if '/' in unquoted: # Don't want to deal with escaped path delimiters
+ raise CBSException(59, 'Invalid URI path')
+ if not found:
+ translated = path.join(translated, unquoted)
+ if path.isfile(translated):
+ found = True
+ else:
+ extra += '/' + unquoted
+
+ # Look for an index if the path is a directory
+ if not found:
+ if path.isdir(translated):
+ translated = path.join(translated, 'index.gmi')
+ if not path.isfile(translated):
+ raise CBSException(51, 'URL not found')
+ else:
+ raise CBSException(51, 'URL not found')