Skip to content

Commit

Permalink
Move request sanitization and upload disallowed out of file_upload
Browse files Browse the repository at this point in the history
  • Loading branch information
dstufft committed May 31, 2024
1 parent 4d077ff commit 71088d1
Showing 1 changed file with 108 additions and 72 deletions.
180 changes: 108 additions & 72 deletions warehouse/forklift/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
HTTPPermanentRedirect,
HTTPTooManyRequests,
)
from pyramid.request import Request
from pyramid.view import view_config
from sqlalchemy import and_, exists, func, orm
from sqlalchemy.exc import MultipleResultsFound, NoResultFound
Expand Down Expand Up @@ -378,87 +379,30 @@ def file_upload(request):
# This is a list of warnings that we'll emit *IF* the request is successful.
warnings = []

# If we're in read-only mode, let upload clients know
if request.flags.enabled(AdminFlagValue.READ_ONLY):
raise _exc_with_message(
HTTPForbidden, "Read-only mode: Uploads are temporarily disabled."
)

if request.flags.enabled(AdminFlagValue.DISALLOW_NEW_UPLOAD):
raise _exc_with_message(
HTTPForbidden,
"New uploads are temporarily disabled. "
"See {projecthelp} for more information.".format(
projecthelp=request.help_url(_anchor="admin-intervention")
),
)

# Log an attempt to upload
metrics = request.find_service(IMetricsService, context=None)
metrics.increment("warehouse.upload.attempt")

# Before we do anything, if there isn't an authenticated identity with
# this request, then we'll go ahead and bomb out.
if request.identity is None:
raise _exc_with_message(
HTTPForbidden,
"Invalid or non-existent authentication information. "
"See {projecthelp} for more information.".format(
projecthelp=request.help_url(_anchor="invalid-auth")
),
)

# These checks only make sense when our authenticated identity is a user,
# not a project identity (like OIDC-minted tokens.)
if request.user:
# Ensure that user has a verified, primary email address. This should both
# reduce the ease of spam account creation and activity, as well as act as
# a forcing function for https://github.com/pypa/warehouse/issues/3632.
# TODO: Once https://github.com/pypa/warehouse/issues/3632 has been solved,
# we might consider a different condition, possibly looking at
# User.is_active instead.
if not (request.user.primary_email and request.user.primary_email.verified):
raise _exc_with_message(
HTTPBadRequest,
(
"User {!r} does not have a verified primary email address. "
"Please add a verified primary email before attempting to "
"upload to PyPI. See {project_help} for more information."
).format(
request.user.username,
project_help=request.help_url(_anchor="verified-email"),
),
) from None

# Do some cleanup of the various form fields
for key in list(request.POST):
value = request.POST.get(key)
if isinstance(value, str):
# distutils "helpfully" substitutes unknown, but "required" values
# with the string "UNKNOWN". This is basically never what anyone
# actually wants so we'll just go ahead and delete anything whose
# value is UNKNOWN.
if value.strip() == "UNKNOWN":
del request.POST[key]

# Escape NUL characters, which psycopg doesn't like
if "\x00" in value:
request.POST[key] = value.replace("\x00", "\\x00")

# We require protocol_version 1, it's the only supported version however
# passing a different version should raise an error.
if request.POST.get("protocol_version", "1") != "1":
raise _exc_with_message(HTTPBadRequest, "Unknown protocol version.")

# Check if any fields were supplied as a tuple and have become a
# FieldStorage. The 'content' field _should_ be a FieldStorage, however,
# and we don't care about the legacy gpg_signature field.
# ref: https://github.com/pypi/warehouse/issues/2185
# ref: https://github.com/pypi/warehouse/issues/2491
for field in set(request.POST) - {"content", "gpg_signature"}:
values = request.POST.getall(field)
if any(isinstance(value, FieldStorage) for value in values):
raise _exc_with_message(HTTPBadRequest, f"{field}: Should not be a tuple.")
# Sanitize the incoming request. There's a lot of garbage that gets sent to
# this view, which we'll sanitize to clean that up and/or fail early rather
# than getting failures deeper in the stack.
#
# NOTE: This method mutates the current request to do it's cleanup, but it
# can also return an error message if it could not sanitize.
if (reason := _sanitize_request(request)) is not None:
raise _exc_with_message(HTTPBadRequest, reason)

# Do some basic check to make sure that we're allowing uploads, either
# generally or for the current identity. Wo do this first, before doing
# anything else so that we can bail out early if there's no chance we're
# going to accept the upload anyways.
if (reason := _upload_disallowed(request)) is not None:
raise _exc_with_message(HTTPForbidden, reason)

# Validate and process the incoming file data.
form = UploadForm(request.POST)
Expand Down Expand Up @@ -1232,6 +1176,98 @@ def file_upload(request):
return HTTPOk(body="\n".join(warnings))


def _sanitize_request(request: Request) -> str | None:
"""
Sanitize a Pyramid request.
There's a lot of garbage that gets sent to the upload view, which we'll
sanitize to clean that up and/or fail early rather than getting failures
deeper in the stack.
This method will mutate the passed in request to sanitize it.
:return: An error message if the request could not be sanitized, otherwise
None.
"""
# Do some cleanup of the various form fields, there's a lot of garbage that
# gets sent to this view, and this helps prevent issues later on.
for key in list(request.POST):
value = request.POST.get(key)
if isinstance(value, str):
# distutils "helpfully" substitutes unknown, but "required" values
# with the string "UNKNOWN". This is basically never what anyone
# actually wants so we'll just go ahead and delete anything whose
# value is UNKNOWN.
if value.strip() == "UNKNOWN":
del request.POST[key]

# Escape NUL characters, which psycopg doesn't like
if "\x00" in value:
request.POST[key] = value.replace("\x00", "\\x00")

# Check if any fields were supplied as a tuple and have become a
# FieldStorage. The 'content' field _should_ be a FieldStorage, however,
# and we don't care about the legacy gpg_signature field.
# ref: https://github.com/pypi/warehouse/issues/2185
# ref: https://github.com/pypi/warehouse/issues/2491
for field in set(request.POST) - {"content", "gpg_signature"}:
values = request.POST.getall(field)
if any(isinstance(value, FieldStorage) for value in values):
return f"{field}: Should not be a tuple."


def _upload_disallowed(request: Request) -> str | None:
"""
Ensures that we're currently allowing uploads, either generally or for the
current request.identity.
:return: An error message if uploading is to be disallowed, otherwise None.
"""
# The very first thing we want to check, is whether we're currently in
# read only mode, because if we're in read only mode nothing else matters.
if request.flags.enabled(AdminFlagValue.READ_ONLY):
return "Read-only mode: Uploads are temporarily disabled."

# After that, we want to check if we're disallowing new uploads, which is
# functionally the same as read only mode, but only for the upload endpoint.
if request.flags.enabled(AdminFlagValue.DISALLOW_NEW_UPLOAD):
return (
"New uploads are temporarily disabled. "
"See {projecthelp} for more information.".format(
projecthelp=request.help_url(_anchor="admin-intervention")
)
)

# Before we do anything else, if there isn't an authenticated identity with
# this request, then we'll go ahead and bomb out.
if request.identity is None:
return (
"Invalid or non-existent authentication information. "
"See {projecthelp} for more information.".format(
projecthelp=request.help_url(_anchor="invalid-auth")
),
)

# These checks only make sense when our authenticated identity is a user,
# not a project identity (like OIDC-minted tokens.)
if request.user:
# Ensure that user has a verified, primary email address. This should both
# reduce the ease of spam account creation and activity, as well as act as
# a forcing function for https://github.com/pypa/warehouse/issues/3632.
# TODO: Once https://github.com/pypa/warehouse/issues/3632 has been solved,
# we might consider a different condition, possibly looking at
# User.is_active instead.
if not (request.user.primary_email and request.user.primary_email.verified):
return (
"User {!r} does not have a verified primary email address. "
"Please add a verified primary email before attempting to "
"upload to PyPI. See {project_help} for more information."
).format(
request.user.username,
project_help=request.help_url(_anchor="verified-email"),
)


@view_config(
route_name="forklift.legacy.submit", require_csrf=False, require_methods=["POST"]
)
Expand Down

0 comments on commit 71088d1

Please sign in to comment.