From 9bc08a5a6ed24ff7a295b6c5d987d13c6e73ff5f Mon Sep 17 00:00:00 2001 From: Gingeh <39150378+Gingeh@users.noreply.github.com> Date: Mon, 21 Oct 2024 14:45:35 +1100 Subject: [PATCH] LibWeb: Implement and use "isomorphic decoding" --- Userland/Libraries/LibWeb/DOM/Document.cpp | 2 +- .../Fetch/Infrastructure/HTTP/Headers.cpp | 15 +++++++----- .../LibWeb/Fetch/Infrastructure/URL.cpp | 3 ++- .../LibWeb/HTML/WindowOrWorkerGlobalScope.cpp | 8 +++---- Userland/Libraries/LibWeb/Infra/Strings.cpp | 23 +++++++++++++++++++ Userland/Libraries/LibWeb/Infra/Strings.h | 2 ++ 6 files changed, 40 insertions(+), 13 deletions(-) diff --git a/Userland/Libraries/LibWeb/DOM/Document.cpp b/Userland/Libraries/LibWeb/DOM/Document.cpp index fd7400a144125..d436a4533ea17 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.cpp +++ b/Userland/Libraries/LibWeb/DOM/Document.cpp @@ -347,7 +347,7 @@ WebIDL::ExceptionOr> Document::create_and_initialize( // 16. If navigationParams's response has a `Refresh` header, then: if (auto maybe_refresh = navigation_params.response->header_list()->get("Refresh"sv.bytes()); maybe_refresh.has_value()) { // 1. Let value be the isomorphic decoding of the value of the header. - auto const& value = maybe_refresh.value(); + auto value = Infra::isomorphic_decode(maybe_refresh.value()); // 2. Run the shared declarative refresh steps with document and value. 
document->shared_declarative_refresh_steps(value, nullptr); diff --git a/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp b/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp index ed07b9977ee22..1c9480a8ee0f3 100644 --- a/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp +++ b/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -45,8 +46,8 @@ requires(IsSameIgnoringCV) struct CaseInsensitiveBytesTraits : public Tra Header Header::from_string_pair(StringView name, StringView value) { return Header { - .name = MUST(ByteBuffer::copy(name.bytes())), - .value = MUST(ByteBuffer::copy(value.bytes())), + .name = Infra::isomorphic_encode(name), + .value = Infra::isomorphic_encode(value), }; } @@ -128,7 +129,7 @@ Optional> get_decode_and_split_header_value(ReadonlyBytes value) // To get, decode, and split a header value value, run these steps: // 1. Let input be the result of isomorphic decoding value. - auto input = StringView { value }; + auto input = Infra::isomorphic_decode(value); // 2. Let position be a position variable for input, initially pointing at the start of input. auto lexer = GenericLexer { input }; @@ -523,7 +524,8 @@ bool is_cors_safelisted_request_header(Header const& header) return false; // 2. Let mimeType be the result of parsing the result of isomorphic decoding value. - auto mime_type = MimeSniff::MimeType::parse(StringView { value }); + auto decoded = Infra::isomorphic_decode(value); + auto mime_type = MimeSniff::MimeType::parse(decoded); // 3. If mimeType is failure, then return false. if (!mime_type.has_value()) @@ -726,6 +728,7 @@ bool is_forbidden_request_header(Header const& header) auto parsed_values = get_decode_and_split_header_value(header.value); // 2. For each method of parsedValues: if the isomorphic encoding of method is a forbidden method, then return true. 
+ // Note: get_decode_and_split_header_value has already isomorphic-decoded each value, so method.bytes() hands over the decoded string's bytes instead of re-encoding it. NOTE(review): that matches the isomorphic encoding only for ASCII input (presumably sufficient for matching forbidden method names); confirm against is_forbidden_method. if (parsed_values.has_value() && any_of(*parsed_values, [](auto method) { return is_forbidden_method(method.bytes()); })) return true; } @@ -826,10 +829,10 @@ Variant, ExtractHeaderParseFailure, Empty> extract_header_lis Optional parse_single_range_header_value(ReadonlyBytes value) { // 1. Let data be the isomorphic decoding of value. - auto data = StringView { value }; + auto data = Infra::isomorphic_decode(value); // 2. If data does not start with "bytes=", then return failure. - if (!data.starts_with("bytes="sv)) + if (!data.starts_with_bytes("bytes="sv)) return {}; // 3. Let position be a position variable for data, initially pointing at the 6th code point of data. diff --git a/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.cpp b/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.cpp index 509489b3190a8..998f8133a4e1c 100644 --- a/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.cpp +++ b/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace Web::Fetch::Infrastructure { @@ -75,7 +76,7 @@ ErrorOr process_data_url(URL::URL const& data_url) trimmed_substring_view = trimmed_substring_view.trim(" "sv, TrimMode::Right); if (trimmed_substring_view.ends_with(';')) { // 1. Let stringBody be the isomorphic decode of body. - auto string_body = StringView(body); + auto string_body = Infra::isomorphic_decode(body); // 2. Set body to the forgiving-base64 decode of stringBody. // 3. If body is failure, then return failure. 
diff --git a/Userland/Libraries/LibWeb/HTML/WindowOrWorkerGlobalScope.cpp b/Userland/Libraries/LibWeb/HTML/WindowOrWorkerGlobalScope.cpp index afaee04520870..e34d6d66a9211 100644 --- a/Userland/Libraries/LibWeb/HTML/WindowOrWorkerGlobalScope.cpp +++ b/Userland/Libraries/LibWeb/HTML/WindowOrWorkerGlobalScope.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -34,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -139,10 +139,8 @@ WebIDL::ExceptionOr WindowOrWorkerGlobalScopeMixin::atob(String const& d return WebIDL::InvalidCharacterError::create(realm, "Input string is not valid base64 data"_string); // 3. Return decodedData. - // decode_base64() returns a byte buffer. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8. - auto decoder = TextCodec::decoder_for_exact_name("ISO-8859-1"sv); - VERIFY(decoder.has_value()); - return TRY_OR_THROW_OOM(vm, decoder->to_utf8(decoded_data.value())); + // decode_base64() returns a byte buffer. LibJS uses UTF-8 for strings. Use isomorphic decoding to convert bytes to UTF-8. 
+ return Infra::isomorphic_decode(decoded_data.value()); } // https://html.spec.whatwg.org/multipage/timers-and-user-prompts.html#dom-queuemicrotask diff --git a/Userland/Libraries/LibWeb/Infra/Strings.cpp b/Userland/Libraries/LibWeb/Infra/Strings.cpp index 66986181b7326..99ac0708a8a2f 100644 --- a/Userland/Libraries/LibWeb/Infra/Strings.cpp +++ b/Userland/Libraries/LibWeb/Infra/Strings.cpp @@ -144,4 +144,27 @@ ErrorOr to_ascii_uppercase(StringView string) return string_builder.to_string(); } +// https://infra.spec.whatwg.org/#isomorphic-encode +ByteBuffer isomorphic_encode(StringView input) +{ + ByteBuffer buf = {}; + for (auto code_point : Utf8View { input }) { + // VERIFY(code_point <= 0xFF); is left disabled, presumably because callers may still pass non-isomorphic strings (confirm); such code points are logged below and then narrowed to one byte by the u8 cast. + if (code_point > 0xFF) + dbgln("FIXME: Trying to isomorphic encode a string with code points > U+00FF."); + buf.append((u8)code_point); + } + return buf; +} + +// https://infra.spec.whatwg.org/#isomorphic-decode +String isomorphic_decode(ReadonlyBytes input) +{ + StringBuilder builder(input.size()); + for (u8 code_point : input) { + builder.append_code_point(code_point); // Appends the code point whose value equals this byte (0x00..0xFF). + } + return builder.to_string_without_validation(); +} + } diff --git a/Userland/Libraries/LibWeb/Infra/Strings.h b/Userland/Libraries/LibWeb/Infra/Strings.h index 4d806111364ba..963638cb4a2af 100644 --- a/Userland/Libraries/LibWeb/Infra/Strings.h +++ b/Userland/Libraries/LibWeb/Infra/Strings.h @@ -20,5 +20,7 @@ bool is_code_unit_prefix(StringView potential_prefix, StringView input); ErrorOr convert_to_scalar_value_string(StringView string); ErrorOr to_ascii_lowercase(StringView string); ErrorOr to_ascii_uppercase(StringView string); +ByteBuffer isomorphic_encode(StringView input); +String isomorphic_decode(ReadonlyBytes input); }