Skip to content

Commit

Permalink
LibWeb: Implement and use "isomorphic decoding"
Browse files Browse the repository at this point in the history
  • Loading branch information
Gingeh committed Oct 22, 2024
1 parent 07400b5 commit 93f690d
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Userland/Libraries/LibWeb/DOM/Document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<Document>> Document::create_and_initialize(
// 16. If navigationParams's response has a `Refresh` header, then:
if (auto maybe_refresh = navigation_params.response->header_list()->get("Refresh"sv.bytes()); maybe_refresh.has_value()) {
// 1. Let value be the isomorphic decoding of the value of the header.
auto const& value = maybe_refresh.value();
auto value = Infra::isomorphic_decode(maybe_refresh.value());

// 2. Run the shared declarative refresh steps with document and value.
document->shared_declarative_refresh_steps(value, nullptr);
Expand Down
15 changes: 9 additions & 6 deletions Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <LibWeb/Fetch/Infrastructure/HTTP/Headers.h>
#include <LibWeb/Fetch/Infrastructure/HTTP/Methods.h>
#include <LibWeb/Infra/ByteSequences.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/Loader/ResourceLoader.h>
#include <LibWeb/MimeSniff/MimeType.h>

Expand All @@ -45,8 +46,8 @@ requires(IsSameIgnoringCV<T, u8>) struct CaseInsensitiveBytesTraits : public Tra
Header Header::from_string_pair(StringView name, StringView value)
{
return Header {
.name = MUST(ByteBuffer::copy(name.bytes())),
.value = MUST(ByteBuffer::copy(value.bytes())),
.name = Infra::isomorphic_encode(name),
.value = Infra::isomorphic_encode(value),
};
}

Expand Down Expand Up @@ -128,7 +129,7 @@ Optional<Vector<String>> get_decode_and_split_header_value(ReadonlyBytes value)
// To get, decode, and split a header value value, run these steps:

// 1. Let input be the result of isomorphic decoding value.
auto input = StringView { value };
auto input = Infra::isomorphic_decode(value);

// 2. Let position be a position variable for input, initially pointing at the start of input.
auto lexer = GenericLexer { input };
Expand Down Expand Up @@ -523,7 +524,8 @@ bool is_cors_safelisted_request_header(Header const& header)
return false;

// 2. Let mimeType be the result of parsing the result of isomorphic decoding value.
auto mime_type = MimeSniff::MimeType::parse(StringView { value });
auto decoded = Infra::isomorphic_decode(value);
auto mime_type = MimeSniff::MimeType::parse(decoded);

// 3. If mimeType is failure, then return false.
if (!mime_type.has_value())
Expand Down Expand Up @@ -726,6 +728,7 @@ bool is_forbidden_request_header(Header const& header)
auto parsed_values = get_decode_and_split_header_value(header.value);

// 2. For each method of parsedValues: if the isomorphic encoding of method is a forbidden method, then return true.
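// Note: The values returned from get_decode_and_split_header_value have already been isomorphic decoded, so method.bytes() is passed directly below; for ASCII-only method names those bytes are identical to the isomorphic encoding.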
if (parsed_values.has_value() && any_of(*parsed_values, [](auto method) { return is_forbidden_method(method.bytes()); }))
return true;
}
Expand Down Expand Up @@ -826,10 +829,10 @@ Variant<Vector<ByteBuffer>, ExtractHeaderParseFailure, Empty> extract_header_lis
Optional<RangeHeaderValue> parse_single_range_header_value(ReadonlyBytes value)
{
// 1. Let data be the isomorphic decoding of value.
auto data = StringView { value };
auto data = Infra::isomorphic_decode(value);

// 2. If data does not start with "bytes=", then return failure.
if (!data.starts_with("bytes="sv))
if (!data.starts_with_bytes("bytes="sv))
return {};

// 3. Let position be a position variable for data, initially pointing at the 6th code point of data.
Expand Down
3 changes: 2 additions & 1 deletion Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <AK/Base64.h>
#include <LibURL/URL.h>
#include <LibWeb/Fetch/Infrastructure/URL.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/MimeSniff/MimeType.h>

namespace Web::Fetch::Infrastructure {
Expand Down Expand Up @@ -75,7 +76,7 @@ ErrorOr<DataURL> process_data_url(URL::URL const& data_url)
trimmed_substring_view = trimmed_substring_view.trim(" "sv, TrimMode::Right);
if (trimmed_substring_view.ends_with(';')) {
// 1. Let stringBody be the isomorphic decode of body.
auto string_body = StringView(body);
auto string_body = Infra::isomorphic_decode(body);

// 2. Set body to the forgiving-base64 decode of stringBody.
// 3. If body is failure, then return failure.
Expand Down
8 changes: 3 additions & 5 deletions Userland/Libraries/LibWeb/HTML/WindowOrWorkerGlobalScope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#include <AK/Vector.h>
#include <LibJS/Heap/HeapFunction.h>
#include <LibJS/Runtime/Array.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/Bindings/MainThreadVM.h>
#include <LibWeb/Crypto/Crypto.h>
#include <LibWeb/Fetch/FetchMethod.h>
Expand All @@ -34,6 +33,7 @@
#include <LibWeb/HighResolutionTime/Performance.h>
#include <LibWeb/HighResolutionTime/SupportedPerformanceTypes.h>
#include <LibWeb/IndexedDB/IDBFactory.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/PerformanceTimeline/EntryTypes.h>
#include <LibWeb/PerformanceTimeline/PerformanceObserver.h>
#include <LibWeb/PerformanceTimeline/PerformanceObserverEntryList.h>
Expand Down Expand Up @@ -139,10 +139,8 @@ WebIDL::ExceptionOr<String> WindowOrWorkerGlobalScopeMixin::atob(String const& d
return WebIDL::InvalidCharacterError::create(realm, "Input string is not valid base64 data"_string);

// 3. Return decodedData.
// decode_base64() returns a byte buffer. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8.
auto decoder = TextCodec::decoder_for_exact_name("ISO-8859-1"sv);
VERIFY(decoder.has_value());
return TRY_OR_THROW_OOM(vm, decoder->to_utf8(decoded_data.value()));
// decode_base64() returns a byte buffer. LibJS uses UTF-8 for strings. Use isomorphic decoding to convert bytes to UTF-8.
return Infra::isomorphic_decode(decoded_data.value());
}

// https://html.spec.whatwg.org/multipage/timers-and-user-prompts.html#dom-queuemicrotask
Expand Down
21 changes: 21 additions & 0 deletions Userland/Libraries/LibWeb/Infra/Strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,25 @@ ErrorOr<String> to_ascii_uppercase(StringView string)
return string_builder.to_string();
}

// https://infra.spec.whatwg.org/#isomorphic-encode
// Isomorphic-encodes `input`: walks the code points of `input` in order and emits
// one byte per code point, each byte carrying that code point's value.
ByteBuffer isomorphic_encode(StringView input)
{
    ByteBuffer encoded_bytes;
    Utf8View code_points { input };
    for (auto scalar : code_points) {
        // Only code points that fit in a single byte may be encoded; anything larger aborts here.
        VERIFY(scalar <= 0xFF);
        encoded_bytes.append(static_cast<u8>(scalar));
    }
    return encoded_bytes;
}

// https://infra.spec.whatwg.org/#isomorphic-decode
// Isomorphic-decodes `input`: each byte, in order, is appended to the result as the
// code point with the same numeric value.
String isomorphic_decode(ReadonlyBytes input)
{
    // The builder is constructed with the number of input bytes.
    StringBuilder decoded(input.size());
    for (size_t index = 0; index < input.size(); ++index) {
        u8 const byte = input[index];
        decoded.append_code_point(byte);
    }
    return decoded.to_string_without_validation();
}

}
2 changes: 2 additions & 0 deletions Userland/Libraries/LibWeb/Infra/Strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,7 @@ bool is_code_unit_prefix(StringView potential_prefix, StringView input);
ErrorOr<String> convert_to_scalar_value_string(StringView string);
ErrorOr<String> to_ascii_lowercase(StringView string);
ErrorOr<String> to_ascii_uppercase(StringView string);
ByteBuffer isomorphic_encode(StringView input);
String isomorphic_decode(ReadonlyBytes input);

}

0 comments on commit 93f690d

Please sign in to comment.