From 56647683444406ca9886e29ef75af39961a2c674 Mon Sep 17 00:00:00 2001 From: Feng Yu Date: Thu, 12 Dec 2024 10:26:41 -0800 Subject: [PATCH] LibWeb: Avoid re-encoding response headers Isomorphic-encoding a value that has already been encoded results in garbage data. `response_headers` is already encoded as ISO-8859-1/Latin-1, so we cannot use `from_string_pair`, which would isomorphic-encode the bytes a second time. Add `from_latin1_pair`, which copies the bytes verbatim, and use it instead. Follow-up to https://github.com/LadybirdBrowser/ladybird/pull/1893 --- Libraries/LibWeb/Fetch/Fetching/Fetching.cpp | 6 +++--- Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp | 8 ++++++++ Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.h | 1 + Libraries/LibWeb/Infra/Strings.cpp | 8 +++++++- .../LibWeb/Text/expected/http-non-ascii-content-type.txt | 1 + Tests/LibWeb/Text/input/http-non-ascii-content-type.html | 3 ++- 6 files changed, 22 insertions(+), 5 deletions(-) diff --git a/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp b/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp index a576937d1dda7..9da3be3cfdb23 100644 --- a/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp +++ b/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp @@ -2330,7 +2330,7 @@ WebIDL::ExceptionOr> nonstandard_resource_loader_file_o } for (auto const& [name, value] : response_headers.headers()) { - auto header = Infrastructure::Header::from_string_pair(name, value); + auto header = Infrastructure::Header::from_latin1_pair(name, value); response->header_list()->append(move(header)); } @@ -2396,7 +2396,7 @@ WebIDL::ExceptionOr> nonstandard_resource_loader_file_o response->set_status(status_code.value_or(200)); response->set_body(move(body)); for (auto const& [name, value] : response_headers.headers()) { - auto header = Infrastructure::Header::from_string_pair(name, value); + auto header = Infrastructure::Header::from_latin1_pair(name, value); response->header_list()->append(move(header)); } @@ -2421,7 +2421,7 @@ WebIDL::ExceptionOr> nonstandard_resource_loader_file_o auto [body, _] = 
TRY_OR_IGNORE(extract_body(realm, data)); response->set_body(move(body)); for (auto const& [name, value] : response_headers.headers()) { - auto header = Infrastructure::Header::from_string_pair(name, value); + auto header = Infrastructure::Header::from_latin1_pair(name, value); response->header_list()->append(move(header)); } diff --git a/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp b/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp index 8f2be78b19ffa..c8e1d98c291b4 100644 --- a/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp +++ b/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp @@ -58,6 +58,14 @@ Header Header::from_string_pair(StringView name, StringView value) }; } +Header Header::from_latin1_pair(StringView name, StringView value) +{ + return Header { + .name = MUST(ByteBuffer::copy(name.bytes())), + .value = MUST(ByteBuffer::copy(value.bytes())), + }; +} + GC::Ref HeaderList::create(JS::VM& vm) { return vm.heap().allocate(); diff --git a/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.h b/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.h index f032aeb1fa257..7dc9a6c508288 100644 --- a/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.h +++ b/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.h @@ -29,6 +29,7 @@ struct Header { [[nodiscard]] static Header copy(Header const&); [[nodiscard]] static Header from_string_pair(StringView, StringView); + [[nodiscard]] static Header from_latin1_pair(StringView, StringView); }; // https://fetch.spec.whatwg.org/#concept-header-list diff --git a/Libraries/LibWeb/Infra/Strings.cpp b/Libraries/LibWeb/Infra/Strings.cpp index 2b71dffbf699e..f5e24501c42ef 100644 --- a/Libraries/LibWeb/Infra/Strings.cpp +++ b/Libraries/LibWeb/Infra/Strings.cpp @@ -147,9 +147,12 @@ ErrorOr to_ascii_uppercase(StringView string) // https://infra.spec.whatwg.org/#isomorphic-encode ByteBuffer isomorphic_encode(StringView input) { + // To isomorphic encode an isomorphic string input: return a byte sequence whose 
length is equal to input’s code + // point length and whose bytes have the same values as the values of input’s code points, in the same order. + // NOTE: This is essentially spec-speak for "Encode as ISO-8859-1 / Latin-1". ByteBuffer buf = {}; for (auto code_point : Utf8View { input }) { - // VERIFY(code_point <= 0xFF); + VERIFY(code_point <= 0xFF); if (code_point > 0xFF) dbgln("FIXME: Trying to isomorphic encode a string with code points > U+00FF."); buf.append((u8)code_point); @@ -160,6 +163,9 @@ ByteBuffer isomorphic_encode(StringView input) // https://infra.spec.whatwg.org/#isomorphic-decode String isomorphic_decode(ReadonlyBytes input) { + // To isomorphic decode a byte sequence input, return a string whose code point length is equal + // to input’s length and whose code points have the same values as the values of input’s bytes, in the same order. + // NOTE: This is essentially spec-speak for "Decode as ISO-8859-1 / Latin-1". StringBuilder builder(input.size()); for (u8 code_point : input) { builder.append_code_point(code_point); diff --git a/Tests/LibWeb/Text/expected/http-non-ascii-content-type.txt b/Tests/LibWeb/Text/expected/http-non-ascii-content-type.txt index aaecaf93c4a5b..b69ff2c8a83e4 100644 --- a/Tests/LibWeb/Text/expected/http-non-ascii-content-type.txt +++ b/Tests/LibWeb/Text/expected/http-non-ascii-content-type.txt @@ -1 +1,2 @@ +Content-Type:text/html;test=ÿ;charset=gbk PASS (didn't crash) diff --git a/Tests/LibWeb/Text/input/http-non-ascii-content-type.html b/Tests/LibWeb/Text/input/http-non-ascii-content-type.html index d37a392ec369a..a725ea8370553 100644 --- a/Tests/LibWeb/Text/input/http-non-ascii-content-type.html +++ b/Tests/LibWeb/Text/input/http-non-ascii-content-type.html @@ -12,7 +12,8 @@ }, }); - const blob = await fetch(url).then((response) => response.blob()); + const headers = await fetch(url).then((response) => response.headers); + println("Content-Type:" + headers.get("Content-Type")); println("PASS (didn't crash)"); } catch 
(err) { println("FAIL - " + err);