Skip to content

Commit

Permalink
fix string escaping
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed Sep 11, 2023
1 parent e9fc201 commit cc14776
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 37 deletions.
90 changes: 58 additions & 32 deletions src/string_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,49 +14,29 @@ impl AbstractStringDecoder for StringDecoder {

fn decode(data: &[u8], mut index: usize) -> JsonResult<(Self::Output, usize)> {
index += 1;
let mut chars = Vec::new();
let mut bytes = Vec::new();
let start = index;
while let Some(next) = data.get(index) {
match next {
b'"' => {
index += 1;
let s = unsafe { String::from_utf8_unchecked(chars) };
let s = unsafe { String::from_utf8_unchecked(bytes) };
return Ok((s, index));
}
b'\\' => {
index += 1;
if let Some(next_inner) = data.get(index) {
match next_inner {
b'"' | b'\\' | b'/' => chars.push(*next_inner),
b'b' => chars.push(b'\x08'),
b'f' => chars.push(b'\x0C'),
b'n' => chars.push(b'\n'),
b'r' => chars.push(b'\r'),
b't' => chars.push(b'\t'),
b'"' | b'\\' | b'/' => bytes.push(*next_inner),
b'b' => bytes.push(b'\x08'),
b'f' => bytes.push(b'\x0C'),
b'n' => bytes.push(b'\n'),
b'r' => bytes.push(b'\r'),
b't' => bytes.push(b'\t'),
b'u' => {
let mut n = 0;
for _ in 0..4 {
index += 1;
let c = match data.get(index) {
Some(c) => *c,
None => return Err(JsonError::InvalidString(index - start)),
};
let hex = match c {
b'0'..=b'9' => (c & 0x0f) as u16,
b'a'..=b'f' => (c - b'a' + 10) as u16,
b'A'..=b'F' => (c - b'A' + 10) as u16,
_ => return Err(JsonError::InvalidStringEscapeSequence(index)),
};
n = (n << 4) + hex;
}
match char::from_u32(n as u32) {
Some(c) => {
for b in c.to_string().bytes() {
chars.push(b);
}
}
None => return Err(JsonError::InvalidString(index - start)),
}
let (c, new_index) = parse_escape(data, index, start)?;
index = new_index;
bytes.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
}
_ => return Err(JsonError::InvalidString(index - start)),
}
Expand All @@ -66,14 +46,60 @@ impl AbstractStringDecoder for StringDecoder {
}
// all values below 32 are invalid
next if *next < 32u8 => return Err(JsonError::InvalidString(index - start)),
_ => chars.push(*next),
_ => bytes.push(*next),
}
index += 1;
}
Err(JsonError::UnexpectedEnd)
}
}

/// Taken from https://github.com/serde-rs/json/blob/45f10ec816e3f2765ac08f7ca73752326b0475d7/src/read.rs#L873-L928
///
/// Decodes the hex digits of a `\uXXXX` escape into a `char`, joining a
/// UTF-16 surrogate pair (`\uD8xx\uDCxx`) into one code point when needed.
///
/// `index` is the position immediately before the first hex digit and `start`
/// is the offset subtracted from positions reported in errors. On success the
/// decoded `char` is returned together with the position of the last hex digit
/// that was consumed.
///
/// # Errors
///
/// * `JsonError::InvalidStringEscapeSequence` for a low surrogate with no
///   preceding high surrogate, or a high surrogate that is not immediately
///   followed by a `\u` escape holding a low surrogate.
/// * `JsonError::InvalidString` if the resulting value is not a valid `char`.
/// * Any error returned by `parse_u4`.
fn parse_escape(data: &[u8], index: usize, start: usize) -> JsonResult<(char, usize)> {
    let (first, index) = parse_u4(data, index, start)?;

    // A low surrogate may only appear as the second half of a pair.
    if (0xDC00..=0xDFFF).contains(&first) {
        return Err(JsonError::InvalidStringEscapeSequence(index - start));
    }

    // Anything outside the high-surrogate range is a complete code point on its own.
    if !(0xD800..=0xDBFF).contains(&first) {
        return char::from_u32(u32::from(first))
            .map(|c| (c, index))
            .ok_or_else(|| JsonError::InvalidString(index - start));
    }

    // High surrogate: the next two bytes must open a second `\u` escape.
    let followed_by_escape = data.get(index + 1) == Some(&b'\\') && data.get(index + 2) == Some(&b'u');
    if !followed_by_escape {
        return Err(JsonError::InvalidStringEscapeSequence(index - start));
    }

    let (second, index) = parse_u4(data, index + 2, start)?;
    if !(0xDC00..=0xDFFF).contains(&second) {
        return Err(JsonError::InvalidStringEscapeSequence(index - start));
    }

    // Combine the two 10-bit halves and add the supplementary-plane offset.
    let high = u32::from(first - 0xD800);
    let low = u32::from(second - 0xDC00);
    let code_point = ((high << 10) | low) + 0x1_0000;

    char::from_u32(code_point)
        .map(|c| (c, index))
        .ok_or_else(|| JsonError::InvalidString(index - start))
}

/// Reads four hexadecimal digits from `data` and folds them into a `u16`.
///
/// `index` is the position immediately before the first digit; it is advanced
/// by one before each read. `start` is the offset subtracted from positions
/// reported in errors. On success the parsed value is returned together with
/// the position of the last digit read.
///
/// # Errors
///
/// * `JsonError::InvalidString` if `data` ends before four digits are read.
/// * `JsonError::InvalidStringEscapeSequence` if a byte is not an ASCII hex
///   digit (`0-9`, `a-f`, `A-F`).
fn parse_u4(data: &[u8], mut index: usize, start: usize) -> JsonResult<(u16, usize)> {
    let mut value: u16 = 0;
    for _ in 0..4 {
        index += 1;
        let byte = data
            .get(index)
            .copied()
            .ok_or_else(|| JsonError::InvalidString(index - start))?;
        let digit = char::from(byte)
            .to_digit(16)
            .ok_or_else(|| JsonError::InvalidStringEscapeSequence(index - start))?;
        // `digit` is at most 15, so the narrowing cast is lossless, and the low
        // nibble of the shifted value is zero, so `|` is equivalent to `+`.
        value = (value << 4) | digit as u16;
    }
    Ok((value, index))
}

pub struct StringDecoderRange;

impl AbstractStringDecoder for StringDecoderRange {
Expand Down
23 changes: 18 additions & 5 deletions tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,12 @@ single_tests! {
float_exp_pos: ok => "2.2e+10", "Float(22000000000) @ 1:1";
// NOTICE - this might be brittle, if so move to a separate test
float_exp_neg: ok => "2.2e-2", "Float(0.022000000000000002) @ 1:1";
float_exp_zero: ok => "0.000e123", "Float(0) @ 1:1";
float_exp_massive1: ok => "2e2147483647", "Float(inf) @ 1:1";
float_exp_massive2: ok => "2e2147483648", "Float(inf) @ 1:1";
float_exp_massive3: ok => "2e2147483646", "Float(inf) @ 1:1";
float_exp_massive4: ok => "2e2147483646", "Float(inf) @ 1:1";
float_exp_massive5: ok => "18446744073709551615000.0", "Float(18446744073709552000000) @ 1:1";
float_exp_tiny0: ok => "2e-2147483647", "Float(0) @ 1:1";
float_exp_tiny1: ok => "2e-2147483648", "Float(0) @ 1:1";
float_exp_tiny2: ok => "2e-2147483646", "Float(0) @ 1:1";
Expand Down Expand Up @@ -227,7 +230,7 @@ string_tests! {
newline: r#" "foo\nbar" "# => "foo\nbar";
pound_sign: r#" "\u00a3" "# => "£";
double_quote: r#" "\"" "# => r#"""#;
backslash: r#""\\""# => r#"\"#;
backslash: r#""\\""# => r"\";
controls: "\"\\b\\f\\n\\r\\t\"" => "\u{8}\u{c}\n\r\t";
controls_python: "\"\\b\\f\\n\\r\\t\"" => "\x08\x0c\n\r\t"; // python notation for the same thing
}
Expand Down Expand Up @@ -335,6 +338,16 @@ fn good_high_order_string() {
}
}

/// A `\uXXXX` surrogate pair with mixed-case hex digits must decode to a
/// single four-byte UTF-8 character.
#[test]
fn udb_string() {
    // The JSON text `"\udBdd\udFdd"`: high surrogate 0xDBDD followed by low surrogate 0xDFDD.
    let bytes: Vec<u8> = b"\"\\udBdd\\udFdd\"".to_vec();
    // UTF-8 encoding of U+1077DD, the code point the pair combines to.
    let expected = [0xF4_u8, 0x87, 0x9F, 0x9D];

    let v = JsonValue::parse(&bytes).unwrap();
    match v {
        JsonValue::String(s) => assert_eq!(s.as_bytes(), expected),
        _ => panic!("unexpected valid {v:?}"),
    }
}

#[test]
fn parse_object() {
let json = r#"{"foo": "bar", "spam": [1, null, true]}"#;
Expand Down Expand Up @@ -428,11 +441,11 @@ fn jiter_object() {
assert_eq!(jiter.next_key().unwrap(), Some("spam".to_string()));
assert_eq!(jiter.next_array().unwrap(), Some(Peak::Num(b'1')));
assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1));
assert_eq!(jiter.array_step().unwrap(), true);
assert!(jiter.array_step().unwrap());
assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(2));
assert_eq!(jiter.array_step().unwrap(), true);
assert!(jiter.array_step().unwrap());
assert_eq!(jiter.next_bytes().unwrap(), b"x");
assert_eq!(jiter.array_step().unwrap(), false);
assert!(!jiter.array_step().unwrap());
assert_eq!(jiter.next_key().unwrap(), None);
jiter.finish().unwrap();
}
Expand All @@ -449,7 +462,7 @@ fn jiter_trailing_bracket() {
let mut jiter = Jiter::new(b"[1]]");
assert_eq!(jiter.next_array().unwrap(), Some(Peak::Num(b'1')));
assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1));
assert_eq!(jiter.array_step().unwrap(), false);
assert!(!jiter.array_step().unwrap());
let result = jiter.finish();
match result {
Ok(t) => panic!("unexpectedly valid: {:?}", t),
Expand Down

0 comments on commit cc14776

Please sign in to comment.