From 069df4c6cb6e221f0a53af0f114544ab57f9d0fb Mon Sep 17 00:00:00 2001 From: jake Date: Wed, 24 Jan 2024 17:18:16 +0100 Subject: [PATCH 01/16] Checkpoint --- .gitignore | 2 - Cargo.lock | 16 + Cargo.toml | 7 + src/de.rs | 1069 +++++++++++++++++++++++++++++++++++++++++++++++++ src/error.rs | 193 +++++++++ src/lib.rs | 536 +++++++++++++++++++++++++ src/tokens.rs | 534 ++++++++++++++++++++++++ 7 files changed, 2355 insertions(+), 2 deletions(-) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/de.rs create mode 100644 src/error.rs create mode 100644 src/lib.rs create mode 100644 src/tokens.rs diff --git a/.gitignore b/.gitignore index 2f88dba..ea8c4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1 @@ /target -**/*.rs.bk -Cargo.lock \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..839e01b --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,16 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "smallvec" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" + +[[package]] +name = "toml-file" +version = "0.1.0" +dependencies = [ + "smallvec", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f3b62cd --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "toml-file" +version = "0.1.0" +edition = "2021" + +[dependencies] +smallvec = "1.13" diff --git a/src/de.rs b/src/de.rs new file mode 100644 index 0000000..3c391d2 --- /dev/null +++ b/src/de.rs @@ -0,0 +1,1069 @@ +use super::{ + error::{Error, ErrorKind}, + tokens::{Error as TokenError, Token, Tokenizer}, + Key, Span, Value, ValueInner, +}; +use smallvec::SmallVec; +use std::{ + borrow::Cow, + collections::{btree_map::Entry, BTreeMap, BTreeSet}, + fmt::Display, + iter, str, vec, +}; + +type DeStr<'de> = Cow<'de, str>; +type TablePair<'de> = (Key<'de>, Val<'de>); +type InlineVec = SmallVec<[T; 5]>; + +pub fn from_str(s: &str) -> Result, Error> { + let mut de = Deserializer::new(s); + + let mut tables = de.tables()?; + let table_indices = build_table_indices(&tables); + let table_pindices = build_table_pindices(&tables); + + let root_ctx = Ctx { + depth: 0, + cur: 0, + cur_parent: 0, + table_indices: &table_indices, + table_pindices: &table_pindices, + de: &de, + values: None, + }; + + let mut root = super::Table::new(); + deserialize_table(root_ctx, &mut tables, &mut root)?; + + Ok(root) +} + +struct Deserializer<'a> { + input: &'a str, + tokens: Tokenizer<'a>, +} + +struct Ctx<'de, 'b> { + depth: usize, + cur: usize, + cur_parent: usize, + //max: usize, + table_indices: &'b BTreeMap>, Vec>, + table_pindices: &'b BTreeMap>, Vec>, + de: &'b Deserializer<'de>, + values: Option>>, + //array: bool, +} + +impl<'de, 'b> Ctx<'de, 'b> { + #[inline] + fn error(&self, at: usize, kind: ErrorKind) -> Error { + self.de.error(at, kind) + } 
+} + +fn deserialize_table<'de, 'b>( + mut ctx: Ctx<'de, 'b>, + tables: &'b mut [Table<'de>], + table: &mut super::Table<'de>, +) -> Result { + let max = tables.len(); + + while ctx.cur_parent < max && ctx.cur < max { + if let Some(values) = ctx.values.take() { + for (key, val) in values { + table_insert(table, key, val)?; + } + } + + let next_table = { + let prefix_stripped: InlineVec<_> = tables[ctx.cur_parent].header[..ctx.depth] + .iter() + .map(|v| v.name.clone()) + .collect::>(); + ctx.table_pindices + .get(&prefix_stripped) + .and_then(|entries| { + let start = entries.binary_search(&ctx.cur).unwrap_or_else(|v| v); + if start == entries.len() || entries[start] < ctx.cur { + return None; + } + entries[start..].iter().find_map(|i| { + let i = *i; + (i < max && tables[i].values.is_some()).then_some(i) + }) + }) + }; + + let Some(pos) = next_table else { + break; + }; + + ctx.cur = dbg!(pos); + + // Test to see if we're duplicating our parent's table, and if so + // then this is an error in the toml format + if ctx.cur_parent != pos { + if tables[ctx.cur_parent].header == tables[pos].header { + let at = tables[pos].at; + let name = tables[pos].header.iter().fold(String::new(), |mut s, k| { + if !s.is_empty() { + s.push('.'); + } + s.push_str(&k.name); + s + }); + return Err(ctx.error(at, ErrorKind::DuplicateTable(name))); + } + + // If we're here we know we should share the same prefix, and if + // the longer table was defined first then we want to narrow + // down our parent's length if possible to ensure that we catch + // duplicate tables defined afterwards. + let parent_len = tables[ctx.cur_parent].header.len(); + let cur_len = tables[pos].header.len(); + if cur_len < parent_len { + ctx.cur_parent = pos; + } + } + + let ttable = &mut tables[pos]; + + // If we're not yet at the appropriate depth for this table then we + // just next the next portion of its header and then continue + // decoding. 
+ if ctx.depth != ttable.header.len() { + let key = ttable.header[ctx.depth].clone(); + dbg!(table.keys().map(|k| k.name.as_ref()).collect::>()); + if table.contains_key(dbg!(&key)) { + return Err(Error::from_kind( + Some(key.span.start), + ErrorKind::DuplicateKey(key.name.to_string()), + )); + } + + let array = dbg!(ttable.array && ctx.depth == ttable.header.len() - 1); + + ctx.cur += 1; + dbg!(ctx.cur); + + let cctx = Ctx { + depth: ctx.depth + 1, //if array { 0 } else { 1 }, + cur: 0, + cur_parent: dbg!(pos), + table_indices: ctx.table_indices, + table_pindices: ctx.table_pindices, + de: ctx.de, + values: array.then(|| ttable.values.take().unwrap()), + }; + + let value = if array { + let mut arr = Vec::new(); + deserialize_array(cctx, tables, &mut arr)?; + ValueInner::Array(arr) + } else { + let mut tab = super::Table::new(); + deserialize_table(cctx, tables, &mut tab)?; + ValueInner::Table(tab) + }; + + table.insert( + key, + Value { + value, + span: Span::new(0, 0), + }, + ); + continue; + } + + // Rule out cases like: + // + // [[foo.bar]] + // [[foo]] + if ttable.array { + return Err(ctx.error(ttable.at, ErrorKind::RedefineAsArray)); + } + + ctx.values = ttable.values.take(); + } + + Ok(ctx.cur_parent) +} + +fn to_value(val: Val<'_>) -> Result, Error> { + let value = match val.e { + E::String(s) => ValueInner::String(s), + E::Boolean(b) => ValueInner::Boolean(b), + E::Integer(i) => ValueInner::Integer(i), + E::Float(f) => ValueInner::Float(f), + E::Array(arr) => { + let mut varr = Vec::new(); + for val in arr { + varr.push(to_value(val)?); + } + ValueInner::Array(varr) + } + E::DottedTable(tab) | E::InlineTable(tab) => { + let mut ntable = super::Table::new(); + + for (k, v) in tab { + table_insert(&mut ntable, k, v)?; + } + + ValueInner::Table(ntable) + } + }; + + Ok(Value { + value, + span: Span::new(val.start, val.end), + }) +} + +fn table_insert<'de>( + table: &mut super::Table<'de>, + key: Key<'de>, + val: Val<'de>, +) -> Result<(), Error> { + match 
table.entry(key.clone()) { + Entry::Occupied(occ) => { + return Err(Error::from_kind( + Some(key.span.start), + ErrorKind::DuplicateKey(key.name.to_string()), + )); + } + Entry::Vacant(vac) => { + vac.insert(to_value(val)?); + } + } + + Ok(()) +} + +fn deserialize_array<'de, 'b>( + mut ctx: Ctx<'de, 'b>, + tables: &'b mut [Table<'de>], + arr: &mut Vec>, +) -> Result { + let max = tables.len(); + + if let Some(values) = ctx.values.take() { + for (key, val) in values { + dbg!(key); + arr.push(to_value(val)?); + } + } + + while ctx.cur_parent < max { + let header_stripped = tables[ctx.cur_parent] + .header + .iter() + .map(|v| v.name.clone()) + .collect::>(); + let start_idx = ctx.cur_parent + 1; + let next = ctx + .table_indices + .get(&header_stripped) + .and_then(|entries| { + let start = entries.binary_search(&start_idx).unwrap_or_else(|v| v); + if start == entries.len() || entries[start] < start_idx { + return None; + } + entries[start..] + .iter() + .filter_map(|i| if *i < max { Some(*i) } else { None }) + .map(|i| (i, &tables[i])) + .find(|(_, table)| table.array) + .map(|p| p.0) + }) + .unwrap_or(max); + + if let Some(values) = tables[ctx.cur_parent].values.take() { + for (key, val) in values { + dbg!(key); + arr.push(to_value(val)?); + } + } + + ctx.cur_parent = dbg!(next); + } + + Ok(ctx.cur_parent) +} + +// Builds a datastructure that allows for efficient sublinear lookups. The +// returned BTreeMap contains a mapping from table header (like [a.b.c]) to list +// of tables with that precise name. The tables are being identified by their +// index in the passed slice. We use a list as the implementation uses this data +// structure for arrays as well as tables, so if any top level [[name]] array +// contains multiple entries, there are multiple entries in the list. The lookup +// is performed in the `SeqAccess` implementation of `MapVisitor`. The lists are +// ordered, which we exploit in the search code by using bisection. 
+fn build_table_indices<'de>(tables: &[Table<'de>]) -> BTreeMap>, Vec> { + let mut res = BTreeMap::new(); + for (i, table) in tables.iter().enumerate() { + let header = table + .header + .iter() + .map(|v| v.name.clone()) + .collect::>(); + res.entry(header).or_insert_with(Vec::new).push(i); + } + res +} + +// Builds a datastructure that allows for efficient sublinear lookups. The +// returned BTreeMap contains a mapping from table header (like [a.b.c]) to list +// of tables whose name at least starts with the specified name. So searching +// for [a.b] would give both [a.b.c.d] as well as [a.b.e]. The tables are being +// identified by their index in the passed slice. +// +// A list is used for two reasons: First, the implementation also stores arrays +// in the same data structure and any top level array of size 2 or greater +// creates multiple entries in the list with the same shared name. Second, there +// can be multiple tables sharing the same prefix. +// +// The lookup is performed in the `MapAccess` implementation of `MapVisitor`. +// The lists are ordered, which we exploit in the search code by using +// bisection. 
+fn build_table_pindices<'de>(tables: &[Table<'de>]) -> BTreeMap>, Vec> { + let mut res = BTreeMap::new(); + for (i, table) in tables.iter().enumerate() { + let header = table + .header + .iter() + .map(|v| v.name.clone()) + .collect::>(); + for len in 0..=header.len() { + res.entry(header[..len].into()) + .or_insert_with(Vec::new) + .push(i); + } + } + res +} + +// fn headers_equal(hdr_a: &[Key<'_>], hdr_b: &[Key<'_>]) -> bool { +// if hdr_a.len() != hdr_b.len() { +// return false; +// } +// hdr_a.iter().zip(hdr_b.iter()).all(|(h1, h2)| h1.1 == h2.1) +// } + +#[derive(Debug)] +struct Table<'de> { + at: usize, + header: InlineVec>, + values: Option>>, + array: bool, +} + +impl<'a> Deserializer<'a> { + fn new(input: &'a str) -> Deserializer<'a> { + Deserializer { + tokens: Tokenizer::new(input), + input, + } + } + + fn tables(&mut self) -> Result>, Error> { + let mut tables = Vec::new(); + let mut cur_table = Table { + at: 0, + header: InlineVec::new(), + values: None, + array: false, + }; + + while let Some(line) = self.line()? { + match line { + Line::Table { + at, + mut header, + array, + } => { + if !cur_table.header.is_empty() || cur_table.values.is_some() { + tables.push(cur_table); + } + cur_table = Table { + at, + header: InlineVec::new(), + values: Some(Vec::new()), + array, + }; + loop { + let part = header.next().map_err(|e| self.token_error(e)); + match part? { + Some(part) => cur_table.header.push(part), + None => break, + } + } + } + Line::KeyValue(key, value) => { + if cur_table.values.is_none() { + cur_table.values = Some(Vec::new()); + } + self.add_dotted_key(key, value, cur_table.values.as_mut().unwrap())?; + } + } + } + if !cur_table.header.is_empty() || cur_table.values.is_some() { + tables.push(cur_table); + } + Ok(tables) + } + + fn line(&mut self) -> Result>, Error> { + loop { + self.eat_whitespace(); + if self.eat_comment()? { + continue; + } + if self.eat(Token::Newline)? { + continue; + } + break; + } + + match self.peek()? 
{ + Some((_, Token::LeftBracket)) => self.table_header().map(Some), + Some(_) => self.key_value().map(Some), + None => Ok(None), + } + } + + fn table_header(&mut self) -> Result, Error> { + let start = self.tokens.current(); + self.expect(Token::LeftBracket)?; + let array = self.eat(Token::LeftBracket)?; + let ret = Header::new(self.tokens.clone(), array); + self.tokens.skip_to_newline(); + Ok(Line::Table { + at: start, + header: ret, + array, + }) + } + + fn key_value(&mut self) -> Result, Error> { + let key = self.dotted_key()?; + self.eat_whitespace(); + self.expect(Token::Equals)?; + self.eat_whitespace(); + + let value = self.value()?; + self.eat_whitespace(); + if !self.eat_comment()? { + self.eat_newline_or_eof()?; + } + + Ok(Line::KeyValue(key, value)) + } + + fn value(&mut self) -> Result, Error> { + let at = self.tokens.current(); + let value = match self.next()? { + Some((Span { start, end }, Token::String { val, .. })) => Val { + e: E::String(val), + start, + end, + }, + Some((Span { start, end }, Token::Keylike("true"))) => Val { + e: E::Boolean(true), + start, + end, + }, + Some((Span { start, end }, Token::Keylike("false"))) => Val { + e: E::Boolean(false), + start, + end, + }, + Some((span, Token::Keylike(key))) => self.parse_keylike(at, span, key)?, + Some((span, Token::Plus)) => self.number_leading_plus(span)?, + Some((Span { start, .. }, Token::LeftBrace)) => { + self.inline_table().map(|(Span { end, .. }, table)| Val { + e: E::InlineTable(table), + start, + end, + })? + } + Some((Span { start, .. }, Token::LeftBracket)) => { + self.array().map(|(Span { end, .. }, array)| Val { + e: E::Array(array), + start, + end, + })? 
+ } + Some(token) => { + return Err(self.error( + at, + ErrorKind::Wanted { + expected: "a value", + found: token.1.describe(), + }, + )); + } + None => return Err(self.eof()), + }; + Ok(value) + } + + fn parse_keylike(&mut self, at: usize, span: Span, key: &'a str) -> Result, Error> { + if key == "inf" || key == "nan" { + return self.number(span, key); + } + + let first_char = key.chars().next().expect("key should not be empty here"); + match first_char { + '-' | '0'..='9' => self.number(span, key), + _ => Err(self.error(at, ErrorKind::UnquotedString)), + } + } + + fn number(&mut self, Span { start, end }: Span, s: &'a str) -> Result, Error> { + let to_integer = |f| Val { + e: E::Integer(f), + start, + end, + }; + if let Some(s) = s.strip_prefix("0x") { + self.integer(s, 16).map(to_integer) + } else if let Some(s) = s.strip_prefix("0o") { + self.integer(s, 8).map(to_integer) + } else if let Some(s) = s.strip_prefix("0b") { + self.integer(s, 2).map(to_integer) + } else if s.contains('e') || s.contains('E') { + self.float(s, None).map(|f| Val { + e: E::Float(f), + start, + end, + }) + } else if self.eat(Token::Period)? { + let at = self.tokens.current(); + match self.next()? { + Some((Span { start, end }, Token::Keylike(after))) => { + self.float(s, Some(after)).map(|f| Val { + e: E::Float(f), + start, + end, + }) + } + _ => Err(self.error(at, ErrorKind::NumberInvalid)), + } + } else if s == "inf" { + Ok(Val { + e: E::Float(f64::INFINITY), + start, + end, + }) + } else if s == "-inf" { + Ok(Val { + e: E::Float(f64::NEG_INFINITY), + start, + end, + }) + } else if s == "nan" { + Ok(Val { + e: E::Float(f64::NAN.copysign(1.0)), + start, + end, + }) + } else if s == "-nan" { + Ok(Val { + e: E::Float(f64::NAN.copysign(-1.0)), + start, + end, + }) + } else { + self.integer(s, 10).map(to_integer) + } + } + + fn number_leading_plus(&mut self, Span { start, .. }: Span) -> Result, Error> { + let start_token = self.tokens.current(); + match self.next()? { + Some((Span { end, .. 
}, Token::Keylike(s))) => self.number(Span { start, end }, s), + _ => Err(self.error(start_token, ErrorKind::NumberInvalid)), + } + } + + fn integer(&self, s: &'a str, radix: u32) -> Result { + let allow_sign = radix == 10; + let allow_leading_zeros = radix != 10; + let (prefix, suffix) = self.parse_integer(s, allow_sign, allow_leading_zeros, radix)?; + let start = self.tokens.substr_offset(s); + if !suffix.is_empty() { + return Err(self.error(start, ErrorKind::NumberInvalid)); + } + i64::from_str_radix(prefix.replace('_', "").trim_start_matches('+'), radix) + .map_err(|_e| self.error(start, ErrorKind::NumberInvalid)) + } + + fn parse_integer( + &self, + s: &'a str, + allow_sign: bool, + allow_leading_zeros: bool, + radix: u32, + ) -> Result<(&'a str, &'a str), Error> { + let start = self.tokens.substr_offset(s); + + let mut first = true; + let mut first_zero = false; + let mut underscore = false; + let mut end = s.len(); + for (i, c) in s.char_indices() { + let at = i + start; + if i == 0 && (c == '+' || c == '-') && allow_sign { + continue; + } + + if c == '0' && first { + first_zero = true; + } else if c.is_digit(radix) { + if !first && first_zero && !allow_leading_zeros { + return Err(self.error(at, ErrorKind::NumberInvalid)); + } + underscore = false; + } else if c == '_' && first { + return Err(self.error(at, ErrorKind::NumberInvalid)); + } else if c == '_' && !underscore { + underscore = true; + } else { + end = i; + break; + } + first = false; + } + if first || underscore { + return Err(self.error(start, ErrorKind::NumberInvalid)); + } + Ok((&s[..end], &s[end..])) + } + + fn float(&mut self, s: &'a str, after_decimal: Option<&'a str>) -> Result { + let (integral, mut suffix) = self.parse_integer(s, true, false, 10)?; + let start = self.tokens.substr_offset(integral); + + let mut fraction = None; + if let Some(after) = after_decimal { + if !suffix.is_empty() { + return Err(self.error(start, ErrorKind::NumberInvalid)); + } + let (a, b) = 
self.parse_integer(after, false, true, 10)?; + fraction = Some(a); + suffix = b; + } + + let mut exponent = None; + if suffix.starts_with('e') || suffix.starts_with('E') { + let (a, b) = if suffix.len() == 1 { + self.eat(Token::Plus)?; + match self.next()? { + Some((_, Token::Keylike(s))) => self.parse_integer(s, false, true, 10)?, + _ => return Err(self.error(start, ErrorKind::NumberInvalid)), + } + } else { + self.parse_integer(&suffix[1..], true, true, 10)? + }; + if !b.is_empty() { + return Err(self.error(start, ErrorKind::NumberInvalid)); + } + exponent = Some(a); + } else if !suffix.is_empty() { + return Err(self.error(start, ErrorKind::NumberInvalid)); + } + + let mut number = integral + .trim_start_matches('+') + .chars() + .filter(|c| *c != '_') + .collect::(); + if let Some(fraction) = fraction { + number.push('.'); + number.extend(fraction.chars().filter(|c| *c != '_')); + } + if let Some(exponent) = exponent { + number.push('E'); + number.extend(exponent.chars().filter(|c| *c != '_')); + } + number + .parse() + .map_err(|_e| self.error(start, ErrorKind::NumberInvalid)) + .and_then(|n: f64| { + if n.is_finite() { + Ok(n) + } else { + Err(self.error(start, ErrorKind::NumberInvalid)) + } + }) + } + + // TODO(#140): shouldn't buffer up this entire table in memory, it'd be + // great to defer parsing everything until later. + fn inline_table(&mut self) -> Result<(Span, Vec>), Error> { + let mut ret = Vec::new(); + self.eat_whitespace(); + if let Some(span) = self.eat_spanned(Token::RightBrace)? { + return Ok((span, ret)); + } + loop { + let key = self.dotted_key()?; + self.eat_whitespace(); + self.expect(Token::Equals)?; + self.eat_whitespace(); + let value = self.value()?; + self.add_dotted_key(key, value, &mut ret)?; + + self.eat_whitespace(); + if let Some(span) = self.eat_spanned(Token::RightBrace)? 
{ + return Ok((span, ret)); + } + self.expect(Token::Comma)?; + self.eat_whitespace(); + } + } + + // TODO(#140): shouldn't buffer up this entire array in memory, it'd be + // great to defer parsing everything until later. + fn array(&mut self) -> Result<(Span, Vec>), Error> { + let mut ret = Vec::new(); + + let intermediate = |me: &mut Deserializer<'_>| -> Result<(), Error> { + loop { + me.eat_whitespace(); + if !me.eat(Token::Newline)? && !me.eat_comment()? { + break; + } + } + Ok(()) + }; + + loop { + intermediate(self)?; + if let Some(span) = self.eat_spanned(Token::RightBracket)? { + return Ok((span, ret)); + } + let value = self.value()?; + ret.push(value); + intermediate(self)?; + if !self.eat(Token::Comma)? { + break; + } + } + intermediate(self)?; + let span = self.expect_spanned(Token::RightBracket)?; + Ok((span, ret)) + } + + fn table_key(&mut self) -> Result, Error> { + self.tokens.table_key().map_err(|e| self.token_error(e)) + } + + fn dotted_key(&mut self) -> Result>, Error> { + let mut result = Vec::new(); + result.push(self.table_key()?); + self.eat_whitespace(); + while self.eat(Token::Period)? { + self.eat_whitespace(); + result.push(self.table_key()?); + self.eat_whitespace(); + } + Ok(result) + } + + /// Stores a value in the appropriate hierarchical structure positioned based on the dotted key. + /// + /// Given the following definition: `multi.part.key = "value"`, `multi` and `part` are + /// intermediate parts which are mapped to the relevant fields in the deserialized type's data + /// hierarchy. + /// + /// # Parameters + /// + /// * `key_parts`: Each segment of the dotted key, e.g. `part.one` maps to + /// `vec![Cow::Borrowed("part"), Cow::Borrowed("one")].` + /// * `value`: The parsed value. + /// * `values`: The `Vec` to store the value in. 
+ fn add_dotted_key( + &self, + mut key_parts: Vec>, + value: Val<'a>, + values: &mut Vec>, + ) -> Result<(), Error> { + let key = key_parts.remove(0); + if key_parts.is_empty() { + values.push((key, value)); + return Ok(()); + } + match values + .iter_mut() + .find(|&&mut (ref k, _)| k.name == key.name) + { + Some(&mut ( + _, + Val { + e: E::DottedTable(ref mut v), + .. + }, + )) => { + return self.add_dotted_key(key_parts, value, v); + } + Some(&mut (_, Val { start, .. })) => { + return Err(self.error(start, ErrorKind::DottedKeyInvalidType)); + } + None => {} + } + // The start/end value is somewhat misleading here. + let table_values = Val { + e: E::DottedTable(Vec::new()), + start: value.start, + end: value.end, + }; + values.push((key, table_values)); + let last_i = values.len() - 1; + if let ( + _, + Val { + e: E::DottedTable(ref mut v), + .. + }, + ) = values[last_i] + { + self.add_dotted_key(key_parts, value, v)?; + } + Ok(()) + } + + fn eat_whitespace(&mut self) { + self.tokens.eat_whitespace(); + } + + fn eat_comment(&mut self) -> Result { + self.tokens.eat_comment().map_err(|e| self.token_error(e)) + } + + fn eat_newline_or_eof(&mut self) -> Result<(), Error> { + self.tokens + .eat_newline_or_eof() + .map_err(|e| self.token_error(e)) + } + + fn eat(&mut self, expected: Token<'a>) -> Result { + self.tokens.eat(expected).map_err(|e| self.token_error(e)) + } + + fn eat_spanned(&mut self, expected: Token<'a>) -> Result, Error> { + self.tokens + .eat_spanned(expected) + .map_err(|e| self.token_error(e)) + } + + fn expect(&mut self, expected: Token<'a>) -> Result<(), Error> { + self.tokens + .expect(expected) + .map_err(|e| self.token_error(e)) + } + + fn expect_spanned(&mut self, expected: Token<'a>) -> Result { + self.tokens + .expect_spanned(expected) + .map_err(|e| self.token_error(e)) + } + + fn next(&mut self) -> Result)>, Error> { + self.tokens.next().map_err(|e| self.token_error(e)) + } + + fn peek(&mut self) -> Result)>, Error> { + 
self.tokens.peek().map_err(|e| self.token_error(e)) + } + + fn eof(&self) -> Error { + self.error(self.input.len(), ErrorKind::UnexpectedEof) + } + + fn token_error(&self, error: TokenError) -> Error { + match error { + TokenError::InvalidCharInString(at, ch) => { + self.error(at, ErrorKind::InvalidCharInString(ch)) + } + TokenError::InvalidEscape(at, ch) => self.error(at, ErrorKind::InvalidEscape(ch)), + TokenError::InvalidEscapeValue(at, v) => { + self.error(at, ErrorKind::InvalidEscapeValue(v)) + } + TokenError::InvalidHexEscape(at, ch) => self.error(at, ErrorKind::InvalidHexEscape(ch)), + TokenError::NewlineInString(at) => self.error(at, ErrorKind::NewlineInString), + TokenError::Unexpected(at, ch) => self.error(at, ErrorKind::Unexpected(ch)), + TokenError::UnterminatedString(at) => self.error(at, ErrorKind::UnterminatedString), + TokenError::NewlineInTableKey(at) => self.error(at, ErrorKind::NewlineInTableKey), + TokenError::Wanted { + at, + expected, + found, + } => self.error(at, ErrorKind::Wanted { expected, found }), + TokenError::MultilineStringKey(at) => self.error(at, ErrorKind::MultilineStringKey), + } + } + + fn error(&self, at: usize, kind: ErrorKind) -> Error { + let mut err = Error::from_kind(Some(at), kind); + err.fix_linecol(|at| self.to_linecol(at)); + err + } + + /// Converts a byte offset from an error message to a (line, column) pair + /// + /// All indexes are 0-based. + fn to_linecol(&self, offset: usize) -> (usize, usize) { + let mut cur = 0; + // Use split_terminator instead of lines so that if there is a `\r`, it + // is included in the offset calculation. The `+1` values below account + // for the `\n`. 
+ for (i, line) in self.input.split_terminator('\n').enumerate() { + if cur + line.len() + 1 > offset { + return (i, offset - cur); + } + cur += line.len() + 1; + } + (self.input.lines().count(), 0) + } +} + +impl Error { + pub(crate) fn line_col(&self) -> Option<(usize, usize)> { + self.line.map(|line| (line, self.col)) + } + + fn from_kind(at: Option, kind: ErrorKind) -> Self { + Error { + kind, + line: None, + col: 0, + at, + message: String::new(), + key: Vec::new(), + } + } + + fn custom(at: Option, s: String) -> Self { + Error { + kind: ErrorKind::Custom, + line: None, + col: 0, + at, + message: s, + key: Vec::new(), + } + } + + pub(crate) fn add_key_context(&mut self, key: &str) { + self.key.insert(0, key.to_string()); + } + + fn fix_offset(&mut self, f: F) + where + F: FnOnce() -> Option, + { + // An existing offset is always better positioned than anything we might + // want to add later. + if self.at.is_none() { + self.at = f(); + } + } + + fn fix_linecol(&mut self, f: F) + where + F: FnOnce(usize) -> (usize, usize), + { + if let Some(at) = self.at { + let (line, col) = f(at); + self.line = Some(line); + self.col = col; + } + } +} + +impl std::convert::From for std::io::Error { + fn from(e: Error) -> Self { + std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()) + } +} + +enum Line<'a> { + Table { + at: usize, + header: Header<'a>, + array: bool, + }, + KeyValue(Vec>, Val<'a>), +} + +struct Header<'a> { + first: bool, + array: bool, + tokens: Tokenizer<'a>, +} + +impl<'a> Header<'a> { + fn new(tokens: Tokenizer<'a>, array: bool) -> Header<'a> { + Header { + first: true, + array, + tokens, + } + } + + fn next(&mut self) -> Result>, TokenError> { + self.tokens.eat_whitespace(); + + if self.first || self.tokens.eat(Token::Period)? 
{ + self.first = false; + self.tokens.eat_whitespace(); + self.tokens.table_key().map(Some) + } else { + self.tokens.expect(Token::RightBracket)?; + if self.array { + self.tokens.expect(Token::RightBracket)?; + } + + self.tokens.eat_whitespace(); + if !self.tokens.eat_comment()? { + self.tokens.eat_newline_or_eof()?; + } + Ok(None) + } + } +} + +#[derive(Debug)] +struct Val<'a> { + e: E<'a>, + start: usize, + end: usize, +} + +#[derive(Debug)] +enum E<'a> { + Integer(i64), + Float(f64), + Boolean(bool), + String(DeStr<'a>), + Array(Vec>), + InlineTable(Vec>), + DottedTable(Vec>), +} + +impl<'a> E<'a> { + fn type_name(&self) -> &'static str { + match *self { + E::String(..) => "string", + E::Integer(..) => "integer", + E::Float(..) => "float", + E::Boolean(..) => "boolean", + E::Array(..) => "array", + E::InlineTable(..) => "inline table", + E::DottedTable(..) => "dotted table", + } + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..7ddbec9 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,193 @@ +use std::fmt::{self, Debug, Display}; + +/// Error that can occur when deserializing TOML. +#[derive(Debug)] +pub(super) struct Error { + pub(super) kind: ErrorKind, + pub(super) line: Option, + pub(super) col: usize, + pub(super) at: Option, + pub(super) message: String, + pub(super) key: Vec, +} + +impl std::error::Error for Error {} + +/// Errors that can occur when deserializing a type. +#[derive(Debug)] +pub(super) enum ErrorKind { + /// EOF was reached when looking for a value. + UnexpectedEof, + + /// An invalid character not allowed in a string was found. + InvalidCharInString(char), + + /// An invalid character was found as an escape. + InvalidEscape(char), + + /// An invalid character was found in a hex escape. + InvalidHexEscape(char), + + /// An invalid escape value was specified in a hex escape in a string. + /// + /// Valid values are in the plane of unicode codepoints. 
+ InvalidEscapeValue(u32), + + /// A newline in a string was encountered when one was not allowed. + NewlineInString, + + /// An unexpected character was encountered, typically when looking for a + /// value. + Unexpected(char), + + /// An unterminated string was found where EOF was found before the ending + /// EOF mark. + UnterminatedString, + + /// A newline was found in a table key. + NewlineInTableKey, + + /// A number failed to parse. + NumberInvalid, + + /// Wanted one sort of token, but found another. + Wanted { + /// Expected token type. + expected: &'static str, + /// Actually found token type. + found: &'static str, + }, + + /// A duplicate table definition was found. + DuplicateTable(String), + + /// Duplicate key in table. + DuplicateKey(String), + + /// A previously defined table was redefined as an array. + RedefineAsArray, + + /// Multiline strings are not allowed for key. + MultilineStringKey, + + /// A custom error which could be generated when deserializing a particular + /// type. + Custom, + + /// A tuple with a certain number of elements was expected but something + /// else was found. + ExpectedTuple(usize), + + /// Expected table keys to be in increasing tuple index order, but something + /// else was found. + ExpectedTupleIndex { + /// Expected index. + expected: usize, + /// Key that was specified. + found: String, + }, + + /// An empty table was expected but entries were found. + ExpectedEmptyTable, + + /// Dotted key attempted to extend something that is not a table. + DottedKeyInvalidType, + + /// An unexpected key was encountered. + /// + /// Used when deserializing a struct with a limited set of fields. + UnexpectedKeys { + /// The unexpected keys. + keys: Vec, + /// Keys that may be specified. + available: &'static [&'static str], + }, + + /// Unquoted string was found when quoted one was expected. 
+ UnquotedString, +} + +impl Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.kind { + ErrorKind::UnexpectedEof => f.write_str("unexpected eof encountered")?, + ErrorKind::InvalidCharInString(c) => write!( + f, + "invalid character in string: `{}`", + c.escape_default().collect::() + )?, + ErrorKind::InvalidEscape(c) => write!( + f, + "invalid escape character in string: `{}`", + c.escape_default().collect::() + )?, + ErrorKind::InvalidHexEscape(c) => write!( + f, + "invalid hex escape character in string: `{}`", + c.escape_default().collect::() + )?, + ErrorKind::InvalidEscapeValue(c) => write!(f, "invalid escape value: `{}`", c)?, + ErrorKind::NewlineInString => f.write_str("newline in string found")?, + ErrorKind::Unexpected(ch) => write!( + f, + "unexpected character found: `{}`", + ch.escape_default().collect::() + )?, + ErrorKind::UnterminatedString => f.write_str("unterminated string")?, + ErrorKind::NewlineInTableKey => f.write_str("found newline in table key")?, + ErrorKind::Wanted { expected, found } => { + write!(f, "expected {}, found {}", expected, found)?; + } + ErrorKind::NumberInvalid => f.write_str("invalid number")?, + ErrorKind::DuplicateTable(ref s) => { + write!(f, "redefinition of table `{}`", s)?; + } + ErrorKind::DuplicateKey(ref s) => { + write!(f, "duplicate key: `{}`", s)?; + } + ErrorKind::RedefineAsArray => f.write_str("table redefined as array")?, + ErrorKind::MultilineStringKey => { + f.write_str("multiline strings are not allowed for key")? 
+ } + ErrorKind::Custom => f.write_str(&self.message)?, + ErrorKind::ExpectedTuple(l) => write!(f, "expected table with length {}", l)?, + ErrorKind::ExpectedTupleIndex { + expected, + ref found, + } => write!(f, "expected table key `{}`, but was `{}`", expected, found)?, + ErrorKind::ExpectedEmptyTable => f.write_str("expected empty table")?, + ErrorKind::DottedKeyInvalidType => { + f.write_str("dotted key attempted to extend non-table type")?; + } + ErrorKind::UnexpectedKeys { + ref keys, + available, + } => write!( + f, + "unexpected keys in table: `{:?}`, available keys: `{:?}`", + keys, available + )?, + ErrorKind::UnquotedString => write!( + f, + "invalid TOML value, did you mean to use a quoted string?" + )?, + } + + if !self.key.is_empty() { + write!(f, " for key `")?; + for (i, k) in self.key.iter().enumerate() { + if i > 0 { + write!(f, ".")?; + } + write!(f, "{}", k)?; + } + write!(f, "`")?; + } + + if let Some(line) = self.line { + write!(f, " at line {} column {}", line + 1, self.col + 1)?; + } + + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..895bd0a --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,536 @@ +#![allow( + clippy::all, + dead_code, + unused_variables, + unreachable_code, + unused_imports +)] + +mod de; +mod error; +mod tokens; + +// use serde::{ +// de::{self as des, IntoDeserializer}, +// Deserialize, +// }; +use std::{borrow::Cow, fmt}; + +#[derive(Copy, Clone, PartialEq, Eq, Default, Debug)] +pub struct Span { + pub start: usize, + pub end: usize, +} + +impl Span { + #[inline] + pub fn new(start: usize, end: usize) -> Self { + Self { start, end } + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.start == 0 && self.end == 0 + } +} + +impl From for (usize, usize) { + fn from(Span { start, end }: Span) -> (usize, usize) { + (start, end) + } +} + +impl From> for Span { + fn from(s: std::ops::Range) -> Self { + Self { + start: s.start, + end: s.end, + } + } +} + +impl From for std::ops::Range { + 
fn from(s: Span) -> Self { + Self { + start: s.start, + end: s.end, + } + } +} + +#[derive(Debug)] +pub struct Value<'de> { + pub value: ValueInner<'de>, + pub span: Span, +} + +#[derive(Debug, Clone)] +pub struct Key<'de> { + pub name: Cow<'de, str>, + pub span: Span, +} + +impl<'de> Ord for Key<'de> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.name.cmp(&other.name) + } +} + +impl<'de> PartialOrd for Key<'de> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl<'de> PartialEq for Key<'de> { + fn eq(&self, other: &Self) -> bool { + self.name.eq(&other.name) + } +} + +impl<'de> Eq for Key<'de> {} + +pub type Table<'de> = std::collections::BTreeMap, Value<'de>>; +pub type Array<'de> = Vec>; + +#[derive(Debug)] +pub enum ValueInner<'de> { + String(Cow<'de, str>), + Integer(i64), + Float(f64), + Boolean(bool), + Array(Array<'de>), + Table(Table<'de>), +} + +impl<'de> ValueInner<'de> { + pub fn type_str(&self) -> &'static str { + match self { + Self::String(..) => "string", + Self::Integer(..) => "integer", + Self::Float(..) => "float", + Self::Boolean(..) => "boolean", + Self::Array(..) => "array", + Self::Table(..) 
=> "table", + } + } +} + +// impl<'de> des::Deserializer<'de> for Value<'de> { +// type Error = DesErr; + +// fn deserialize_any(self, visitor: V) -> Result +// where +// V: des::Visitor<'de>, +// { +// match self.value { +// ValueInner::Boolean(v) => visitor.visit_bool(v), +// ValueInner::Integer(n) => visitor.visit_i64(n), +// ValueInner::Float(n) => visitor.visit_f64(n), +// ValueInner::String(v) => visitor.visit_str(v.as_ref()), +// ValueInner::Array(v) => { +// let len = v.len(); +// let mut deserializer = SeqDeserializer::new(v); +// let seq = visitor.visit_seq(&mut deserializer)?; +// let remaining = deserializer.iter.len(); +// if remaining == 0 { +// Ok(seq) +// } else { +// Err(des::Error::invalid_length(len, &"fewer elements in array")) +// } +// } +// ValueInner::Table(v) => { +// let len = v.len(); +// let mut deserializer = MapDeserializer::new(v); +// let map = visitor.visit_map(&mut deserializer)?; +// let remaining = deserializer.iter.len(); +// if remaining == 0 { +// Ok(map) +// } else { +// Err(des::Error::invalid_length(len, &"fewer elements in map")) +// } +// } +// } +// } + +// #[inline] +// fn deserialize_enum( +// self, +// _name: &'static str, +// _variants: &'static [&'static str], +// visitor: V, +// ) -> Result +// where +// V: des::Visitor<'de>, +// { +// match self.value { +// ValueInner::String(variant) => visitor.visit_enum(variant.into_deserializer()), +// ValueInner::Table(variant) => { +// if variant.is_empty() { +// Err(des::Error::custom( +// "wanted exactly 1 element, found 0 elements", +// )) +// } else if variant.len() != 1 { +// Err(des::Error::custom( +// "wanted exactly 1 element, more than 1 element", +// )) +// } else { +// let deserializer = MapDeserializer::new(variant); +// visitor.visit_enum(deserializer) +// } +// } +// _ => Err(des::Error::invalid_type( +// des::Unexpected::UnitVariant, +// &"string only", +// )), +// } +// } + +// // `None` is interpreted as a missing field so be sure to implement `Some` +// // 
as a present field. +// fn deserialize_option(self, visitor: V) -> Result +// where +// V: des::Visitor<'de>, +// { +// visitor.visit_some(self) +// } + +// fn deserialize_newtype_struct( +// self, +// _name: &'static str, +// visitor: V, +// ) -> Result +// where +// V: des::Visitor<'de>, +// { +// visitor.visit_newtype_struct(self) +// } + +// fn deserialize_struct( +// self, +// name: &'static str, +// _fields: &'static [&'static str], +// visitor: V, +// ) -> Result +// where +// V: des::Visitor<'de>, +// { +// if name == super::span_tags::NAME { +// let mut sd = SpanDeserializer::new(self); +// visitor.visit_map(&mut sd) +// } else { +// self.deserialize_any(visitor) +// } +// } + +// serde::forward_to_deserialize_any! { +// bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq +// bytes byte_buf map unit_struct tuple_struct tuple ignored_any identifier +// } +// } + +// struct SeqDeserializer<'de> { +// iter: std::vec::IntoIter>, +// } + +// impl<'de> SeqDeserializer<'de> { +// fn new(vec: Vec>) -> Self { +// SeqDeserializer { +// iter: vec.into_iter(), +// } +// } +// } + +// impl<'de> des::SeqAccess<'de> for SeqDeserializer<'de> { +// type Error = DesErr; + +// fn next_element_seed(&mut self, seed: T) -> Result, Self::Error> +// where +// T: des::DeserializeSeed<'de>, +// { +// match self.iter.next() { +// Some(value) => seed.deserialize(value).map(Some), +// None => Ok(None), +// } +// } + +// fn size_hint(&self) -> Option { +// match self.iter.size_hint() { +// (lower, Some(upper)) if lower == upper => Some(upper), +// _ => None, +// } +// } +// } + +// struct MapDeserializer<'de> { +// iter: as IntoIterator>::IntoIter, +// value: Option>, +// } + +// impl<'de> MapDeserializer<'de> { +// fn new(map: Table<'de>) -> Self { +// MapDeserializer { +// iter: map.into_iter(), +// value: None, +// } +// } +// } + +// impl<'de> des::MapAccess<'de> for MapDeserializer<'de> { +// type Error = DesErr; + +// fn next_key_seed(&mut self, seed: T) -> 
Result, Self::Error> +// where +// T: des::DeserializeSeed<'de>, +// { +// match self.iter.next() { +// Some((key, value)) => { +// self.value = Some(value); +// seed.deserialize(Value { +// value: ValueInner::String(key.into()), +// span: Default::default(), +// }) +// .map(Some) +// } +// None => Ok(None), +// } +// } + +// fn next_value_seed(&mut self, seed: T) -> Result +// where +// T: des::DeserializeSeed<'de>, +// { +// match self.value.take() { +// Some(value) => seed.deserialize(value), +// None => Err(des::Error::custom("value is missing")), +// } +// } + +// fn size_hint(&self) -> Option { +// match self.iter.size_hint() { +// (lower, Some(upper)) if lower == upper => Some(upper), +// _ => None, +// } +// } +// } + +// impl<'de> des::EnumAccess<'de> for MapDeserializer<'de> { +// type Error = DesErr; +// type Variant = MapEnumDeserializer<'de>; + +// fn variant_seed(mut self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> +// where +// V: des::DeserializeSeed<'de>, +// { +// use des::Error; +// let (key, value) = match self.iter.next() { +// Some(pair) => pair, +// None => { +// return Err(Error::custom( +// "expected table with exactly 1 entry, found empty table", +// )); +// } +// }; + +// let val = seed.deserialize(key.into_deserializer())?; +// let variant = MapEnumDeserializer { value }; + +// Ok((val, variant)) +// } +// } + +// struct SpanDeserializer<'de> { +// value: Option>, +// key: usize, +// } + +// impl<'de> SpanDeserializer<'de> { +// fn new(value: Value<'de>) -> Self { +// Self { +// value: Some(value), +// key: 0, +// } +// } +// } + +// impl<'de> des::MapAccess<'de> for SpanDeserializer<'de> { +// type Error = DesErr; + +// fn next_key_seed(&mut self, seed: K) -> Result, Self::Error> +// where +// K: des::DeserializeSeed<'de>, +// { +// if self.key < super::span_tags::FIELDS.len() { +// seed.deserialize(Value { +// value: ValueInner::String(super::span_tags::FIELDS[self.key].into()), +// span: Default::default(), +// }) +// 
.map(Some) +// } else { +// Ok(None) +// } +// } + +// fn next_value_seed(&mut self, seed: V) -> Result +// where +// V: des::DeserializeSeed<'de>, +// { +// let res = match self.key { +// 0 => seed.deserialize(Value { +// value: ValueInner::Integer(self.value.as_ref().unwrap().span.start as _), +// span: Default::default(), +// }), +// 1 => seed.deserialize(Value { +// value: ValueInner::Integer(self.value.as_ref().unwrap().span.end as _), +// span: Default::default(), +// }), +// 2 => seed.deserialize(self.value.take().unwrap().into_deserializer()), +// _ => unreachable!(), +// }; + +// self.key += 1; +// res +// } + +// fn size_hint(&self) -> Option { +// Some(super::span_tags::FIELDS.len() - self.key) +// } +// } + +// pub struct MapEnumDeserializer<'de> { +// value: Value<'de>, +// } + +// impl<'de> des::VariantAccess<'de> for MapEnumDeserializer<'de> { +// type Error = DesErr; + +// fn unit_variant(self) -> Result<(), Self::Error> { +// use des::Error; +// match self.value.0.value { +// ValueInner::Array(values) => { +// if values.is_empty() { +// Ok(()) +// } else { +// Err(Error::custom("expected empty array")) +// } +// } +// ValueInner::Table(values) => { +// if values.is_empty() { +// Ok(()) +// } else { +// Err(Error::custom("expected empty table")) +// } +// } +// e => Err(Error::custom(format!( +// "expected table, found {}", +// e.type_str() +// ))), +// } +// } + +// fn newtype_variant_seed(self, seed: T) -> Result +// where +// T: des::DeserializeSeed<'de>, +// { +// seed.deserialize(self.value.into_deserializer()) +// } + +// fn tuple_variant(self, len: usize, visitor: V) -> Result +// where +// V: des::Visitor<'de>, +// { +// use des::Error; +// match self.value.0.value { +// ValueInner::Array(values) => { +// if values.len() == len { +// serde::de::Deserializer::deserialize_seq(values.into_deserializer(), visitor) +// } else { +// Err(Error::custom(format!("expected tuple with length {}", len))) +// } +// } +// ValueInner::Table(values) => { +// 
let tuple_values: Result, _> = values +// .into_iter() +// .enumerate() +// .map(|(index, (key, value))| match key.parse::() { +// Ok(key_index) if key_index == index => Ok(value), +// Ok(_) | Err(_) => Err(Error::custom(format!( +// "expected table key `{}`, but was `{}`", +// index, key +// ))), +// }) +// .collect(); +// let tuple_values = tuple_values?; + +// if tuple_values.len() == len { +// serde::de::Deserializer::deserialize_seq( +// tuple_values.into_deserializer(), +// visitor, +// ) +// } else { +// Err(Error::custom(format!("expected tuple with length {}", len))) +// } +// } +// e => Err(Error::custom(format!( +// "expected table, found {}", +// e.type_str() +// ))), +// } +// } + +// fn struct_variant( +// self, +// fields: &'static [&'static str], +// visitor: V, +// ) -> Result +// where +// V: des::Visitor<'de>, +// { +// des::Deserializer::deserialize_struct( +// self.value.into_deserializer(), +// "", // TODO: this should be the variant name +// fields, +// visitor, +// ) +// } +// } + +// impl<'de> des::IntoDeserializer<'de, DesErr> for Value<'de> { +// type Deserializer = Self; + +// fn into_deserializer(self) -> Self { +// self +// } +// } + +// pub fn deserialize_spanned(doc: &str) -> anyhow::Result { +// let root = de::from_str(doc)?; +// Ok(T::deserialize(root.into_deserializer())?) 
+// } + +#[test] +fn oh_god_please_work() { + let s = r#" +[[fruit]] +name = "apple" + +[fruit.physical] +color = "red" +shape = "round" + +[[fruit.variety]] +name = "red delicious" + +[[fruit.variety]] +name = "granny smith" + +[[fruit]] +name = "banana" + +[[fruit.variety]] +name = "plantain" +"#; + let table = de::from_str(s).unwrap(); + + panic!("{table:?}"); +} diff --git a/src/tokens.rs b/src/tokens.rs new file mode 100644 index 0000000..98a32fb --- /dev/null +++ b/src/tokens.rs @@ -0,0 +1,534 @@ +use super::Key; +use crate::Span; +use std::{borrow::Cow, char, str}; + +#[derive(Eq, PartialEq, Debug)] +pub enum Token<'a> { + Whitespace(&'a str), + Newline, + Comment(&'a str), + + Equals, + Period, + Comma, + Colon, + Plus, + LeftBrace, + RightBrace, + LeftBracket, + RightBracket, + + Keylike(&'a str), + String { + src: &'a str, + val: Cow<'a, str>, + multiline: bool, + }, +} + +#[derive(Eq, PartialEq, Debug)] +pub enum Error { + InvalidCharInString(usize, char), + InvalidEscape(usize, char), + InvalidHexEscape(usize, char), + InvalidEscapeValue(usize, u32), + NewlineInString(usize), + Unexpected(usize, char), + UnterminatedString(usize), + NewlineInTableKey(usize), + MultilineStringKey(usize), + Wanted { + at: usize, + expected: &'static str, + found: &'static str, + }, +} + +#[derive(Clone)] +pub struct Tokenizer<'a> { + input: &'a str, + chars: CrlfFold<'a>, +} + +#[derive(Clone)] +struct CrlfFold<'a> { + chars: str::CharIndices<'a>, +} + +#[derive(Debug)] +enum MaybeString { + NotEscaped(usize), + Owned(String), +} + +impl<'a> Tokenizer<'a> { + pub fn new(input: &'a str) -> Tokenizer<'a> { + let mut t = Tokenizer { + input, + chars: CrlfFold { + chars: input.char_indices(), + }, + }; + // Eat utf-8 BOM + t.eatc('\u{feff}'); + t + } + + pub fn next(&mut self) -> Result)>, Error> { + let (start, token) = match self.one() { + Some((start, '\n')) => (start, Token::Newline), + Some((start, ' ' | '\t')) => (start, self.whitespace_token(start)), + Some((start, 
'#')) => (start, self.comment_token(start)), + Some((start, '=')) => (start, Token::Equals), + Some((start, '.')) => (start, Token::Period), + Some((start, ',')) => (start, Token::Comma), + Some((start, ':')) => (start, Token::Colon), + Some((start, '+')) => (start, Token::Plus), + Some((start, '{')) => (start, Token::LeftBrace), + Some((start, '}')) => (start, Token::RightBrace), + Some((start, '[')) => (start, Token::LeftBracket), + Some((start, ']')) => (start, Token::RightBracket), + Some((start, '\'')) => { + return self + .literal_string(start) + .map(|t| Some((self.step_span(start), t))) + } + Some((start, '"')) => { + return self + .basic_string(start) + .map(|t| Some((self.step_span(start), t))) + } + Some((start, ch)) if is_keylike(ch) => (start, self.keylike(start)), + + Some((start, ch)) => return Err(Error::Unexpected(start, ch)), + None => return Ok(None), + }; + + let span = self.step_span(start); + Ok(Some((span, token))) + } + + pub fn peek(&mut self) -> Result)>, Error> { + self.clone().next() + } + + pub fn eat(&mut self, expected: Token<'a>) -> Result { + self.eat_spanned(expected).map(|s| s.is_some()) + } + + /// Eat a value, returning it's span if it was consumed. + pub fn eat_spanned(&mut self, expected: Token<'a>) -> Result, Error> { + let span = match self.peek()? { + Some((span, ref found)) if expected == *found => span, + Some(_) | None => return Ok(None), + }; + + drop(self.next()); + Ok(Some(span)) + } + + pub fn expect(&mut self, expected: Token<'a>) -> Result<(), Error> { + // ignore span + let _ = self.expect_spanned(expected)?; + Ok(()) + } + + /// Expect the given token returning its span. + pub fn expect_spanned(&mut self, expected: Token<'a>) -> Result { + let current = self.current(); + match self.next()? 
{ + Some((span, found)) => { + if expected == found { + Ok(span) + } else { + Err(Error::Wanted { + at: current, + expected: expected.describe(), + found: found.describe(), + }) + } + } + None => Err(Error::Wanted { + at: self.input.len(), + expected: expected.describe(), + found: "eof", + }), + } + } + + pub fn table_key(&mut self) -> Result, Error> { + let current = self.current(); + match self.next()? { + Some((span, Token::Keylike(k))) => Ok(Key { + span, + name: k.into(), + }), + Some(( + span, + Token::String { + src, + val, + multiline, + }, + )) => { + let offset = self.substr_offset(src); + if multiline { + return Err(Error::MultilineStringKey(offset)); + } + match src.find('\n') { + None => Ok(Key { span, name: val }), + Some(i) => Err(Error::NewlineInTableKey(offset + i)), + } + } + Some((_, other)) => Err(Error::Wanted { + at: current, + expected: "a table key", + found: other.describe(), + }), + None => Err(Error::Wanted { + at: self.input.len(), + expected: "a table key", + found: "eof", + }), + } + } + + pub fn eat_whitespace(&mut self) { + while self.eatc(' ') || self.eatc('\t') { + // ... + } + } + + pub fn eat_comment(&mut self) -> Result { + if !self.eatc('#') { + return Ok(false); + } + drop(self.comment_token(0)); + self.eat_newline_or_eof().map(|()| true) + } + + pub fn eat_newline_or_eof(&mut self) -> Result<(), Error> { + let current = self.current(); + match self.next()? 
{ + None | Some((_, Token::Newline)) => Ok(()), + Some((_, other)) => Err(Error::Wanted { + at: current, + expected: "newline", + found: other.describe(), + }), + } + } + + pub fn skip_to_newline(&mut self) { + loop { + match self.one() { + Some((_, '\n')) | None => break, + _ => {} + } + } + } + + fn eatc(&mut self, ch: char) -> bool { + match self.chars.clone().next() { + Some((_, ch2)) if ch == ch2 => { + self.one(); + true + } + _ => false, + } + } + + pub fn current(&mut self) -> usize { + match self.chars.clone().next() { + Some(i) => i.0, + None => self.input.len(), + } + } + + fn whitespace_token(&mut self, start: usize) -> Token<'a> { + while self.eatc(' ') || self.eatc('\t') { + // ... + } + Token::Whitespace(&self.input[start..self.current()]) + } + + fn comment_token(&mut self, start: usize) -> Token<'a> { + while let Some((_, ch)) = self.chars.clone().next() { + if ch != '\t' && (ch < '\u{20}' || ch > '\u{10ffff}') { + break; + } + self.one(); + } + Token::Comment(&self.input[start..self.current()]) + } + + fn read_string( + &mut self, + delim: char, + start: usize, + new_ch: &mut dyn FnMut( + &mut Tokenizer<'_>, + &mut MaybeString, + bool, + usize, + char, + ) -> Result<(), Error>, + ) -> Result, Error> { + let mut multiline = false; + if self.eatc(delim) { + if self.eatc(delim) { + multiline = true; + } else { + return Ok(Token::String { + src: &self.input[start..start + 2], + val: Cow::Borrowed(""), + multiline: false, + }); + } + } + let mut val = MaybeString::NotEscaped(self.current()); + let mut n = 0; + loop { + n += 1; + match self.one() { + Some((i, '\n')) => { + if multiline { + if self.input.as_bytes()[i] == b'\r' { + val.make_owned(&self.input[..i]); + } + if n == 1 { + val = MaybeString::NotEscaped(self.current()); + } else { + val.push('\n'); + } + } else { + return Err(Error::NewlineInString(i)); + } + } + Some((mut i, ch)) if ch == delim => { + if multiline { + if !self.eatc(delim) { + val.push(delim); + continue; + } + if 
!self.eatc(delim) { + val.push(delim); + val.push(delim); + continue; + } + if self.eatc(delim) { + val.push(delim); + i += 1; + } + if self.eatc(delim) { + val.push(delim); + i += 1; + } + } + return Ok(Token::String { + src: &self.input[start..self.current()], + val: val.into_cow(&self.input[..i]), + multiline, + }); + } + Some((i, c)) => new_ch(self, &mut val, multiline, i, c)?, + None => return Err(Error::UnterminatedString(start)), + } + } + } + + fn literal_string(&mut self, start: usize) -> Result, Error> { + self.read_string('\'', start, &mut |_me, val, _multi, i, ch| { + if ch == '\u{09}' || ('\u{20}' <= ch && ch <= '\u{10ffff}' && ch != '\u{7f}') { + val.push(ch); + Ok(()) + } else { + Err(Error::InvalidCharInString(i, ch)) + } + }) + } + + fn basic_string(&mut self, start: usize) -> Result, Error> { + self.read_string('"', start, &mut |me, val, multi, i, ch| match ch { + '\\' => { + val.make_owned(&me.input[..i]); + match me.chars.next() { + Some((_, '"')) => val.push('"'), + Some((_, '\\')) => val.push('\\'), + Some((_, 'b')) => val.push('\u{8}'), + Some((_, 'f')) => val.push('\u{c}'), + Some((_, 'n')) => val.push('\n'), + Some((_, 'r')) => val.push('\r'), + Some((_, 't')) => val.push('\t'), + Some((i, c @ ('u' | 'U'))) => { + let len = if c == 'u' { 4 } else { 8 }; + val.push(me.hex(start, i, len)?); + } + Some((i, c @ (' ' | '\t' | '\n'))) if multi => { + if c != '\n' { + while let Some((_, ch)) = me.chars.clone().next() { + match ch { + ' ' | '\t' => { + me.chars.next(); + continue; + } + '\n' => { + me.chars.next(); + break; + } + _ => return Err(Error::InvalidEscape(i, c)), + } + } + } + while let Some((_, ch)) = me.chars.clone().next() { + match ch { + ' ' | '\t' | '\n' => { + me.chars.next(); + } + _ => break, + } + } + } + Some((i, c)) => return Err(Error::InvalidEscape(i, c)), + None => return Err(Error::UnterminatedString(start)), + } + Ok(()) + } + ch if ch == '\u{09}' || ('\u{20}' <= ch && ch <= '\u{10ffff}' && ch != '\u{7f}') => { + 
val.push(ch); + Ok(()) + } + _ => Err(Error::InvalidCharInString(i, ch)), + }) + } + + fn hex(&mut self, start: usize, i: usize, len: usize) -> Result { + let mut buf = String::with_capacity(len); + for _ in 0..len { + match self.one() { + Some((_, ch)) if ch as u32 <= 0x7F && ch.is_ascii_hexdigit() => buf.push(ch), + Some((i, ch)) => return Err(Error::InvalidHexEscape(i, ch)), + None => return Err(Error::UnterminatedString(start)), + } + } + let val = u32::from_str_radix(&buf, 16).unwrap(); + match char::from_u32(val) { + Some(ch) => Ok(ch), + None => Err(Error::InvalidEscapeValue(i, val)), + } + } + + fn keylike(&mut self, start: usize) -> Token<'a> { + while let Some((_, ch)) = self.peek_one() { + if !is_keylike(ch) { + break; + } + self.one(); + } + Token::Keylike(&self.input[start..self.current()]) + } + + pub fn substr_offset(&self, s: &'a str) -> usize { + assert!(s.len() <= self.input.len()); + let a = self.input.as_ptr() as usize; + let b = s.as_ptr() as usize; + assert!(a <= b); + b - a + } + + /// Calculate the span of a single character. + fn step_span(&mut self, start: usize) -> Span { + let end = match self.peek_one() { + Some(t) => t.0, + None => self.input.len(), + }; + Span { start, end } + } + + /// Peek one char without consuming it. + fn peek_one(&mut self) -> Option<(usize, char)> { + self.chars.clone().next() + } + + /// Take one char. + pub fn one(&mut self) -> Option<(usize, char)> { + self.chars.next() + } +} + +impl<'a> Iterator for CrlfFold<'a> { + type Item = (usize, char); + + fn next(&mut self) -> Option<(usize, char)> { + self.chars.next().map(|(i, c)| { + if c == '\r' { + let mut attempt = self.chars.clone(); + if let Some((_, '\n')) = attempt.next() { + self.chars = attempt; + return (i, '\n'); + } + } + (i, c) + }) + } +} + +impl MaybeString { + fn push(&mut self, ch: char) { + match *self { + MaybeString::NotEscaped(..) 
=> {} + MaybeString::Owned(ref mut s) => s.push(ch), + } + } + + fn make_owned(&mut self, input: &str) { + match *self { + MaybeString::NotEscaped(start) => { + *self = MaybeString::Owned(input[start..].to_owned()); + } + MaybeString::Owned(..) => {} + } + } + + fn into_cow(self, input: &str) -> Cow<'_, str> { + match self { + MaybeString::NotEscaped(start) => Cow::Borrowed(&input[start..]), + MaybeString::Owned(s) => Cow::Owned(s), + } + } +} + +fn is_keylike(ch: char) -> bool { + ('A' <= ch && ch <= 'Z') + || ('a' <= ch && ch <= 'z') + || ('0' <= ch && ch <= '9') + || ch == '-' + || ch == '_' +} + +impl<'a> Token<'a> { + pub fn describe(&self) -> &'static str { + match *self { + Token::Keylike(_) => "an identifier", + Token::Equals => "an equals", + Token::Period => "a period", + Token::Comment(_) => "a comment", + Token::Newline => "a newline", + Token::Whitespace(_) => "whitespace", + Token::Comma => "a comma", + Token::RightBrace => "a right brace", + Token::LeftBrace => "a left brace", + Token::RightBracket => "a right bracket", + Token::LeftBracket => "a left bracket", + Token::String { multiline, .. 
} => { + if multiline { + "a multiline string" + } else { + "a string" + } + } + Token::Colon => "a colon", + Token::Plus => "a plus", + } + } +} From 7e1285662a7f10bfce4ffcf847f54d084f2754f2 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 30 Jan 2024 09:38:20 +0100 Subject: [PATCH 02/16] Checkpoint --- .gitignore | 1 + Cargo.lock | 330 +++++++++++ Cargo.toml | 20 +- integ-tests/Cargo.toml | 12 + integ-tests/data/booleans.toml | 2 + integ-tests/data/comments.toml | 24 + integ-tests/data/evil.toml | 21 + integ-tests/data/floats.toml | 14 + integ-tests/data/fruit.toml | 18 + integ-tests/data/inline_tables.toml | 13 + integ-tests/data/integers.toml | 20 + integ-tests/data/key_names.toml | 14 + integ-tests/data/strings.toml | 68 +++ integ-tests/data/table_names.toml | 6 + integ-tests/data/tables_in_arrays.toml | 9 + integ-tests/data/underscores.toml | 5 + integ-tests/src/lib.rs | 122 ++++ integ-tests/tests/de.rs | 23 + integ-tests/tests/parser.rs | 288 ++++++++++ .../tests/snapshots/de__basic_table.snap | 10 + .../parser__bad_booleans__false_trailing.snap | 11 + .../parser__bad_booleans__leading_f.snap | 11 + .../parser__bad_booleans__leading_t.snap | 11 + .../parser__bad_booleans__true_trailing.snap | 11 + .../snapshots/parser__bad_codepoint.snap | 11 + .../parser__bad_floats__trailing_dec.snap | 11 + .../parser__bad_floats__trailing_exp.snap | 11 + .../parser__bad_floats__trailing_exp2.snap | 11 + .../parser__bad_floats__trailing_exp3.snap | 11 + .../parser__bad_floats__trailing_exp4.snap | 11 + .../parser__bad_floats__trailing_neg.snap | 11 + .../parser__bad_floats__trailing_pos.snap | 11 + ...ser__bad_inline_tables__duplicate_key.snap | 13 + .../parser__bad_inline_tables__eof.snap | 11 + .../parser__bad_inline_tables__newline.snap | 13 + ...parser__bad_inline_tables__only_comma.snap | 11 + ...er__bad_inline_tables__trailing_comma.snap | 11 + .../tests/snapshots/parser__bad_keys__cr.snap | 11 + .../parser__bad_keys__empty_pipe.snap | 11 + 
.../parser__bad_keys__multi_line2.snap | 11 + .../parser__bad_keys__multi_line3.snap | 11 + .../parser__bad_keys__multi_line4.snap | 11 + .../parser__bad_keys__mutli_line.snap | 11 + .../snapshots/parser__bad_keys__newline.snap | 13 + .../snapshots/parser__bad_keys__newline2.snap | 13 + ...parser__bad_keys__newline_after_equal.snap | 13 + .../snapshots/parser__bad_keys__none.snap | 11 + .../snapshots/parser__bad_keys__pipe.snap | 11 + ...ser__bad_leading_zeros__neg_two_zeros.snap | 11 + ...rser__bad_leading_zeros__neg_with_dec.snap | 11 + ...ser__bad_leading_zeros__pos_two_zeros.snap | 11 + ...rser__bad_leading_zeros__pos_with_dec.snap | 11 + .../parser__bad_leading_zeros__two_zeros.snap | 11 + .../parser__bad_leading_zeros__with_dec.snap | 11 + ...arser__bad_nesting__array_then_dotted.snap | 13 + ..._bad_nesting__array_then_dotted_array.snap | 13 + .../parser__bad_nesting__inline.snap | 13 + .../parser__bad_nesting__key_then_array.snap | 13 + .../parser__bad_nesting__key_then_dotted.snap | 13 + .../snapshots/parser__bad_strings__hex.snap | 11 + .../snapshots/parser__bad_strings__hex2.snap | 11 + .../parser__bad_strings__unterminated.snap | 11 + ...er__bad_strings__unterminated_literal.snap | 11 + ...parser__bad_table_names__crlf_literal.snap | 11 + .../parser__bad_table_names__empty.snap | 11 + .../parser__bad_table_names__exclamation.snap | 11 + .../parser__bad_table_names__multi_bar.snap | 11 + .../parser__bad_table_names__multi_empty.snap | 11 + .../parser__bad_table_names__multi_foo.snap | 11 + .../parser__bad_table_names__newline.snap | 13 + ...ser__bad_table_names__newline_literal.snap | 13 + .../parser__bad_table_names__period.snap | 11 + ...parser__bad_table_names__redefinition.snap | 13 + ...ser__bad_table_names__trailing_period.snap | 11 + ...er__bad_table_names__unterminated_one.snap | 11 + ...__bad_table_names__unterminated_three.snap | 11 + .../parser__bad_underscores__double.snap | 11 + ...rser__bad_underscores__double_leading.snap | 11 + 
.../parser__bad_underscores__trailing.snap | 11 + .../parser__bad_underscores__trailing2.snap | 11 + .../snapshots/parser__bare_key_names.snap | 16 + .../tests/snapshots/parser__bare_number.snap | 11 + .../snapshots/parser__blank_literal.snap | 7 + .../tests/snapshots/parser__booleans.snap | 8 + integ-tests/tests/snapshots/parser__crlf.snap | 20 + .../snapshots/parser__datetimes__tz.snap | 11 + .../snapshots/parser__datetimes__tz2.snap | 11 + .../snapshots/parser__datetimes__tz_neg.snap | 11 + .../snapshots/parser__datetimes__tz_neg2.snap | 11 + .../snapshots/parser__datetimes__tz_neg3.snap | 11 + .../snapshots/parser__datetimes__utc.snap | 11 + .../parser__datetimes__utc_invalid.snap | 11 + .../parser__datetimes__utc_punkt.snap | 11 + .../parser__datetimes__utc_trailing_dot.snap | 11 + .../tests/snapshots/parser__empty_string.snap | 7 + .../tests/snapshots/parser__empty_table.snap | 7 + .../tests/snapshots/parser__floats.snap | 16 + .../tests/snapshots/parser__fruit.snap | 31 + .../snapshots/parser__inline_tables.snap | 32 ++ .../parser__integer_range_negative.snap | 11 + .../parser__integer_range_positive.snap | 11 + .../tests/snapshots/parser__key_names.snap | 20 + .../tests/snapshots/parser__key_no_space.snap | 7 + .../snapshots/parser__literal_eats_crlf.snap | 8 + .../tests/snapshots/parser__many_blank.snap | 7 + .../snapshots/parser__newline_literal.snap | 13 + .../snapshots/parser__newline_string.snap | 13 + .../parser__redefine__table_then_dotted.snap | 13 + ...e__table_then_dotted_then_table_again.snap | 16 + .../parser__redefine__table_then_inline.snap | 15 + .../parser__redefine__table_then_table.snap | 16 + .../parser__require_newlines__arrays.snap | 11 + .../parser__require_newlines__basic.snap | 11 + .../parser__require_newlines__basic2.snap | 11 + .../parser__require_newlines__basic3.snap | 11 + .../parser__require_newlines__strings.snap | 11 + .../parser__require_newlines__tables.snap | 13 + .../parser__stray_cr__array_value.snap | 11 + 
.../snapshots/parser__stray_cr__ml_basic.snap | 11 + .../parser__stray_cr__ml_basic2.snap | 11 + .../parser__stray_cr__ml_more_whitespace.snap | 11 + .../snapshots/parser__stray_cr__single.snap | 11 + .../parser__stray_cr__value_literal.snap | 11 + .../parser__stray_cr__value_str.snap | 11 + .../tests/snapshots/parser__strings.snap | 49 ++ .../tests/snapshots/parser__table_names.snap | 14 + .../snapshots/parser__tables_in_arrays.snap | 14 + .../tests/snapshots/parser__underscores.snap | 11 + .../tests/snapshots/valid__arrays__empty.snap | 15 + .../valid__arrays__heterogenous.snap | 20 + .../valid__arrays__ints_and_arrays.snap | 12 + .../valid__arrays__ints_and_floats.snap | 10 + .../snapshots/valid__arrays__nested.snap | 14 + .../snapshots/valid__arrays__no_spaces.snap | 11 + .../tests/snapshots/valid__arrays__one.snap | 12 + .../valid__arrays__strings_and_ints.snap | 10 + .../tests/snapshots/valid__comments.snap | 13 + integ-tests/tests/snapshots/valid__evil.snap | 27 + .../snapshots/valid__numbers__floats.snap | 20 + .../snapshots/valid__numbers__integers.snap | 18 + .../snapshots/valid__tables__array_many.snap | 20 + ...__tables__implicit_and_explicit_after.snap | 14 + ..._tables__implicit_and_explicit_before.snap | 14 + .../valid__tables__implicit_array.snap | 13 + .../valid__tables__implicit_groups.snap | 13 + .../valid__tables__nested_arrays.snap | 30 + .../snapshots/valid__tables__sub_empty.snap | 9 + integ-tests/tests/tokens.rs | 168 ++++++ integ-tests/tests/valid.rs | 96 ++++ src/error.rs | 193 ------- src/lib.rs | 536 ------------------ toml-file-derive/Cargo.toml | 12 + toml-file-derive/src/de.rs | 5 + toml-file-derive/src/lib.rs | 9 + toml-file/Cargo.toml | 14 + {src => toml-file/src}/de.rs | 388 +++++++------ toml-file/src/de_helpers.rs | 140 +++++ toml-file/src/error.rs | 294 ++++++++++ toml-file/src/lib.rs | 26 + toml-file/src/span.rs | 148 +++++ {src => toml-file/src}/tokens.rs | 11 +- toml-file/src/value.rs | 111 ++++ 
toml-file/src/value/impl_serde.rs | 33 ++ 163 files changed, 3921 insertions(+), 901 deletions(-) create mode 100644 integ-tests/Cargo.toml create mode 100644 integ-tests/data/booleans.toml create mode 100644 integ-tests/data/comments.toml create mode 100644 integ-tests/data/evil.toml create mode 100644 integ-tests/data/floats.toml create mode 100644 integ-tests/data/fruit.toml create mode 100644 integ-tests/data/inline_tables.toml create mode 100644 integ-tests/data/integers.toml create mode 100644 integ-tests/data/key_names.toml create mode 100644 integ-tests/data/strings.toml create mode 100644 integ-tests/data/table_names.toml create mode 100644 integ-tests/data/tables_in_arrays.toml create mode 100644 integ-tests/data/underscores.toml create mode 100644 integ-tests/src/lib.rs create mode 100644 integ-tests/tests/de.rs create mode 100644 integ-tests/tests/parser.rs create mode 100644 integ-tests/tests/snapshots/de__basic_table.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_booleans__false_trailing.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_booleans__leading_f.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_booleans__leading_t.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_booleans__true_trailing.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_codepoint.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_floats__trailing_dec.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_floats__trailing_exp.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_floats__trailing_exp2.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_floats__trailing_exp3.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_floats__trailing_exp4.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_floats__trailing_neg.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_floats__trailing_pos.snap create mode 100644 
integ-tests/tests/snapshots/parser__bad_inline_tables__duplicate_key.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_inline_tables__eof.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_inline_tables__newline.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_inline_tables__only_comma.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_inline_tables__trailing_comma.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__cr.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__empty_pipe.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__multi_line2.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__multi_line3.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__multi_line4.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__mutli_line.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__newline.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__newline2.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__newline_after_equal.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__none.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__pipe.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_leading_zeros__neg_two_zeros.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_leading_zeros__neg_with_dec.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_leading_zeros__pos_two_zeros.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_leading_zeros__pos_with_dec.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_leading_zeros__two_zeros.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_leading_zeros__with_dec.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_nesting__array_then_dotted.snap create mode 100644 
integ-tests/tests/snapshots/parser__bad_nesting__array_then_dotted_array.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_nesting__inline.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_nesting__key_then_array.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_nesting__key_then_dotted.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_strings__hex.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_strings__hex2.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_strings__unterminated.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_strings__unterminated_literal.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__crlf_literal.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__empty.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__exclamation.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__multi_bar.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__multi_empty.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__multi_foo.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__newline.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__newline_literal.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__period.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__redefinition.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__trailing_period.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__unterminated_one.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_table_names__unterminated_three.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_underscores__double.snap create mode 100644 
integ-tests/tests/snapshots/parser__bad_underscores__double_leading.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_underscores__trailing.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_underscores__trailing2.snap create mode 100644 integ-tests/tests/snapshots/parser__bare_key_names.snap create mode 100644 integ-tests/tests/snapshots/parser__bare_number.snap create mode 100644 integ-tests/tests/snapshots/parser__blank_literal.snap create mode 100644 integ-tests/tests/snapshots/parser__booleans.snap create mode 100644 integ-tests/tests/snapshots/parser__crlf.snap create mode 100644 integ-tests/tests/snapshots/parser__datetimes__tz.snap create mode 100644 integ-tests/tests/snapshots/parser__datetimes__tz2.snap create mode 100644 integ-tests/tests/snapshots/parser__datetimes__tz_neg.snap create mode 100644 integ-tests/tests/snapshots/parser__datetimes__tz_neg2.snap create mode 100644 integ-tests/tests/snapshots/parser__datetimes__tz_neg3.snap create mode 100644 integ-tests/tests/snapshots/parser__datetimes__utc.snap create mode 100644 integ-tests/tests/snapshots/parser__datetimes__utc_invalid.snap create mode 100644 integ-tests/tests/snapshots/parser__datetimes__utc_punkt.snap create mode 100644 integ-tests/tests/snapshots/parser__datetimes__utc_trailing_dot.snap create mode 100644 integ-tests/tests/snapshots/parser__empty_string.snap create mode 100644 integ-tests/tests/snapshots/parser__empty_table.snap create mode 100644 integ-tests/tests/snapshots/parser__floats.snap create mode 100644 integ-tests/tests/snapshots/parser__fruit.snap create mode 100644 integ-tests/tests/snapshots/parser__inline_tables.snap create mode 100644 integ-tests/tests/snapshots/parser__integer_range_negative.snap create mode 100644 integ-tests/tests/snapshots/parser__integer_range_positive.snap create mode 100644 integ-tests/tests/snapshots/parser__key_names.snap create mode 100644 integ-tests/tests/snapshots/parser__key_no_space.snap create mode 100644 
integ-tests/tests/snapshots/parser__literal_eats_crlf.snap create mode 100644 integ-tests/tests/snapshots/parser__many_blank.snap create mode 100644 integ-tests/tests/snapshots/parser__newline_literal.snap create mode 100644 integ-tests/tests/snapshots/parser__newline_string.snap create mode 100644 integ-tests/tests/snapshots/parser__redefine__table_then_dotted.snap create mode 100644 integ-tests/tests/snapshots/parser__redefine__table_then_dotted_then_table_again.snap create mode 100644 integ-tests/tests/snapshots/parser__redefine__table_then_inline.snap create mode 100644 integ-tests/tests/snapshots/parser__redefine__table_then_table.snap create mode 100644 integ-tests/tests/snapshots/parser__require_newlines__arrays.snap create mode 100644 integ-tests/tests/snapshots/parser__require_newlines__basic.snap create mode 100644 integ-tests/tests/snapshots/parser__require_newlines__basic2.snap create mode 100644 integ-tests/tests/snapshots/parser__require_newlines__basic3.snap create mode 100644 integ-tests/tests/snapshots/parser__require_newlines__strings.snap create mode 100644 integ-tests/tests/snapshots/parser__require_newlines__tables.snap create mode 100644 integ-tests/tests/snapshots/parser__stray_cr__array_value.snap create mode 100644 integ-tests/tests/snapshots/parser__stray_cr__ml_basic.snap create mode 100644 integ-tests/tests/snapshots/parser__stray_cr__ml_basic2.snap create mode 100644 integ-tests/tests/snapshots/parser__stray_cr__ml_more_whitespace.snap create mode 100644 integ-tests/tests/snapshots/parser__stray_cr__single.snap create mode 100644 integ-tests/tests/snapshots/parser__stray_cr__value_literal.snap create mode 100644 integ-tests/tests/snapshots/parser__stray_cr__value_str.snap create mode 100644 integ-tests/tests/snapshots/parser__strings.snap create mode 100644 integ-tests/tests/snapshots/parser__table_names.snap create mode 100644 integ-tests/tests/snapshots/parser__tables_in_arrays.snap create mode 100644 
integ-tests/tests/snapshots/parser__underscores.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__empty.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__heterogenous.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__ints_and_arrays.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__ints_and_floats.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__nested.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__no_spaces.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__one.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__strings_and_ints.snap create mode 100644 integ-tests/tests/snapshots/valid__comments.snap create mode 100644 integ-tests/tests/snapshots/valid__evil.snap create mode 100644 integ-tests/tests/snapshots/valid__numbers__floats.snap create mode 100644 integ-tests/tests/snapshots/valid__numbers__integers.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__array_many.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_after.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_before.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__implicit_array.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__implicit_groups.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__nested_arrays.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__sub_empty.snap create mode 100644 integ-tests/tests/tokens.rs create mode 100644 integ-tests/tests/valid.rs delete mode 100644 src/error.rs delete mode 100644 src/lib.rs create mode 100644 toml-file-derive/Cargo.toml create mode 100644 toml-file-derive/src/de.rs create mode 100644 toml-file-derive/src/lib.rs create mode 100644 toml-file/Cargo.toml rename {src => toml-file/src}/de.rs (75%) create mode 100644 toml-file/src/de_helpers.rs create mode 100644 
toml-file/src/error.rs create mode 100644 toml-file/src/lib.rs create mode 100644 toml-file/src/span.rs rename {src => toml-file/src}/tokens.rs (98%) create mode 100644 toml-file/src/value.rs create mode 100644 toml-file/src/value/impl_serde.rs diff --git a/.gitignore b/.gitignore index ea8c4bf..116f0d5 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +**.snap.new \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 839e01b..91c0c4b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,15 +2,345 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "basic-toml" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2db21524cad41c5591204d22d75e1970a2d1f71060214ca931dc7d5afe2c14e5" +dependencies = [ + "serde", +] + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "windows-sys", +] + +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]] +name = "identconv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02df3085f97750c1f8deb1b56aeb168f242f303e363aee16284821c0a14ff90e" +dependencies = [ + "convert_case", + "litrs", +] + +[[package]] +name = "insta" +version = "1.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d64600be34b2fcfc267740a243fa7744441bb4947a619ac4e5bb6507f35fbfc" +dependencies = [ + "console", + "lazy_static", + "linked-hash-map", + "serde", + "similar", + "yaml-rust", +] + +[[package]] +name = "integ-tests" +version = "0.1.0" +dependencies = [ + "basic-toml", + "codespan-reporting", + "insta", + "pretty_assertions", + "toml-file", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + +[[package]] +name = "litrs" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "pretty_assertions" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +dependencies = [ + "diff", + "yansi", +] + +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.195" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.195" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "similar" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32fea41aca09ee824cc9724996433064c89f7777e60762749a4170a14abbfa21" + [[package]] name = "smallvec" version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "toml-file" version = "0.1.0" dependencies = [ + "codespan-reporting", + "identconv", + "serde", "smallvec", ] + +[[package]] +name = "toml-file-derive" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + 
+[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + +[[package]] +name 
= "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" diff --git a/Cargo.toml b/Cargo.toml index f3b62cd..1b7a52b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,15 @@ -[package] -name = "toml-file" -version = "0.1.0" -edition = "2021" +[workspace] +resolver = "2" +members = ["integ-tests", "toml-file", "toml-file-derive"] -[dependencies] -smallvec = "1.13" +[workspace.dependencies] +toml-file = { path = "toml-file" } +proc-macro2 = "1.0" +quote = "1.0" +syn = "2.0" + +[profile.dev.package.insta] +opt-level = 3 + +[profile.dev.package.similar] +opt-level = 3 diff --git a/integ-tests/Cargo.toml b/integ-tests/Cargo.toml new file mode 100644 index 0000000..4ca8e49 --- /dev/null +++ b/integ-tests/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "integ-tests" +version = "0.1.0" +edition = "2021" +publish = false + +[dependencies] +basic-toml = "0.1" +codespan-reporting = "0.11" +insta = { version = "1.34", features = ["json"] } +pretty_assertions = "1.4" +toml-file = { workspace = true, features = ["reporting", "serde"] } diff --git a/integ-tests/data/booleans.toml b/integ-tests/data/booleans.toml new file mode 100644 index 0000000..fa9a37d --- /dev/null +++ b/integ-tests/data/booleans.toml @@ -0,0 +1,2 @@ +"true" = true +"false" = false diff --git a/integ-tests/data/comments.toml b/integ-tests/data/comments.toml new file mode 100644 index 0000000..a13951d --- /dev/null +++ b/integ-tests/data/comments.toml @@ -0,0 +1,24 @@ +# Top comment. + # Top comment. +# Top comment. + +# [no-extraneous-groups-please] + +[group] # Comment +answer = 42 # Comment +# no-extraneous-keys-please = 999 +# In between comment. +more = [ # Comment + # What about multiple # comments? + # Can you handle it? + # + # Evil. +# Evil. + 42, 42, # Comments within arrays are fun. + # What about multiple # comments? + # Can you handle it? + # + # Evil. +# Evil. +# ] Did I fool you? 
+] # Hopefully not. diff --git a/integ-tests/data/evil.toml b/integ-tests/data/evil.toml new file mode 100644 index 0000000..802af46 --- /dev/null +++ b/integ-tests/data/evil.toml @@ -0,0 +1,21 @@ +# Test file for TOML +# Only this one tries to emulate a TOML file written by a user of the kind of parser writers probably hate +# This part you'll really hate + +[the] +test_string = "You'll hate me after this - #" # " Annoying, isn't it? + + [the.hard] + test_array = [ "] ", " # "] # ] There you go, parse this! + test_array2 = [ "Test #11 ]proved that", "Experiment #9 was a success" ] + # You didn't think it'd as easy as chucking out the last #, did you? + another_test_string = " Same thing, but with a string #" + harder_test_string = " And when \"'s are in the string, along with # \"" # "and comments are there too" + # Things will get harder + + [the.hard."bit#"] + "what?" = "You don't think some user won't do that?" + multi_line_array = [ + "]", + # ] Oh yes I did + ] diff --git a/integ-tests/data/floats.toml b/integ-tests/data/floats.toml new file mode 100644 index 0000000..c3f9e10 --- /dev/null +++ b/integ-tests/data/floats.toml @@ -0,0 +1,14 @@ +normal = 1.0 +normal-exp = 1.0e0 +normal-pos-exp = 1.0e+0 +normal-neg-exp = 1.0e-0 +neg-exp = 1E-0 +multi-dec-exp = 1.001e-0 +two-exp = 2e10 +two-pos-exp = 2e+10 +two-neg-exp = 2e-10 +twenty = 2_0.0 +twenty-exp = 2_0.0_0e1_0 +twenty-punkt-ett = 2_0.1_0e1_0 +longpi = 3.141592653589793 +neglongpi = -3.141592653589793 diff --git a/integ-tests/data/fruit.toml b/integ-tests/data/fruit.toml new file mode 100644 index 0000000..5f590a0 --- /dev/null +++ b/integ-tests/data/fruit.toml @@ -0,0 +1,18 @@ +[[fruit]] +name = "apple" + +[fruit.physical] +color = "red" +shape = "round" + +[[fruit.variety]] +name = "red delicious" + +[[fruit.variety]] +name = "granny smith" + +[[fruit]] +name = "banana" + +[[fruit.variety]] +name = "plantain" diff --git a/integ-tests/data/inline_tables.toml b/integ-tests/data/inline_tables.toml new file 
mode 100644 index 0000000..1e54edb --- /dev/null +++ b/integ-tests/data/inline_tables.toml @@ -0,0 +1,13 @@ +a = {} +b = {b=1} +c = { b = 1 } +d = {a=1,b=2} +e = {a=1,b=2,c={}} +f = {a=[ +]} +g = {"a"=[ +]} +h = [ + {}, + {}, +] \ No newline at end of file diff --git a/integ-tests/data/integers.toml b/integ-tests/data/integers.toml new file mode 100644 index 0000000..2139202 --- /dev/null +++ b/integ-tests/data/integers.toml @@ -0,0 +1,20 @@ +answer = 42 +neganswer = -42 + +neg_zero = -0 +pos_zero = +0 + +# hexadecimal with prefix `0x` +hex1 = 0xDEADBEEF +hex2 = 0xdeadbeef +hex3 = 0xdead_beef + +# octal with prefix `0o` +oct1 = 0o01234567 +oct2 = 0o755 # useful for Unix file permissions + +# binary with prefix `0b` +bin1 = 0b11010110 + +long-answer = 9223372036854775807 +long-neganswer = -9223372036854775808 diff --git a/integ-tests/data/key_names.toml b/integ-tests/data/key_names.toml new file mode 100644 index 0000000..82a7126 --- /dev/null +++ b/integ-tests/data/key_names.toml @@ -0,0 +1,14 @@ +foo = 3 +foo_3 = 3 +foo_-2--3--r23f--4-f2-4 = 3 +_ = 3 +- = 3 +8 = 8 +"a" = 3 +"!" = 3 +"a^b" = 3 +"\"" = 3 +"character encoding" = "value" +'ʎǝʞ' = "value" +"key#name" = 5 +"~!@#$^&*()_+-`1234567890[]\\|/?><.,;:'" = 1 diff --git a/integ-tests/data/strings.toml b/integ-tests/data/strings.toml new file mode 100644 index 0000000..d7a682e --- /dev/null +++ b/integ-tests/data/strings.toml @@ -0,0 +1,68 @@ +bar = "\U00000000" +unicode = "δ" +key1 = "One\nTwo" +key2 = """One\nTwo""" +empty = "" +key3 = """ +One +Two""" + +key4 = "The quick brown fox jumps over the lazy dog." +key5 = """ +The quick brown \ + + +fox jumps over \ +the lazy dog.""" +key6 = """\ + The quick brown \ + fox jumps over \ + the lazy dog.\ + """ +# What you see is what you get. 
+winpath = 'C:\Users\nodejs\templates' +winpath2 = '\\ServerX\admin$\system32\' +quoted = 'Tom "Dubs" Preston-Werner' +regex = '<\i\c*\s*>' + +regex2 = '''I [dw]on't need \d{2} apples''' +lines = ''' +The first newline is +trimmed in raw strings. +All other whitespace +is preserved. +''' +firstnl = ''' +This string has a ' quote character.''' +# literals with various escapes +lbackspace = 'This string has a \b backspace character.' +ltab = 'This string has a \t tab character.' +lnewline = 'This string has a \n new line character.' +lformfeed = 'This string has a \f form feed character.' +lcarriage = 'This string has a \r carriage return character.' +lslash = 'This string has a \/ slash character.' +lbackslash = 'This string has a \\ backslash character.' + +# non-literals with various escapes +backspace = "This string has a \b backspace character." +tab = "This string has a \t tab character." +newline = "This string has a \n new line character." +formfeed = "This string has a \f form feed character." +carriage = "This string has a \r carriage return character." +quote = "This string has a \" quote character." +slash = "This string has a / slash character." +backslash = "This string has a \\ backslash character." +notunicode1 = "This string does not have a unicode \\u escape." +notunicode2 = "This string does not have a unicode \u005Cu escape." +notunicode3 = "This string does not have a unicode \\u0075 escape." +notunicode4 = "This string does not have a unicode \\\u0075 escape." +delete = "This string has a \u007F delete control code." +unitseparator = "This string has a \u001F unit separator control code." 
+ +# unicode escapes +answer1 = "\u000B" +answer4 = "\u03B4α" +answer8 = "\U000003B4β" +answer9 = "\uc0de" +answer10 = "\u03b4α" +answer11 = "\U0000abc1" diff --git a/integ-tests/data/table_names.toml b/integ-tests/data/table_names.toml new file mode 100644 index 0000000..fa26aa7 --- /dev/null +++ b/integ-tests/data/table_names.toml @@ -0,0 +1,6 @@ +[a."b"] +["f f"] +["f.f"] +["\""] +['a.a'] +['""'] diff --git a/integ-tests/data/tables_in_arrays.toml b/integ-tests/data/tables_in_arrays.toml new file mode 100644 index 0000000..c058e07 --- /dev/null +++ b/integ-tests/data/tables_in_arrays.toml @@ -0,0 +1,9 @@ +[[foo]] +#… +[foo.bar] +#… + +[[foo]] # ... +#… +[foo.bar] +#... diff --git a/integ-tests/data/underscores.toml b/integ-tests/data/underscores.toml new file mode 100644 index 0000000..7780dc7 --- /dev/null +++ b/integ-tests/data/underscores.toml @@ -0,0 +1,5 @@ +ten = 1_0 +hundred = 1_0_0 +thousand = 1_000 +thousand-pos = +1_000 +thousand-neg =-1_000 diff --git a/integ-tests/src/lib.rs b/integ-tests/src/lib.rs new file mode 100644 index 0000000..d511976 --- /dev/null +++ b/integ-tests/src/lib.rs @@ -0,0 +1,122 @@ +/// Loads a valid toml file and does a snapshot assertion against `toml` +#[macro_export] +macro_rules! valid { + ($name:ident) => { + #[test] + fn $name() { + let toml_str = + std::fs::read_to_string(dbg!(concat!("data/", stringify!($name), ".toml"))) + .expect(concat!("failed to load ", stringify!($name), ".toml")); + let valid_toml = toml_file::parse(&toml_str).expect("failed to parse toml"); + insta::assert_json_snapshot!(valid_toml); + } + }; + ($name:ident, $toml:literal) => { + #[test] + fn $name() { + let valid_toml = toml_file::parse($toml).expect("failed to parse toml"); + insta::assert_json_snapshot!(valid_toml); + } + }; +} + +#[macro_export] +macro_rules! 
unexpected { + ($name:ident, $err:expr, $toml:expr) => {{ + let file = $crate::File::new(stringify!($name), $toml); + let error = $crate::emit_error(&file, $err); + + panic!("unexpected toml deserialization errors:\n{error}"); + }}; +} + +/// Loads a valid toml file, deserializes it to the specified type and asserts +/// the debug snapshot matches +#[macro_export] +macro_rules! valid_de { + ($name:ident, $kind:ty) => { + #[test] + fn $name() { + let toml_str = + std::fs::read_to_string(dbg!(concat!("data/", stringify!($name), ".toml"))) + .expect(concat!("failed to load ", stringify!($name), ".toml")); + let mut valid_toml = toml_file::parse(&toml_str).expect("failed to parse toml"); + + match $kind::deserialize(&mut valid_toml) { + Ok(de) => { + insta::assert_debug_snapshot!(de); + } + Err(err) => { + $crate::unexpected!($name, err.errors, toml_str); + } + } + } + }; + ($name:ident, $kind:ty, $toml:literal) => { + #[test] + fn $name() { + let mut valid_toml = toml_file::parse($toml).expect("failed to parse toml"); + + match <$kind>::deserialize(&mut valid_toml) { + Ok(de) => { + insta::assert_debug_snapshot!(de); + } + Err(err) => { + $crate::unexpected!($name, err.errors, $toml); + } + } + } + }; +} + +pub type File<'s> = codespan_reporting::files::SimpleFile<&'static str, &'s str>; + +pub fn emit_error(f: &File, error: impl IntoIterator) -> String { + let mut output = codespan_reporting::term::termcolor::NoColor::new(Vec::new()); + + for err in error { + let diag = err.to_diagnostic(()); + codespan_reporting::term::emit( + &mut output, + &codespan_reporting::term::Config::default(), + f, + &diag, + ) + .expect("uhm...oops?"); + } + + String::from_utf8(output.into_inner()).unwrap() +} + +/// Creates a codespan diagnostic for an error and asserts the emitted diagnostic +/// matches a snapshot +#[macro_export] +macro_rules! 
error_snapshot { + ($name:ident, $err:expr, $toml:expr) => { + let file = $crate::File::new(stringify!($name), $toml); + let error = $crate::emit_error(&file, $err); + insta::assert_snapshot!(error); + }; +} + +/// Loads an invalid toml file and does a snapshot assertion on the error +#[macro_export] +macro_rules! invalid { + ($name:ident) => { + #[test] + fn $name() { + let toml_str = + std::fs::read_to_string(dbg!(concat!("data/", stringify!($name), ".toml"))) + .expect(concat!("failed to load ", stringify!($name), ".toml")); + let error = toml_file::parse(toml_str).unwrap_err(); + $crate::error_snapshot!($name, error, &toml_str); + } + }; + ($name:ident, $toml:expr) => { + #[test] + fn $name() { + let error = toml_file::parse($toml).unwrap_err(); + $crate::error_snapshot!($name, Some(error), $toml); + } + }; +} diff --git a/integ-tests/tests/de.rs b/integ-tests/tests/de.rs new file mode 100644 index 0000000..c811317 --- /dev/null +++ b/integ-tests/tests/de.rs @@ -0,0 +1,23 @@ +use integ_tests::valid_de; +use toml_file::{de_helpers::*, value::Value, DeserError, Deserialize, Error}; + +#[derive(Debug)] +struct Boop { + s: String, + os: Option, +} + +impl Deserialize for Boop { + fn deserialize<'de>(value: &mut Value<'de>) -> Result { + let mut mh = TableHelper::new(value)?; + + let s = mh.required("s"); + let os = mh.optional("os"); + + mh.finalize(true)?; + + Ok(Self { s, os }) + } +} + +valid_de!(basic_table, Boop, "s = 'boop string'\nos = 20"); diff --git a/integ-tests/tests/parser.rs b/integ-tests/tests/parser.rs new file mode 100644 index 0000000..2ab14f9 --- /dev/null +++ b/integ-tests/tests/parser.rs @@ -0,0 +1,288 @@ +use integ_tests::{invalid, valid}; + +valid!(fruit); +valid!(strings); +valid!(empty_table, "\n[foo]\n"); +valid!(tables_in_arrays); +valid!(blank_literal, "foo = ''"); +// Normally this would be in a file, but it's inlined to avoid git shenanigans +valid!( + crlf, + "\ +[project]\r\n\ +\r\n\ +name = \"splay\"\r\n\ +version = \"0.1.0\"\r\n\ 
+authors = [\"alex@crichton.co\"]\r\n\ +\r\n\ +[[lib]]\r\n\ +\r\n\ +path = \"lib.rs\"\r\n\ +name = \"splay\"\r\n\ +description = \"\"\"\ +A Rust implementation of a TAR file reader and writer. This library does not\r\n\ +currently handle compression, but it is abstract over all I/O readers and\r\n\ +writers. Additionally, great lengths are taken to ensure that the entire\r\n\ +contents are never required to be entirely resident in memory all at once.\r\n\ +\"\"\"\ +" +); +valid!(many_blank, "foo = \"\"\"\n\n\n\"\"\""); +valid!( + literal_eats_crlf, + "foo = \"\"\"\\\r\n\"\"\" +bar = \"\"\"\\\r\n \r\n \r\n a\"\"\"" +); +invalid!(newline_string, "a = \"\n\""); +invalid!(newline_literal, "a = '\n'"); +valid!(key_names); + +mod stray_cr { + use super::invalid; + + invalid!(single, "\r"); + invalid!(array_value, "a = [ \r ]"); + invalid!(ml_basic, "\"\"\"\r\"\"\""); + invalid!(ml_more_whitespace, "\"\"\" \r \"\"\""); + invalid!(ml_basic2, "'''\r'''"); + invalid!(value_literal, "a = '\r'"); + invalid!(value_str, "a = \"\r\""); +} + +mod bad_leading_zeros { + use super::invalid; + + invalid!(two_zeros, "a = 00"); + invalid!(neg_two_zeros, "a = -00"); + invalid!(pos_two_zeros, "a = +00"); + invalid!(with_dec, "a = 00.0"); + invalid!(neg_with_dec, "a = -00.0"); + invalid!(pos_with_dec, "a = +00.0"); +} + +mod bad_floats { + use super::invalid; + + invalid!(trailing_dec, "a = 0."); + invalid!(trailing_exp, "a = 0.e"); + invalid!(trailing_exp2, "a = 0.E"); + invalid!(trailing_exp3, "a = 0.0E"); + invalid!(trailing_exp4, "a = 0.0e"); + invalid!(trailing_neg, "a = 0.0e-"); + invalid!(trailing_pos, "a = 0.0e+"); +} + +mod bad_keys { + use super::invalid; + + invalid!(newline, "key\n=3"); + invalid!(newline_after_equal, "key=\n3"); + invalid!(pipe, "key|=3"); + invalid!(none, "=3"); + invalid!(empty_pipe, "\"\"|=3"); + invalid!(newline2, "\"\n\"|=3"); + invalid!(cr, "\"\r\"|=3"); + invalid!(mutli_line, "''''''=3"); + invalid!(multi_line2, r#"""""""=3"#); + invalid!(multi_line3, 
"'''key'''=3"); + invalid!(multi_line4, r#""""key"""=3"#); +} + +valid!(table_names); + +mod bad_table_names { + use super::invalid; + + invalid!(empty, "[]"); + invalid!(period, "[.]"); + invalid!(trailing_period, "[a.]"); + invalid!(exclamation, "[!]"); + invalid!(newline, "[\"\n\"]"); + invalid!(redefinition, "[a.b]\n[a.\"b\"]"); + invalid!(unterminated_one, "[']"); + invalid!(unterminated_three, "[''']"); + invalid!(multi_empty, "['''''']"); + invalid!(multi_foo, "['''foo''']"); + invalid!(multi_bar, r#"["""bar"""]"#); + invalid!(newline_literal, "['\n']"); + invalid!(crlf_literal, "['\r\n']"); +} + +// Outside the positive range of an i64 +invalid!(integer_range_positive, "a = 9223372036854775808"); +// Outside negative range of an i64 +invalid!(integer_range_negative, "a = -9223372036854775809"); +invalid!(bare_number, "4"); + +valid!(inline_tables); + +mod bad_inline_tables { + use super::invalid; + + invalid!(trailing_comma, "a = {a=1,}"); + invalid!(only_comma, "a = {,}"); + invalid!(duplicate_key, "a = {a=1,a=1}"); + invalid!(newline, "a = {\n}"); + invalid!(eof, "a = {"); +} + +valid!(underscores); + +mod bad_underscores { + use super::invalid; + + invalid!(trailing, "foo = 0_"); + invalid!(trailing2, "foo = 1_0_"); + invalid!(double, "foo = 0__0"); + invalid!(double_leading, "foo = __0"); +} + +invalid!(bad_codepoint, "foo = \"\\uD800\""); +valid!(empty_string, r#"foo = """#); +valid!(key_no_space, "foo=42"); + +mod bad_strings { + use super::invalid; + + invalid!(hex, "foo = \"\\uxx\""); + invalid!(hex2, "foo = \"\\u\""); + invalid!(unterminated, r#"foo = "\"#); + invalid!(unterminated_literal, "foo = '"); +} + +valid!(booleans); + +mod bad_booleans { + use super::invalid; + + invalid!(true_trailing, "foo = true2"); + invalid!(false_trailing, "foo = false2"); + invalid!(leading_t, "foo = t2"); + invalid!(leading_f, "foo = f2"); +} + +mod bad_nesting { + use super::invalid; + + invalid!( + key_then_array, + " + a = [2] + [[a]] + b = 5 + " + ); + 
invalid!( + key_then_dotted, + " + a = 1 + [a.b] + " + ); + invalid!( + array_then_dotted, + " + a = [] + [a.b] + " + ); + invalid!( + array_then_dotted_array, + " + a = [] + [[a.b]] + " + ); + invalid!( + inline, + " + [a] + b = { c = 2, d = {} } + [a.b] + c = 2 + " + ); +} + +mod redefine { + use super::invalid; + + invalid!( + table_then_dotted_then_table_again, + r#" +[a] +foo="bar" +[a.b] +foo="bar" +[a] +"# + ); + invalid!( + table_then_table, + r#" +[a] +foo="bar" +b = { foo = "bar" } +[a] +"# + ); + invalid!( + table_then_dotted, + " + [a] + b = {} + [a.b] + " + ); + invalid!( + table_then_inline, + " + [a] + b = {} + [a] + " + ); +} + +mod datetimes { + use super::invalid; + + invalid!(utc, "utc = 2016-09-09T09:09:09Z"); + invalid!(utc_punkt, "utc = 2016-09-09T09:09:09.1Z"); + invalid!(tz, "tz = 2016-09-09T09:09:09.2+10:00"); + invalid!(tz_neg, "tz = 2016-09-09T09:09:09.123456789-02:00"); + invalid!(utc_trailing_dot, "utc = 2016-09-09T09:09:09.Z"); + invalid!(utc_invalid, "utc = 2016-9-09T09:09:09Z"); + invalid!(tz2, "tz = 2016-09-09T09:09:09+2:00"); + invalid!(tz_neg2, "tz = 2016-09-09T09:09:09-2:00"); + invalid!(tz_neg3, "tz = 2016-09-09T09:09:09Z-2:00"); +} + +mod require_newlines { + use super::invalid; + + invalid!(basic, "0=0r=false"); + invalid!( + strings, + r#" +0=""o=""m=""r=""00="0"q="""0"""e="""0""" +"# + ); + invalid!( + tables, + r#" +[[0000l0]] +0="0"[[0000l0]] +0="0"[[0000l0]] +0="0"l="0" +"# + ); + invalid!( + arrays, + r#" +0=[0]00=[0,0,0]t=["0","0","0"]s=[1000-00-00T00:00:00Z,2000-00-00T00:00:00Z] +"# + ); + invalid!(basic2, "0=0r0=0r=false"); + invalid!(basic3, "0=0r0=0r=falsefal=false"); +} diff --git a/integ-tests/tests/snapshots/de__basic_table.snap b/integ-tests/tests/snapshots/de__basic_table.snap new file mode 100644 index 0000000..bdca267 --- /dev/null +++ b/integ-tests/tests/snapshots/de__basic_table.snap @@ -0,0 +1,10 @@ +--- +source: integ-tests/tests/de.rs +expression: de +--- +Boop { + s: "boop string", + os: Some( + 20, + 
), +} diff --git a/integ-tests/tests/snapshots/parser__bad_booleans__false_trailing.snap b/integ-tests/tests/snapshots/parser__bad_booleans__false_trailing.snap new file mode 100644 index 0000000..071160c --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_booleans__false_trailing.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unquoted-string]: + ┌─ false_trailing:1:7 + │ +1 │ foo = false2 + │ ^^^^^^ string is not quoted + + diff --git a/integ-tests/tests/snapshots/parser__bad_booleans__leading_f.snap b/integ-tests/tests/snapshots/parser__bad_booleans__leading_f.snap new file mode 100644 index 0000000..4b9b687 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_booleans__leading_f.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unquoted-string]: + ┌─ leading_f:1:7 + │ +1 │ foo = f2 + │ ^^ string is not quoted + + diff --git a/integ-tests/tests/snapshots/parser__bad_booleans__leading_t.snap b/integ-tests/tests/snapshots/parser__bad_booleans__leading_t.snap new file mode 100644 index 0000000..41083a6 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_booleans__leading_t.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unquoted-string]: + ┌─ leading_t:1:7 + │ +1 │ foo = t2 + │ ^^ string is not quoted + + diff --git a/integ-tests/tests/snapshots/parser__bad_booleans__true_trailing.snap b/integ-tests/tests/snapshots/parser__bad_booleans__true_trailing.snap new file mode 100644 index 0000000..c427956 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_booleans__true_trailing.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unquoted-string]: + ┌─ true_trailing:1:7 + │ +1 │ foo = true2 + │ ^^^^^ string is not quoted + + diff --git a/integ-tests/tests/snapshots/parser__bad_codepoint.snap b/integ-tests/tests/snapshots/parser__bad_codepoint.snap new file mode 
100644 index 0000000..1321810 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_codepoint.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-escape-value]: + ┌─ bad_codepoint:1:9 + │ +1 │ foo = "\uD800" + │ ^^^^ invalid escape value + + diff --git a/integ-tests/tests/snapshots/parser__bad_floats__trailing_dec.snap b/integ-tests/tests/snapshots/parser__bad_floats__trailing_dec.snap new file mode 100644 index 0000000..9690ad5 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_floats__trailing_dec.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ trailing_dec:1:7 + │ +1 │ a = 0. + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp.snap b/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp.snap new file mode 100644 index 0000000..d4e893c --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ trailing_exp:1:7 + │ +1 │ a = 0.e + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp2.snap b/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp2.snap new file mode 100644 index 0000000..115222a --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp2.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ trailing_exp2:1:7 + │ +1 │ a = 0.E + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp3.snap b/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp3.snap new file mode 100644 index 0000000..4af3d5f --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp3.snap @@ -0,0 +1,11 @@ +--- +source: 
integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ trailing_exp3:1:5 + │ +1 │ a = 0.0E + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp4.snap b/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp4.snap new file mode 100644 index 0000000..ca64f3f --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_floats__trailing_exp4.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ trailing_exp4:1:5 + │ +1 │ a = 0.0e + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_floats__trailing_neg.snap b/integ-tests/tests/snapshots/parser__bad_floats__trailing_neg.snap new file mode 100644 index 0000000..c71b460 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_floats__trailing_neg.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ trailing_neg:1:9 + │ +1 │ a = 0.0e- + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_floats__trailing_pos.snap b/integ-tests/tests/snapshots/parser__bad_floats__trailing_pos.snap new file mode 100644 index 0000000..17006a0 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_floats__trailing_pos.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ trailing_pos:1:5 + │ +1 │ a = 0.0e+ + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_inline_tables__duplicate_key.snap b/integ-tests/tests/snapshots/parser__bad_inline_tables__duplicate_key.snap new file mode 100644 index 0000000..0acec9e --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_inline_tables__duplicate_key.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-key]: + ┌─ duplicate_key:1:10 + │ +1 │ a = {a=1,a=1} + │ - ^ duplicate 
key + │ │ + │ first key instance + + diff --git a/integ-tests/tests/snapshots/parser__bad_inline_tables__eof.snap b/integ-tests/tests/snapshots/parser__bad_inline_tables__eof.snap new file mode 100644 index 0000000..77ff0a8 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_inline_tables__eof.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ eof:1:6 + │ +1 │ a = { + │ ^ expected a table key + + diff --git a/integ-tests/tests/snapshots/parser__bad_inline_tables__newline.snap b/integ-tests/tests/snapshots/parser__bad_inline_tables__newline.snap new file mode 100644 index 0000000..70363ec --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_inline_tables__newline.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ newline:1:6 + │ +1 │ a = { + │ ╭─────^ +2 │ │ } + │ ╰─^ expected a table key + + diff --git a/integ-tests/tests/snapshots/parser__bad_inline_tables__only_comma.snap b/integ-tests/tests/snapshots/parser__bad_inline_tables__only_comma.snap new file mode 100644 index 0000000..629c5a3 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_inline_tables__only_comma.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ only_comma:1:6 + │ +1 │ a = {,} + │ ^^^ expected a table key + + diff --git a/integ-tests/tests/snapshots/parser__bad_inline_tables__trailing_comma.snap b/integ-tests/tests/snapshots/parser__bad_inline_tables__trailing_comma.snap new file mode 100644 index 0000000..aca19f5 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_inline_tables__trailing_comma.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ trailing_comma:1:10 + │ +1 │ a = {a=1,} + │ ^^ expected a table key + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__cr.snap b/integ-tests/tests/snapshots/parser__bad_keys__cr.snap 
new file mode 100644 index 0000000..b34bdda --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__cr.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ cr:1:2 + │ +1 │ " "|=3 + │ invalid character '\r' in string + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__empty_pipe.snap b/integ-tests/tests/snapshots/parser__bad_keys__empty_pipe.snap new file mode 100644 index 0000000..0cb1d3b --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__empty_pipe.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unexpected]: + ┌─ empty_pipe:1:3 + │ +1 │ ""|=3 + │ ^ unexpected character '|' + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__multi_line2.snap b/integ-tests/tests/snapshots/parser__bad_keys__multi_line2.snap new file mode 100644 index 0000000..b78a41f --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__multi_line2.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[multiline-string-key]: + ┌─ multi_line2:1:1 + │ +1 │ """"""=3 + │ ^ multiline keys are not allowed + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__multi_line3.snap b/integ-tests/tests/snapshots/parser__bad_keys__multi_line3.snap new file mode 100644 index 0000000..d161da3 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__multi_line3.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[multiline-string-key]: + ┌─ multi_line3:1:1 + │ +1 │ '''key'''=3 + │ ^^^ multiline keys are not allowed + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__multi_line4.snap b/integ-tests/tests/snapshots/parser__bad_keys__multi_line4.snap new file mode 100644 index 0000000..0f869ab --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__multi_line4.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs 
+expression: error +--- +error[multiline-string-key]: + ┌─ multi_line4:1:1 + │ +1 │ """key"""=3 + │ ^^^ multiline keys are not allowed + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__mutli_line.snap b/integ-tests/tests/snapshots/parser__bad_keys__mutli_line.snap new file mode 100644 index 0000000..50b6e03 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__mutli_line.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[multiline-string-key]: + ┌─ mutli_line:1:1 + │ +1 │ ''''''=3 + │ ^ multiline keys are not allowed + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__newline.snap b/integ-tests/tests/snapshots/parser__bad_keys__newline.snap new file mode 100644 index 0000000..2f2ef3e --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__newline.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ newline:1:4 + │ +1 │ key + │ ╭───^ +2 │ │ =3 + │ ╰──^ expected an equals + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__newline2.snap b/integ-tests/tests/snapshots/parser__bad_keys__newline2.snap new file mode 100644 index 0000000..3f6f930 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__newline2.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ newline2:1:2 + │ +1 │ " + │ ╭─^ +2 │ │ "|=3 + │ ╰^ invalid character '\n' in string + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__newline_after_equal.snap b/integ-tests/tests/snapshots/parser__bad_keys__newline_after_equal.snap new file mode 100644 index 0000000..3718b8e --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__newline_after_equal.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ newline_after_equal:1:5 + │ +1 │ key= + │ ╭────^ +2 │ │ 3 + │ ╰^ expected a value + + diff --git 
a/integ-tests/tests/snapshots/parser__bad_keys__none.snap b/integ-tests/tests/snapshots/parser__bad_keys__none.snap new file mode 100644 index 0000000..4fd1697 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__none.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ none:1:1 + │ +1 │ =3 + │ ^^^ expected a table key + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__pipe.snap b/integ-tests/tests/snapshots/parser__bad_keys__pipe.snap new file mode 100644 index 0000000..104666b --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__pipe.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unexpected]: + ┌─ pipe:1:4 + │ +1 │ key|=3 + │ ^ unexpected character '|' + + diff --git a/integ-tests/tests/snapshots/parser__bad_leading_zeros__neg_two_zeros.snap b/integ-tests/tests/snapshots/parser__bad_leading_zeros__neg_two_zeros.snap new file mode 100644 index 0000000..a995c96 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_leading_zeros__neg_two_zeros.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ neg_two_zeros:1:7 + │ +1 │ a = -00 + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_leading_zeros__neg_with_dec.snap b/integ-tests/tests/snapshots/parser__bad_leading_zeros__neg_with_dec.snap new file mode 100644 index 0000000..8087b2e --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_leading_zeros__neg_with_dec.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ neg_with_dec:1:7 + │ +1 │ a = -00.0 + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_leading_zeros__pos_two_zeros.snap b/integ-tests/tests/snapshots/parser__bad_leading_zeros__pos_two_zeros.snap new file mode 100644 index 0000000..1ecfc6b --- /dev/null +++ 
b/integ-tests/tests/snapshots/parser__bad_leading_zeros__pos_two_zeros.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ pos_two_zeros:1:7 + │ +1 │ a = +00 + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_leading_zeros__pos_with_dec.snap b/integ-tests/tests/snapshots/parser__bad_leading_zeros__pos_with_dec.snap new file mode 100644 index 0000000..06169bf --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_leading_zeros__pos_with_dec.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ pos_with_dec:1:7 + │ +1 │ a = +00.0 + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_leading_zeros__two_zeros.snap b/integ-tests/tests/snapshots/parser__bad_leading_zeros__two_zeros.snap new file mode 100644 index 0000000..aa8562d --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_leading_zeros__two_zeros.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ two_zeros:1:6 + │ +1 │ a = 00 + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_leading_zeros__with_dec.snap b/integ-tests/tests/snapshots/parser__bad_leading_zeros__with_dec.snap new file mode 100644 index 0000000..cb6848c --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_leading_zeros__with_dec.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ with_dec:1:6 + │ +1 │ a = 00.0 + │ ^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_nesting__array_then_dotted.snap b/integ-tests/tests/snapshots/parser__bad_nesting__array_then_dotted.snap new file mode 100644 index 0000000..7024b00 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_nesting__array_then_dotted.snap @@ -0,0 +1,13 @@ +--- +source: 
integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-key]: + ┌─ array_then_dotted:3:10 + │ +2 │ a = [] + │ - first key instance +3 │ [a.b] + │ ^ duplicate key + + diff --git a/integ-tests/tests/snapshots/parser__bad_nesting__array_then_dotted_array.snap b/integ-tests/tests/snapshots/parser__bad_nesting__array_then_dotted_array.snap new file mode 100644 index 0000000..674c243 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_nesting__array_then_dotted_array.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-key]: + ┌─ array_then_dotted_array:3:11 + │ +2 │ a = [] + │ - first key instance +3 │ [[a.b]] + │ ^ duplicate key + + diff --git a/integ-tests/tests/snapshots/parser__bad_nesting__inline.snap b/integ-tests/tests/snapshots/parser__bad_nesting__inline.snap new file mode 100644 index 0000000..92aadf9 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_nesting__inline.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-key]: + ┌─ inline:4:12 + │ +3 │ b = { c = 2, d = {} } + │ - first key instance +4 │ [a.b] + │ ^ duplicate key + + diff --git a/integ-tests/tests/snapshots/parser__bad_nesting__key_then_array.snap b/integ-tests/tests/snapshots/parser__bad_nesting__key_then_array.snap new file mode 100644 index 0000000..c34b530 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_nesting__key_then_array.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-key]: + ┌─ key_then_array:3:11 + │ +2 │ a = [2] + │ - first key instance +3 │ [[a]] + │ ^ duplicate key + + diff --git a/integ-tests/tests/snapshots/parser__bad_nesting__key_then_dotted.snap b/integ-tests/tests/snapshots/parser__bad_nesting__key_then_dotted.snap new file mode 100644 index 0000000..f5a9fe6 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_nesting__key_then_dotted.snap @@ -0,0 +1,13 @@ +--- +source: 
integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-key]: + ┌─ key_then_dotted:3:10 + │ +2 │ a = 1 + │ - first key instance +3 │ [a.b] + │ ^ duplicate key + + diff --git a/integ-tests/tests/snapshots/parser__bad_strings__hex.snap b/integ-tests/tests/snapshots/parser__bad_strings__hex.snap new file mode 100644 index 0000000..630bf27 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_strings__hex.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-hex-escape]: + ┌─ hex:1:10 + │ +1 │ foo = "\uxx" + │ ^ invalid hex escape 'x' + + diff --git a/integ-tests/tests/snapshots/parser__bad_strings__hex2.snap b/integ-tests/tests/snapshots/parser__bad_strings__hex2.snap new file mode 100644 index 0000000..5cdd597 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_strings__hex2.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-hex-escape]: + ┌─ hex2:1:10 + │ +1 │ foo = "\u" + │ ^ invalid hex escape '"' + + diff --git a/integ-tests/tests/snapshots/parser__bad_strings__unterminated.snap b/integ-tests/tests/snapshots/parser__bad_strings__unterminated.snap new file mode 100644 index 0000000..7d041c1 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_strings__unterminated.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unterminated-string]: + ┌─ unterminated:1:7 + │ +1 │ foo = "\ + │ ^ eof reached before string terminator + + diff --git a/integ-tests/tests/snapshots/parser__bad_strings__unterminated_literal.snap b/integ-tests/tests/snapshots/parser__bad_strings__unterminated_literal.snap new file mode 100644 index 0000000..c70e9d0 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_strings__unterminated_literal.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unterminated-string]: + ┌─ unterminated_literal:1:7 + │ +1 │ foo = ' + │ ^ eof 
reached before string terminator + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__crlf_literal.snap b/integ-tests/tests/snapshots/parser__bad_table_names__crlf_literal.snap new file mode 100644 index 0000000..1bf8651 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__crlf_literal.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ crlf_literal:1:3 + │ +1 │ [' + │ ^ invalid character '\n' in string + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__empty.snap b/integ-tests/tests/snapshots/parser__bad_table_names__empty.snap new file mode 100644 index 0000000..105d996 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__empty.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ empty:1:2 + │ +1 │ [] + │ ^^ expected a table key + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__exclamation.snap b/integ-tests/tests/snapshots/parser__bad_table_names__exclamation.snap new file mode 100644 index 0000000..4ec6428 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__exclamation.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unexpected]: + ┌─ exclamation:1:2 + │ +1 │ [!] + │ ^ unexpected character '!' 
+ + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__multi_bar.snap b/integ-tests/tests/snapshots/parser__bad_table_names__multi_bar.snap new file mode 100644 index 0000000..2fd4502 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__multi_bar.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[multiline-string-key]: + ┌─ multi_bar:1:2 + │ +1 │ ["""bar"""] + │ ^^^ multiline keys are not allowed + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__multi_empty.snap b/integ-tests/tests/snapshots/parser__bad_table_names__multi_empty.snap new file mode 100644 index 0000000..8c14a85 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__multi_empty.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[multiline-string-key]: + ┌─ multi_empty:1:2 + │ +1 │ [''''''] + │ ^ multiline keys are not allowed + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__multi_foo.snap b/integ-tests/tests/snapshots/parser__bad_table_names__multi_foo.snap new file mode 100644 index 0000000..7a72636 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__multi_foo.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[multiline-string-key]: + ┌─ multi_foo:1:2 + │ +1 │ ['''foo'''] + │ ^^^ multiline keys are not allowed + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__newline.snap b/integ-tests/tests/snapshots/parser__bad_table_names__newline.snap new file mode 100644 index 0000000..2fd381d --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__newline.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ newline:1:3 + │ +1 │ [" + │ ╭──^ +2 │ │ "] + │ ╰^ invalid character '\n' in string + + diff --git 
a/integ-tests/tests/snapshots/parser__bad_table_names__newline_literal.snap b/integ-tests/tests/snapshots/parser__bad_table_names__newline_literal.snap new file mode 100644 index 0000000..3686ebb --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__newline_literal.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ newline_literal:1:3 + │ +1 │ [' + │ ╭──^ +2 │ │ '] + │ ╰^ invalid character '\n' in string + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__period.snap b/integ-tests/tests/snapshots/parser__bad_table_names__period.snap new file mode 100644 index 0000000..017d060 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__period.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ period:1:2 + │ +1 │ [.] + │ ^^^ expected a table key + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__redefinition.snap b/integ-tests/tests/snapshots/parser__bad_table_names__redefinition.snap new file mode 100644 index 0000000..9e6f09d --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__redefinition.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-table]: + ┌─ redefinition:2:1 + │ +1 │ ╭ [a.b] +2 │ │ [a."b"] + │ │ ^^^^^^^ duplicate table + │ ╰' first table instance + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__trailing_period.snap b/integ-tests/tests/snapshots/parser__bad_table_names__trailing_period.snap new file mode 100644 index 0000000..e07dcde --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__trailing_period.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ trailing_period:1:4 + │ +1 │ [a.] 
+ │ ^^ expected a table key + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__unterminated_one.snap b/integ-tests/tests/snapshots/parser__bad_table_names__unterminated_one.snap new file mode 100644 index 0000000..a6f0f4e --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__unterminated_one.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unterminated-string]: + ┌─ unterminated_one:1:2 + │ +1 │ ['] + │ ^ eof reached before string terminator + + diff --git a/integ-tests/tests/snapshots/parser__bad_table_names__unterminated_three.snap b/integ-tests/tests/snapshots/parser__bad_table_names__unterminated_three.snap new file mode 100644 index 0000000..4c66e5d --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_table_names__unterminated_three.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unterminated-string]: + ┌─ unterminated_three:1:2 + │ +1 │ ['''] + │ ^ eof reached before string terminator + + diff --git a/integ-tests/tests/snapshots/parser__bad_underscores__double.snap b/integ-tests/tests/snapshots/parser__bad_underscores__double.snap new file mode 100644 index 0000000..b33dd67 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_underscores__double.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ double:1:7 + │ +1 │ foo = 0__0 + │ ^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_underscores__double_leading.snap b/integ-tests/tests/snapshots/parser__bad_underscores__double_leading.snap new file mode 100644 index 0000000..cb29a6b --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_underscores__double_leading.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unquoted-string]: + ┌─ double_leading:1:7 + │ +1 │ foo = __0 + │ ^^^ string is not quoted + + diff --git 
a/integ-tests/tests/snapshots/parser__bad_underscores__trailing.snap b/integ-tests/tests/snapshots/parser__bad_underscores__trailing.snap new file mode 100644 index 0000000..07443ff --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_underscores__trailing.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ trailing:1:7 + │ +1 │ foo = 0_ + │ ^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bad_underscores__trailing2.snap b/integ-tests/tests/snapshots/parser__bad_underscores__trailing2.snap new file mode 100644 index 0000000..acc53a2 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_underscores__trailing2.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ trailing2:1:7 + │ +1 │ foo = 1_0_ + │ ^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__bare_key_names.snap b/integ-tests/tests/snapshots/parser__bare_key_names.snap new file mode 100644 index 0000000..89b3387 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bare_key_names.snap @@ -0,0 +1,16 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +"!" 
= 3 +"\"" = 3 +- = 3 +8 = 8 +_ = 3 +a = 3 +"a^b" = 3 +"character encoding" = 'value' +foo = 3 +foo_-2--3--r23f--4-f2-4 = 3 +foo_3 = 3 +"ʎǝʞ" = 'value' diff --git a/integ-tests/tests/snapshots/parser__bare_number.snap b/integ-tests/tests/snapshots/parser__bare_number.snap new file mode 100644 index 0000000..fdf2653 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bare_number.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ bare_number:1:2 + │ +1 │ 4 + │ ^ expected an equals + + diff --git a/integ-tests/tests/snapshots/parser__blank_literal.snap b/integ-tests/tests/snapshots/parser__blank_literal.snap new file mode 100644 index 0000000..6e287ae --- /dev/null +++ b/integ-tests/tests/snapshots/parser__blank_literal.snap @@ -0,0 +1,7 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "foo": "" +} diff --git a/integ-tests/tests/snapshots/parser__booleans.snap b/integ-tests/tests/snapshots/parser__booleans.snap new file mode 100644 index 0000000..7fe2138 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__booleans.snap @@ -0,0 +1,8 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "false": false, + "true": true +} diff --git a/integ-tests/tests/snapshots/parser__crlf.snap b/integ-tests/tests/snapshots/parser__crlf.snap new file mode 100644 index 0000000..9249499 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__crlf.snap @@ -0,0 +1,20 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "lib": [ + { + "description": "A Rust implementation of a TAR file reader and writer. This library does not\ncurrently handle compression, but it is abstract over all I/O readers and\nwriters. 
Additionally, great lengths are taken to ensure that the entire\ncontents are never required to be entirely resident in memory all at once.\n", + "name": "splay", + "path": "lib.rs" + } + ], + "project": { + "authors": [ + "alex@crichton.co" + ], + "name": "splay", + "version": "0.1.0" + } +} diff --git a/integ-tests/tests/snapshots/parser__datetimes__tz.snap b/integ-tests/tests/snapshots/parser__datetimes__tz.snap new file mode 100644 index 0000000..0a19bd7 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__datetimes__tz.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ tz:1:6 + │ +1 │ tz = 2016-09-09T09:09:09.2+10:00 + │ ^^^^^^^^^^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__datetimes__tz2.snap b/integ-tests/tests/snapshots/parser__datetimes__tz2.snap new file mode 100644 index 0000000..68620de --- /dev/null +++ b/integ-tests/tests/snapshots/parser__datetimes__tz2.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ tz2:1:6 + │ +1 │ tz = 2016-09-09T09:09:09+2:00 + │ ^^^^^^^^^^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__datetimes__tz_neg.snap b/integ-tests/tests/snapshots/parser__datetimes__tz_neg.snap new file mode 100644 index 0000000..ca3d668 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__datetimes__tz_neg.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ tz_neg:1:6 + │ +1 │ tz = 2016-09-09T09:09:09.123456789-02:00 + │ ^^^^^^^^^^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__datetimes__tz_neg2.snap b/integ-tests/tests/snapshots/parser__datetimes__tz_neg2.snap new file mode 100644 index 0000000..75c187e --- /dev/null +++ b/integ-tests/tests/snapshots/parser__datetimes__tz_neg2.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: 
error +--- +error[invalid-number]: + ┌─ tz_neg2:1:6 + │ +1 │ tz = 2016-09-09T09:09:09-2:00 + │ ^^^^^^^^^^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__datetimes__tz_neg3.snap b/integ-tests/tests/snapshots/parser__datetimes__tz_neg3.snap new file mode 100644 index 0000000..2150c9d --- /dev/null +++ b/integ-tests/tests/snapshots/parser__datetimes__tz_neg3.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ tz_neg3:1:6 + │ +1 │ tz = 2016-09-09T09:09:09Z-2:00 + │ ^^^^^^^^^^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__datetimes__utc.snap b/integ-tests/tests/snapshots/parser__datetimes__utc.snap new file mode 100644 index 0000000..805f774 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__datetimes__utc.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ utc:1:7 + │ +1 │ utc = 2016-09-09T09:09:09Z + │ ^^^^^^^^^^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__datetimes__utc_invalid.snap b/integ-tests/tests/snapshots/parser__datetimes__utc_invalid.snap new file mode 100644 index 0000000..ddf9c2b --- /dev/null +++ b/integ-tests/tests/snapshots/parser__datetimes__utc_invalid.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ utc_invalid:1:7 + │ +1 │ utc = 2016-9-09T09:09:09Z + │ ^^^^^^^^^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__datetimes__utc_punkt.snap b/integ-tests/tests/snapshots/parser__datetimes__utc_punkt.snap new file mode 100644 index 0000000..8549c72 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__datetimes__utc_punkt.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ utc_punkt:1:7 + │ +1 │ utc = 2016-09-09T09:09:09.1Z + │ ^^^^^^^^^^^^^ unable to parse number 
+ + diff --git a/integ-tests/tests/snapshots/parser__datetimes__utc_trailing_dot.snap b/integ-tests/tests/snapshots/parser__datetimes__utc_trailing_dot.snap new file mode 100644 index 0000000..e9eddc9 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__datetimes__utc_trailing_dot.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ utc_trailing_dot:1:7 + │ +1 │ utc = 2016-09-09T09:09:09.Z + │ ^^^^^^^^^^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__empty_string.snap b/integ-tests/tests/snapshots/parser__empty_string.snap new file mode 100644 index 0000000..6e287ae --- /dev/null +++ b/integ-tests/tests/snapshots/parser__empty_string.snap @@ -0,0 +1,7 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "foo": "" +} diff --git a/integ-tests/tests/snapshots/parser__empty_table.snap b/integ-tests/tests/snapshots/parser__empty_table.snap new file mode 100644 index 0000000..56da90f --- /dev/null +++ b/integ-tests/tests/snapshots/parser__empty_table.snap @@ -0,0 +1,7 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "foo": {} +} diff --git a/integ-tests/tests/snapshots/parser__floats.snap b/integ-tests/tests/snapshots/parser__floats.snap new file mode 100644 index 0000000..6971dc6 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__floats.snap @@ -0,0 +1,16 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +multi-dec-exp = 1.001 +neg-exp = 1.0 +normal = 1.0 +normal-exp = 1.0 +normal-neg-exp = 1.0 +normal-pos-exp = 1.0 +twenty = 20.0 +twenty-exp = 200000000000.0 +twenty-punkt-ett = 201000000000.0 +two-exp = 20000000000.0 +two-neg-exp = 0.0000000002 +two-pos-exp = 20000000000.0 diff --git a/integ-tests/tests/snapshots/parser__fruit.snap b/integ-tests/tests/snapshots/parser__fruit.snap new file mode 100644 index 0000000..33df175 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__fruit.snap 
@@ -0,0 +1,31 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "fruit": [ + { + "name": "apple", + "physical": { + "color": "red", + "shape": "round" + }, + "variety": [ + { + "name": "red delicious" + }, + { + "name": "granny smith" + } + ] + }, + { + "name": "banana", + "variety": [ + { + "name": "plantain" + } + ] + } + ] +} diff --git a/integ-tests/tests/snapshots/parser__inline_tables.snap b/integ-tests/tests/snapshots/parser__inline_tables.snap new file mode 100644 index 0000000..2e754aa --- /dev/null +++ b/integ-tests/tests/snapshots/parser__inline_tables.snap @@ -0,0 +1,32 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "a": {}, + "b": { + "b": 1 + }, + "c": { + "b": 1 + }, + "d": { + "a": 1, + "b": 2 + }, + "e": { + "a": 1, + "b": 2, + "c": {} + }, + "f": { + "a": [] + }, + "g": { + "a": [] + }, + "h": [ + {}, + {} + ] +} diff --git a/integ-tests/tests/snapshots/parser__integer_range_negative.snap b/integ-tests/tests/snapshots/parser__integer_range_negative.snap new file mode 100644 index 0000000..bbaa8fa --- /dev/null +++ b/integ-tests/tests/snapshots/parser__integer_range_negative.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ integer_range_negative:1:5 + │ +1 │ a = -9223372036854775809 + │ ^^^^^^^^^^^^^^^^^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__integer_range_positive.snap b/integ-tests/tests/snapshots/parser__integer_range_positive.snap new file mode 100644 index 0000000..3553ae1 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__integer_range_positive.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ integer_range_positive:1:5 + │ +1 │ a = 9223372036854775808 + │ ^^^^^^^^^^^^^^^^^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__key_names.snap 
b/integ-tests/tests/snapshots/parser__key_names.snap new file mode 100644 index 0000000..5e4d988 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__key_names.snap @@ -0,0 +1,20 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "!": 3, + "\"": 3, + "-": 3, + "8": 8, + "_": 3, + "a": 3, + "a^b": 3, + "character encoding": "value", + "foo": 3, + "foo_-2--3--r23f--4-f2-4": 3, + "foo_3": 3, + "key#name": 5, + "~!@#$^&*()_+-`1234567890[]\\|/?><.,;:'": 1, + "ʎǝʞ": "value" +} diff --git a/integ-tests/tests/snapshots/parser__key_no_space.snap b/integ-tests/tests/snapshots/parser__key_no_space.snap new file mode 100644 index 0000000..94729bf --- /dev/null +++ b/integ-tests/tests/snapshots/parser__key_no_space.snap @@ -0,0 +1,7 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "foo": 42 +} diff --git a/integ-tests/tests/snapshots/parser__literal_eats_crlf.snap b/integ-tests/tests/snapshots/parser__literal_eats_crlf.snap new file mode 100644 index 0000000..722055f --- /dev/null +++ b/integ-tests/tests/snapshots/parser__literal_eats_crlf.snap @@ -0,0 +1,8 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "bar": "a", + "foo": "" +} diff --git a/integ-tests/tests/snapshots/parser__many_blank.snap b/integ-tests/tests/snapshots/parser__many_blank.snap new file mode 100644 index 0000000..1ad11f1 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__many_blank.snap @@ -0,0 +1,7 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "foo": "\n\n" +} diff --git a/integ-tests/tests/snapshots/parser__newline_literal.snap b/integ-tests/tests/snapshots/parser__newline_literal.snap new file mode 100644 index 0000000..3a8bfee --- /dev/null +++ b/integ-tests/tests/snapshots/parser__newline_literal.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ newline_literal:1:6 + │ +1 │ a = ' + │ ╭─────^ +2 │ │ ' + 
│ ╰^ invalid character '\n' in string + + diff --git a/integ-tests/tests/snapshots/parser__newline_string.snap b/integ-tests/tests/snapshots/parser__newline_string.snap new file mode 100644 index 0000000..ee2e7ec --- /dev/null +++ b/integ-tests/tests/snapshots/parser__newline_string.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ newline_string:1:6 + │ +1 │ a = " + │ ╭─────^ +2 │ │ " + │ ╰^ invalid character '\n' in string + + diff --git a/integ-tests/tests/snapshots/parser__redefine__table_then_dotted.snap b/integ-tests/tests/snapshots/parser__redefine__table_then_dotted.snap new file mode 100644 index 0000000..4feb500 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__redefine__table_then_dotted.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-key]: + ┌─ table_then_dotted:4:12 + │ +3 │ b = {} + │ - first key instance +4 │ [a.b] + │ ^ duplicate key + + diff --git a/integ-tests/tests/snapshots/parser__redefine__table_then_dotted_then_table_again.snap b/integ-tests/tests/snapshots/parser__redefine__table_then_dotted_then_table_again.snap new file mode 100644 index 0000000..11f12f7 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__redefine__table_then_dotted_then_table_again.snap @@ -0,0 +1,16 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-table]: + ┌─ table_then_dotted_then_table_again:6:1 + │ +2 │ ╭ [a] +3 │ │ foo="bar" + │ ╰──' first table instance + · │ +6 │ ╭ [a] +7 │ │ + │ ╰^ duplicate table + + diff --git a/integ-tests/tests/snapshots/parser__redefine__table_then_inline.snap b/integ-tests/tests/snapshots/parser__redefine__table_then_inline.snap new file mode 100644 index 0000000..9de3fcc --- /dev/null +++ b/integ-tests/tests/snapshots/parser__redefine__table_then_inline.snap @@ -0,0 +1,15 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-table]: 
+ ┌─ table_then_inline:4:9 + │ +2 │ ╭ [a] +3 │ │ b = {} + │ ╰──' first table instance +4 │ ╭ [a] +5 │ │ + │ ╰^ duplicate table + + diff --git a/integ-tests/tests/snapshots/parser__redefine__table_then_table.snap b/integ-tests/tests/snapshots/parser__redefine__table_then_table.snap new file mode 100644 index 0000000..8eef671 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__redefine__table_then_table.snap @@ -0,0 +1,16 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[duplicate-table]: + ┌─ table_then_table:5:1 + │ +2 │ ╭ [a] +3 │ │ foo="bar" + │ ╰──' first table instance +4 │ b = { foo = "bar" } +5 │ ╭ [a] +6 │ │ + │ ╰^ duplicate table + + diff --git a/integ-tests/tests/snapshots/parser__require_newlines__arrays.snap b/integ-tests/tests/snapshots/parser__require_newlines__arrays.snap new file mode 100644 index 0000000..8a15b8b --- /dev/null +++ b/integ-tests/tests/snapshots/parser__require_newlines__arrays.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ arrays:2:6 + │ +2 │ 0=[0]00=[0,0,0]t=["0","0","0"]s=[1000-00-00T00:00:00Z,2000-00-00T00:00:00Z] + │ ^^^^^^^^^^^^^ expected newline + + diff --git a/integ-tests/tests/snapshots/parser__require_newlines__basic.snap b/integ-tests/tests/snapshots/parser__require_newlines__basic.snap new file mode 100644 index 0000000..57a24cc --- /dev/null +++ b/integ-tests/tests/snapshots/parser__require_newlines__basic.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ basic:1:3 + │ +1 │ 0=0r=false + │ ^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__require_newlines__basic2.snap b/integ-tests/tests/snapshots/parser__require_newlines__basic2.snap new file mode 100644 index 0000000..8c6e37c --- /dev/null +++ b/integ-tests/tests/snapshots/parser__require_newlines__basic2.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: 
error +--- +error[invalid-number]: + ┌─ basic2:1:3 + │ +1 │ 0=0r0=0r=false + │ ^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__require_newlines__basic3.snap b/integ-tests/tests/snapshots/parser__require_newlines__basic3.snap new file mode 100644 index 0000000..99b8ae7 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__require_newlines__basic3.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-number]: + ┌─ basic3:1:3 + │ +1 │ 0=0r0=0r=falsefal=false + │ ^^^ unable to parse number + + diff --git a/integ-tests/tests/snapshots/parser__require_newlines__strings.snap b/integ-tests/tests/snapshots/parser__require_newlines__strings.snap new file mode 100644 index 0000000..3e45d05 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__require_newlines__strings.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ strings:2:5 + │ +2 │ 0=""o=""m=""r=""00="0"q="""0"""e="""0""" + │ ^^^^^^^^^^^^^ expected newline + + diff --git a/integ-tests/tests/snapshots/parser__require_newlines__tables.snap b/integ-tests/tests/snapshots/parser__require_newlines__tables.snap new file mode 100644 index 0000000..57656b1 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__require_newlines__tables.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[wanted]: + ┌─ tables:3:6 + │ +3 │ 0="0"[[0000l0]] + │ ╭──────^ +4 │ │ 0="0"[[0000l0]] + │ ╰───^ expected newline + + diff --git a/integ-tests/tests/snapshots/parser__stray_cr__array_value.snap b/integ-tests/tests/snapshots/parser__stray_cr__array_value.snap new file mode 100644 index 0000000..3d6665e --- /dev/null +++ b/integ-tests/tests/snapshots/parser__stray_cr__array_value.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unexpected]: + ┌─ array_value:1:7 + │ +1 │ a = [ ] + │ unexpected character '\r' + + diff 
--git a/integ-tests/tests/snapshots/parser__stray_cr__ml_basic.snap b/integ-tests/tests/snapshots/parser__stray_cr__ml_basic.snap new file mode 100644 index 0000000..1ea546a --- /dev/null +++ b/integ-tests/tests/snapshots/parser__stray_cr__ml_basic.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ ml_basic:1:4 + │ +1 │ """ """ + │ invalid character '\r' in string + + diff --git a/integ-tests/tests/snapshots/parser__stray_cr__ml_basic2.snap b/integ-tests/tests/snapshots/parser__stray_cr__ml_basic2.snap new file mode 100644 index 0000000..436da78 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__stray_cr__ml_basic2.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ ml_basic2:1:4 + │ +1 │ ''' ''' + │ invalid character '\r' in string + + diff --git a/integ-tests/tests/snapshots/parser__stray_cr__ml_more_whitespace.snap b/integ-tests/tests/snapshots/parser__stray_cr__ml_more_whitespace.snap new file mode 100644 index 0000000..876126b --- /dev/null +++ b/integ-tests/tests/snapshots/parser__stray_cr__ml_more_whitespace.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ ml_more_whitespace:1:6 + │ +1 │ """ """ + │ invalid character '\r' in string + + diff --git a/integ-tests/tests/snapshots/parser__stray_cr__single.snap b/integ-tests/tests/snapshots/parser__stray_cr__single.snap new file mode 100644 index 0000000..78206a8 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__stray_cr__single.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unexpected]: + ┌─ single:1:1 + │ +1 │ + │ ^ unexpected character '\r' + + diff --git a/integ-tests/tests/snapshots/parser__stray_cr__value_literal.snap b/integ-tests/tests/snapshots/parser__stray_cr__value_literal.snap new file mode 100644 index 0000000..a468d29 --- 
/dev/null +++ b/integ-tests/tests/snapshots/parser__stray_cr__value_literal.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ value_literal:1:6 + │ +1 │ a = ' ' + │ invalid character '\r' in string + + diff --git a/integ-tests/tests/snapshots/parser__stray_cr__value_str.snap b/integ-tests/tests/snapshots/parser__stray_cr__value_str.snap new file mode 100644 index 0000000..71e0500 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__stray_cr__value_str.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ value_str:1:6 + │ +1 │ a = " " + │ invalid character '\r' in string + + diff --git a/integ-tests/tests/snapshots/parser__strings.snap b/integ-tests/tests/snapshots/parser__strings.snap new file mode 100644 index 0000000..8d61e3c --- /dev/null +++ b/integ-tests/tests/snapshots/parser__strings.snap @@ -0,0 +1,49 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "answer1": "\u000b", + "answer10": "δα", + "answer11": "ꯁ", + "answer4": "δα", + "answer8": "δβ", + "answer9": "샞", + "backslash": "This string has a \\ backslash character.", + "backspace": "This string has a \b backspace character.", + "bar": "\u0000", + "carriage": "This string has a \r carriage return character.", + "delete": "This string has a  delete control code.", + "empty": "", + "firstnl": "This string has a ' quote character.", + "formfeed": "This string has a \f form feed character.", + "key1": "One\nTwo", + "key2": "One\nTwo", + "key3": "One\nTwo", + "key4": "The quick brown fox jumps over the lazy dog.", + "key5": "The quick brown fox jumps over the lazy dog.", + "key6": "The quick brown fox jumps over the lazy dog.", + "lbackslash": "This string has a \\\\ backslash character.", + "lbackspace": "This string has a \\b backspace character.", + "lcarriage": "This string has a \\r carriage return character.", + "lformfeed": 
"This string has a \\f form feed character.", + "lines": "The first newline is\ntrimmed in raw strings.\nAll other whitespace\nis preserved.\n", + "lnewline": "This string has a \\n new line character.", + "lslash": "This string has a \\/ slash character.", + "ltab": "This string has a \\t tab character.", + "newline": "This string has a \n new line character.", + "notunicode1": "This string does not have a unicode \\u escape.", + "notunicode2": "This string does not have a unicode \\u escape.", + "notunicode3": "This string does not have a unicode \\u0075 escape.", + "notunicode4": "This string does not have a unicode \\u escape.", + "quote": "This string has a \" quote character.", + "quoted": "Tom \"Dubs\" Preston-Werner", + "regex": "<\\i\\c*\\s*>", + "regex2": "I [dw]on't need \\d{2} apples", + "slash": "This string has a / slash character.", + "tab": "This string has a \t tab character.", + "unicode": "δ", + "unitseparator": "This string has a \u001f unit separator control code.", + "winpath": "C:\\Users\\nodejs\\templates", + "winpath2": "\\\\ServerX\\admin$\\system32\\" +} diff --git a/integ-tests/tests/snapshots/parser__table_names.snap b/integ-tests/tests/snapshots/parser__table_names.snap new file mode 100644 index 0000000..a5f2925 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__table_names.snap @@ -0,0 +1,14 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "\"": {}, + "\"\"": {}, + "a": { + "b": {} + }, + "a.a": {}, + "f f": {}, + "f.f": {} +} diff --git a/integ-tests/tests/snapshots/parser__tables_in_arrays.snap b/integ-tests/tests/snapshots/parser__tables_in_arrays.snap new file mode 100644 index 0000000..73d06a2 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__tables_in_arrays.snap @@ -0,0 +1,14 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "foo": [ + { + "bar": {} + }, + { + "bar": {} + } + ] +} diff --git a/integ-tests/tests/snapshots/parser__underscores.snap 
b/integ-tests/tests/snapshots/parser__underscores.snap new file mode 100644 index 0000000..8e95e71 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__underscores.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: valid_toml +--- +{ + "hundred": 100, + "ten": 10, + "thousand": 1000, + "thousand-neg": -1000, + "thousand-pos": 1000 +} diff --git a/integ-tests/tests/snapshots/valid__arrays__empty.snap b/integ-tests/tests/snapshots/valid__arrays__empty.snap new file mode 100644 index 0000000..aeae8b6 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__empty.snap @@ -0,0 +1,15 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "thevoid": [ + [ + [ + [ + [] + ] + ] + ] + ] +} diff --git a/integ-tests/tests/snapshots/valid__arrays__heterogenous.snap b/integ-tests/tests/snapshots/valid__arrays__heterogenous.snap new file mode 100644 index 0000000..46197c5 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__heterogenous.snap @@ -0,0 +1,20 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "mixed": [ + [ + 1, + 2 + ], + [ + "a", + "b" + ], + [ + 1.1, + 2.1 + ] + ] +} diff --git a/integ-tests/tests/snapshots/valid__arrays__ints_and_arrays.snap b/integ-tests/tests/snapshots/valid__arrays__ints_and_arrays.snap new file mode 100644 index 0000000..4cf80fc --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__ints_and_arrays.snap @@ -0,0 +1,12 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "arrays-and-ints": [ + 1, + [ + "Arrays are not integers." 
+ ] + ] +} diff --git a/integ-tests/tests/snapshots/valid__arrays__ints_and_floats.snap b/integ-tests/tests/snapshots/valid__arrays__ints_and_floats.snap new file mode 100644 index 0000000..e3f8f66 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__ints_and_floats.snap @@ -0,0 +1,10 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "ints-and-floats": [ + 1, + 1.1 + ] +} diff --git a/integ-tests/tests/snapshots/valid__arrays__nested.snap b/integ-tests/tests/snapshots/valid__arrays__nested.snap new file mode 100644 index 0000000..ecc26b8 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__nested.snap @@ -0,0 +1,14 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "nest": [ + [ + "a" + ], + [ + "b" + ] + ] +} diff --git a/integ-tests/tests/snapshots/valid__arrays__no_spaces.snap b/integ-tests/tests/snapshots/valid__arrays__no_spaces.snap new file mode 100644 index 0000000..acd277f --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__no_spaces.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "ints": [ + 1, + 2, + 3 + ] +} diff --git a/integ-tests/tests/snapshots/valid__arrays__one.snap b/integ-tests/tests/snapshots/valid__arrays__one.snap new file mode 100644 index 0000000..d0c39c0 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__one.snap @@ -0,0 +1,12 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "people": [ + { + "first_name": "Bruce", + "last_name": "Springsteen" + } + ] +} diff --git a/integ-tests/tests/snapshots/valid__arrays__strings_and_ints.snap b/integ-tests/tests/snapshots/valid__arrays__strings_and_ints.snap new file mode 100644 index 0000000..4e0c13c --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__strings_and_ints.snap @@ -0,0 +1,10 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "strings-and-ints": [ + "hi", + 42 + ] +} diff --git 
a/integ-tests/tests/snapshots/valid__comments.snap b/integ-tests/tests/snapshots/valid__comments.snap new file mode 100644 index 0000000..5e848fe --- /dev/null +++ b/integ-tests/tests/snapshots/valid__comments.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "group": { + "answer": 42, + "more": [ + 42, + 42 + ] + } +} diff --git a/integ-tests/tests/snapshots/valid__evil.snap b/integ-tests/tests/snapshots/valid__evil.snap new file mode 100644 index 0000000..65291c6 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__evil.snap @@ -0,0 +1,27 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "the": { + "hard": { + "another_test_string": " Same thing, but with a string #", + "bit#": { + "multi_line_array": [ + "]" + ], + "what?": "You don't think some user won't do that?" + }, + "harder_test_string": " And when \"'s are in the string, along with # \"", + "test_array": [ + "] ", + " # " + ], + "test_array2": [ + "Test #11 ]proved that", + "Experiment #9 was a success" + ] + }, + "test_string": "You'll hate me after this - #" + } +} diff --git a/integ-tests/tests/snapshots/valid__numbers__floats.snap b/integ-tests/tests/snapshots/valid__numbers__floats.snap new file mode 100644 index 0000000..a3bcaa8 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__numbers__floats.snap @@ -0,0 +1,20 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "longpi": 3.141592653589793, + "multi-dec-exp": 1.001, + "neg-exp": 1.0, + "neglongpi": -3.141592653589793, + "normal": 1.0, + "normal-exp": 1.0, + "normal-neg-exp": 1.0, + "normal-pos-exp": 1.0, + "twenty": 20.0, + "twenty-exp": 200000000000.0, + "twenty-punkt-ett": 201000000000.0, + "two-exp": 20000000000.0, + "two-neg-exp": 0.0000000002, + "two-pos-exp": 20000000000.0 +} diff --git a/integ-tests/tests/snapshots/valid__numbers__integers.snap b/integ-tests/tests/snapshots/valid__numbers__integers.snap new file mode 100644 index 
0000000..5b963a2 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__numbers__integers.snap @@ -0,0 +1,18 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "answer": 42, + "bin1": 214, + "hex1": 3735928559, + "hex2": 3735928559, + "hex3": 3735928559, + "long-answer": 9223372036854775807, + "long-neganswer": -9223372036854775808, + "neg_zero": 0, + "neganswer": -42, + "oct1": 342391, + "oct2": 493, + "pos_zero": 0 +} diff --git a/integ-tests/tests/snapshots/valid__tables__array_many.snap b/integ-tests/tests/snapshots/valid__tables__array_many.snap new file mode 100644 index 0000000..421f022 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__array_many.snap @@ -0,0 +1,20 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "people": [ + { + "first_name": "Bruce", + "last_name": "Springsteen" + }, + { + "first_name": "Eric", + "last_name": "Clapton" + }, + { + "first_name": "Bob", + "last_name": "Seger" + } + ] +} diff --git a/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_after.snap b/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_after.snap new file mode 100644 index 0000000..c6949bb --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_after.snap @@ -0,0 +1,14 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "a": { + "b": { + "c": { + "answer": 42 + } + }, + "better": 43 + } +} diff --git a/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_before.snap b/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_before.snap new file mode 100644 index 0000000..c6949bb --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_before.snap @@ -0,0 +1,14 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "a": { + "b": { + "c": { + "answer": 42 + } + }, + "better": 43 + } +} diff --git 
a/integ-tests/tests/snapshots/valid__tables__implicit_array.snap b/integ-tests/tests/snapshots/valid__tables__implicit_array.snap new file mode 100644 index 0000000..5e556b7 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__implicit_array.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "albums": { + "songs": [ + { + "name": "Glory Days" + } + ] + } +} diff --git a/integ-tests/tests/snapshots/valid__tables__implicit_groups.snap b/integ-tests/tests/snapshots/valid__tables__implicit_groups.snap new file mode 100644 index 0000000..6d91307 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__implicit_groups.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "a": { + "b": { + "c": { + "answer": 42 + } + } + } +} diff --git a/integ-tests/tests/snapshots/valid__tables__nested_arrays.snap b/integ-tests/tests/snapshots/valid__tables__nested_arrays.snap new file mode 100644 index 0000000..cf442ad --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__nested_arrays.snap @@ -0,0 +1,30 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "albums": [ + { + "name": "Born to Run", + "songs": [ + { + "name": "Jungleland" + }, + { + "name": "Meeting Across the River" + } + ] + }, + { + "name": "Born in the USA", + "songs": [ + { + "name": "Glory Days" + }, + { + "name": "Dancing in the Dark" + } + ] + } + ] +} diff --git a/integ-tests/tests/snapshots/valid__tables__sub_empty.snap b/integ-tests/tests/snapshots/valid__tables__sub_empty.snap new file mode 100644 index 0000000..c8bdaf7 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__sub_empty.snap @@ -0,0 +1,9 @@ +--- +source: integ-tests/tests/valid.rs +expression: valid_toml +--- +{ + "a": { + "b": {} + } +} diff --git a/integ-tests/tests/tokens.rs b/integ-tests/tests/tokens.rs new file mode 100644 index 0000000..79e9287 --- /dev/null +++ b/integ-tests/tests/tokens.rs @@ 
-0,0 +1,168 @@ +use pretty_assertions::assert_eq; +use std::borrow::Cow; +use toml_file::tokens::{Error, Token, Tokenizer}; + +fn err(input: &str, err: Error) { + let mut t = Tokenizer::new(input); + let token = t.next().unwrap_err(); + assert_eq!(token, err); + assert!(t.next().unwrap().is_none()); +} + +#[test] +fn strings() { + fn t(input: &str, val: &str, multiline: bool) { + let mut t = Tokenizer::new(input); + let (_, token) = t.next().unwrap().unwrap(); + assert_eq!( + token, + Token::String { + src: input, + val: Cow::Borrowed(val), + multiline, + } + ); + assert!(t.next().unwrap().is_none()); + } + + // Literal strings + t("''", "", false); + t("''''''", "", true); + t("'''\n'''", "", true); + t("'a'", "a", false); + t("'\"a'", "\"a", false); + t("''''a'''", "'a", true); + t("'''\n'a\n'''", "'a\n", true); + t("'''a\n'a\r\n'''", "a\n'a\n", true); + + // Basic strings + t(r#""""#, "", false); + t(r#""""""""#, "", true); + t(r#""a""#, "a", false); + t(r#""""a""""#, "a", true); + t(r#""\t""#, "\t", false); + t(r#""\u0000""#, "\0", false); + t(r#""\U00000000""#, "\0", false); + t(r#""\U000A0000""#, "\u{A0000}", false); + t(r#""\\t""#, "\\t", false); + t("\"\t\"", "\t", false); + t("\"\"\"\n\t\"\"\"", "\t", true); + t("\"\"\"\\\n\"\"\"", "", true); + t( + "\"\"\"\\\n \t \t \\\r\n \t \n \t \r\n\"\"\"", + "", + true, + ); + t(r#""\r""#, "\r", false); + t(r#""\n""#, "\n", false); + t(r#""\b""#, "\u{8}", false); + t(r#""a\fa""#, "a\u{c}a", false); + t(r#""\"a""#, "\"a", false); + t("\"\"\"\na\"\"\"", "a", true); + t("\"\"\"\n\"\"\"", "", true); + t(r#""""a\"""b""""#, "a\"\"\"b", true); + + // Invalid strings + err(r#""\a"#, Error::InvalidEscape(2, 'a')); + err("\"\\\n", Error::InvalidEscape(2, '\n')); + err("\"\\\r\n", Error::InvalidEscape(2, '\n')); + err("\"\\", Error::UnterminatedString(0)); + err("\"\u{0}", Error::InvalidCharInString(1, '\u{0}')); + err(r#""\U00""#, Error::InvalidHexEscape(5, '"')); + err(r#""\U00"#, Error::UnterminatedString(0)); + 
err(r#""\uD800"#, Error::InvalidEscapeValue(2, 4, 0xd800)); + err( + r#""\UFFFFFFFF"#, + Error::InvalidEscapeValue(2, 8, 0xffff_ffff), + ); +} + +#[test] +fn keylike() { + fn t(input: &str) { + let mut t = Tokenizer::new(input); + let (_, token) = t.next().unwrap().unwrap(); + assert_eq!(token, Token::Keylike(input)); + assert!(t.next().unwrap().is_none()); + } + t("foo"); + t("0bar"); + t("bar0"); + t("1234"); + t("a-b"); + t("a_B"); + t("-_-"); + t("___"); +} + +#[test] +fn all() { + fn t(input: &str, expected: &[((usize, usize), Token, &str)]) { + let mut tokens = Tokenizer::new(input); + let mut actual: Vec<((usize, usize), Token, &str)> = Vec::new(); + while let Some((span, token)) = tokens.next().unwrap() { + actual.push((span.into(), token, &input[span.start..span.end])); + } + for (a, b) in actual.iter().zip(expected) { + assert_eq!(a, b); + } + assert_eq!(actual.len(), expected.len()); + } + + t( + " a ", + &[ + ((0, 1), Token::Whitespace(" "), " "), + ((1, 2), Token::Keylike("a"), "a"), + ((2, 3), Token::Whitespace(" "), " "), + ], + ); + + t( + " a\t [[]] \t [] {} , . 
=\n# foo \r\n#foo \n ", + &[ + ((0, 1), Token::Whitespace(" "), " "), + ((1, 2), Token::Keylike("a"), "a"), + ((2, 4), Token::Whitespace("\t "), "\t "), + ((4, 5), Token::LeftBracket, "["), + ((5, 6), Token::LeftBracket, "["), + ((6, 7), Token::RightBracket, "]"), + ((7, 8), Token::RightBracket, "]"), + ((8, 11), Token::Whitespace(" \t "), " \t "), + ((11, 12), Token::LeftBracket, "["), + ((12, 13), Token::RightBracket, "]"), + ((13, 14), Token::Whitespace(" "), " "), + ((14, 15), Token::LeftBrace, "{"), + ((15, 16), Token::RightBrace, "}"), + ((16, 17), Token::Whitespace(" "), " "), + ((17, 18), Token::Comma, ","), + ((18, 19), Token::Whitespace(" "), " "), + ((19, 20), Token::Period, "."), + ((20, 21), Token::Whitespace(" "), " "), + ((21, 22), Token::Equals, "="), + ((22, 23), Token::Newline, "\n"), + ((23, 29), Token::Comment("# foo "), "# foo "), + ((29, 31), Token::Newline, "\r\n"), + ((31, 36), Token::Comment("#foo "), "#foo "), + ((36, 37), Token::Newline, "\n"), + ((37, 38), Token::Whitespace(" "), " "), + ], + ); +} + +#[test] +fn bare_cr_bad() { + err("\r", Error::Unexpected(0, '\r')); + err("'\n", Error::NewlineInString(1)); + err("'\u{0}", Error::InvalidCharInString(1, '\u{0}')); + err("'", Error::UnterminatedString(0)); + err("\u{0}", Error::Unexpected(0, '\u{0}')); +} + +#[test] +fn bad_comment() { + let mut t = Tokenizer::new("#\u{0}"); + t.next().unwrap().unwrap(); + assert_eq!(t.next(), Err(Error::Unexpected(1, '\u{0}'))); + assert!(t.next().unwrap().is_none()); +} diff --git a/integ-tests/tests/valid.rs b/integ-tests/tests/valid.rs new file mode 100644 index 0000000..9e8dbd1 --- /dev/null +++ b/integ-tests/tests/valid.rs @@ -0,0 +1,96 @@ +use integ_tests::valid; + +valid!(comments); +valid!(evil); + +mod arrays { + use super::valid; + + valid!(empty, "thevoid = [[[[[]]]]]"); + valid!(no_spaces, "ints = [1,2,3]"); + valid!(heterogenous, r#"mixed = [[1, 2], ["a", "b"], [1.1, 2.1]]"#); + valid!(nested, r#"nest = [["a"], ["b"]]"#); + 
valid!(ints_and_floats, "ints-and-floats = [1, 1.1]"); + valid!( + ints_and_arrays, + r#"arrays-and-ints = [1, ["Arrays are not integers."]]"# + ); + valid!(strings_and_ints, r#"strings-and-ints = ["hi", 42]"#); + valid!( + one, + "[[people]]\nfirst_name = \"Bruce\"\nlast_name = \"Springsteen\"" + ); +} + +mod tables { + use super::valid; + + valid!( + implicit_and_explicit_after, + r" +[a.b.c] +answer = 42 + +[a] +better = 43 +" + ); + valid!( + implicit_and_explicit_before, + r" +[a] +better = 43 + +[a.b.c] +answer = 42 +" + ); + valid!(implicit_groups, "[a.b.c]\nanswer = 42"); + valid!(implicit_array, "[[albums.songs]]\nname = \"Glory Days\""); + valid!( + array_many, + r#" +[[people]] +first_name = "Bruce" +last_name = "Springsteen" + +[[people]] +first_name = "Eric" +last_name = "Clapton" + +[[people]] +first_name = "Bob" +last_name = "Seger" +"# + ); + valid!( + nested_arrays, + r#" +[[albums]] +name = "Born to Run" + + [[albums.songs]] + name = "Jungleland" + + [[albums.songs]] + name = "Meeting Across the River" + +[[albums]] +name = "Born in the USA" + + [[albums.songs]] + name = "Glory Days" + + [[albums.songs]] + name = "Dancing in the Dark" +"# + ); + valid!(sub_empty, "[a]\n[a.b]"); +} + +mod numbers { + use super::valid; + + valid!(integers); + valid!(floats); +} diff --git a/src/error.rs b/src/error.rs deleted file mode 100644 index 7ddbec9..0000000 --- a/src/error.rs +++ /dev/null @@ -1,193 +0,0 @@ -use std::fmt::{self, Debug, Display}; - -/// Error that can occur when deserializing TOML. -#[derive(Debug)] -pub(super) struct Error { - pub(super) kind: ErrorKind, - pub(super) line: Option, - pub(super) col: usize, - pub(super) at: Option, - pub(super) message: String, - pub(super) key: Vec, -} - -impl std::error::Error for Error {} - -/// Errors that can occur when deserializing a type. -#[derive(Debug)] -pub(super) enum ErrorKind { - /// EOF was reached when looking for a value. 
- UnexpectedEof, - - /// An invalid character not allowed in a string was found. - InvalidCharInString(char), - - /// An invalid character was found as an escape. - InvalidEscape(char), - - /// An invalid character was found in a hex escape. - InvalidHexEscape(char), - - /// An invalid escape value was specified in a hex escape in a string. - /// - /// Valid values are in the plane of unicode codepoints. - InvalidEscapeValue(u32), - - /// A newline in a string was encountered when one was not allowed. - NewlineInString, - - /// An unexpected character was encountered, typically when looking for a - /// value. - Unexpected(char), - - /// An unterminated string was found where EOF was found before the ending - /// EOF mark. - UnterminatedString, - - /// A newline was found in a table key. - NewlineInTableKey, - - /// A number failed to parse. - NumberInvalid, - - /// Wanted one sort of token, but found another. - Wanted { - /// Expected token type. - expected: &'static str, - /// Actually found token type. - found: &'static str, - }, - - /// A duplicate table definition was found. - DuplicateTable(String), - - /// Duplicate key in table. - DuplicateKey(String), - - /// A previously defined table was redefined as an array. - RedefineAsArray, - - /// Multiline strings are not allowed for key. - MultilineStringKey, - - /// A custom error which could be generated when deserializing a particular - /// type. - Custom, - - /// A tuple with a certain number of elements was expected but something - /// else was found. - ExpectedTuple(usize), - - /// Expected table keys to be in increasing tuple index order, but something - /// else was found. - ExpectedTupleIndex { - /// Expected index. - expected: usize, - /// Key that was specified. - found: String, - }, - - /// An empty table was expected but entries were found. - ExpectedEmptyTable, - - /// Dotted key attempted to extend something that is not a table. - DottedKeyInvalidType, - - /// An unexpected key was encountered. 
- /// - /// Used when deserializing a struct with a limited set of fields. - UnexpectedKeys { - /// The unexpected keys. - keys: Vec, - /// Keys that may be specified. - available: &'static [&'static str], - }, - - /// Unquoted string was found when quoted one was expected. - UnquotedString, -} - -impl Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.kind { - ErrorKind::UnexpectedEof => f.write_str("unexpected eof encountered")?, - ErrorKind::InvalidCharInString(c) => write!( - f, - "invalid character in string: `{}`", - c.escape_default().collect::() - )?, - ErrorKind::InvalidEscape(c) => write!( - f, - "invalid escape character in string: `{}`", - c.escape_default().collect::() - )?, - ErrorKind::InvalidHexEscape(c) => write!( - f, - "invalid hex escape character in string: `{}`", - c.escape_default().collect::() - )?, - ErrorKind::InvalidEscapeValue(c) => write!(f, "invalid escape value: `{}`", c)?, - ErrorKind::NewlineInString => f.write_str("newline in string found")?, - ErrorKind::Unexpected(ch) => write!( - f, - "unexpected character found: `{}`", - ch.escape_default().collect::() - )?, - ErrorKind::UnterminatedString => f.write_str("unterminated string")?, - ErrorKind::NewlineInTableKey => f.write_str("found newline in table key")?, - ErrorKind::Wanted { expected, found } => { - write!(f, "expected {}, found {}", expected, found)?; - } - ErrorKind::NumberInvalid => f.write_str("invalid number")?, - ErrorKind::DuplicateTable(ref s) => { - write!(f, "redefinition of table `{}`", s)?; - } - ErrorKind::DuplicateKey(ref s) => { - write!(f, "duplicate key: `{}`", s)?; - } - ErrorKind::RedefineAsArray => f.write_str("table redefined as array")?, - ErrorKind::MultilineStringKey => { - f.write_str("multiline strings are not allowed for key")? 
- } - ErrorKind::Custom => f.write_str(&self.message)?, - ErrorKind::ExpectedTuple(l) => write!(f, "expected table with length {}", l)?, - ErrorKind::ExpectedTupleIndex { - expected, - ref found, - } => write!(f, "expected table key `{}`, but was `{}`", expected, found)?, - ErrorKind::ExpectedEmptyTable => f.write_str("expected empty table")?, - ErrorKind::DottedKeyInvalidType => { - f.write_str("dotted key attempted to extend non-table type")?; - } - ErrorKind::UnexpectedKeys { - ref keys, - available, - } => write!( - f, - "unexpected keys in table: `{:?}`, available keys: `{:?}`", - keys, available - )?, - ErrorKind::UnquotedString => write!( - f, - "invalid TOML value, did you mean to use a quoted string?" - )?, - } - - if !self.key.is_empty() { - write!(f, " for key `")?; - for (i, k) in self.key.iter().enumerate() { - if i > 0 { - write!(f, ".")?; - } - write!(f, "{}", k)?; - } - write!(f, "`")?; - } - - if let Some(line) = self.line { - write!(f, " at line {} column {}", line + 1, self.col + 1)?; - } - - Ok(()) - } -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 895bd0a..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,536 +0,0 @@ -#![allow( - clippy::all, - dead_code, - unused_variables, - unreachable_code, - unused_imports -)] - -mod de; -mod error; -mod tokens; - -// use serde::{ -// de::{self as des, IntoDeserializer}, -// Deserialize, -// }; -use std::{borrow::Cow, fmt}; - -#[derive(Copy, Clone, PartialEq, Eq, Default, Debug)] -pub struct Span { - pub start: usize, - pub end: usize, -} - -impl Span { - #[inline] - pub fn new(start: usize, end: usize) -> Self { - Self { start, end } - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.start == 0 && self.end == 0 - } -} - -impl From for (usize, usize) { - fn from(Span { start, end }: Span) -> (usize, usize) { - (start, end) - } -} - -impl From> for Span { - fn from(s: std::ops::Range) -> Self { - Self { - start: s.start, - end: s.end, - } - } -} - -impl From for std::ops::Range { 
- fn from(s: Span) -> Self { - Self { - start: s.start, - end: s.end, - } - } -} - -#[derive(Debug)] -pub struct Value<'de> { - pub value: ValueInner<'de>, - pub span: Span, -} - -#[derive(Debug, Clone)] -pub struct Key<'de> { - pub name: Cow<'de, str>, - pub span: Span, -} - -impl<'de> Ord for Key<'de> { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.name.cmp(&other.name) - } -} - -impl<'de> PartialOrd for Key<'de> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl<'de> PartialEq for Key<'de> { - fn eq(&self, other: &Self) -> bool { - self.name.eq(&other.name) - } -} - -impl<'de> Eq for Key<'de> {} - -pub type Table<'de> = std::collections::BTreeMap, Value<'de>>; -pub type Array<'de> = Vec>; - -#[derive(Debug)] -pub enum ValueInner<'de> { - String(Cow<'de, str>), - Integer(i64), - Float(f64), - Boolean(bool), - Array(Array<'de>), - Table(Table<'de>), -} - -impl<'de> ValueInner<'de> { - pub fn type_str(&self) -> &'static str { - match self { - Self::String(..) => "string", - Self::Integer(..) => "integer", - Self::Float(..) => "float", - Self::Boolean(..) => "boolean", - Self::Array(..) => "array", - Self::Table(..) 
=> "table", - } - } -} - -// impl<'de> des::Deserializer<'de> for Value<'de> { -// type Error = DesErr; - -// fn deserialize_any(self, visitor: V) -> Result -// where -// V: des::Visitor<'de>, -// { -// match self.value { -// ValueInner::Boolean(v) => visitor.visit_bool(v), -// ValueInner::Integer(n) => visitor.visit_i64(n), -// ValueInner::Float(n) => visitor.visit_f64(n), -// ValueInner::String(v) => visitor.visit_str(v.as_ref()), -// ValueInner::Array(v) => { -// let len = v.len(); -// let mut deserializer = SeqDeserializer::new(v); -// let seq = visitor.visit_seq(&mut deserializer)?; -// let remaining = deserializer.iter.len(); -// if remaining == 0 { -// Ok(seq) -// } else { -// Err(des::Error::invalid_length(len, &"fewer elements in array")) -// } -// } -// ValueInner::Table(v) => { -// let len = v.len(); -// let mut deserializer = MapDeserializer::new(v); -// let map = visitor.visit_map(&mut deserializer)?; -// let remaining = deserializer.iter.len(); -// if remaining == 0 { -// Ok(map) -// } else { -// Err(des::Error::invalid_length(len, &"fewer elements in map")) -// } -// } -// } -// } - -// #[inline] -// fn deserialize_enum( -// self, -// _name: &'static str, -// _variants: &'static [&'static str], -// visitor: V, -// ) -> Result -// where -// V: des::Visitor<'de>, -// { -// match self.value { -// ValueInner::String(variant) => visitor.visit_enum(variant.into_deserializer()), -// ValueInner::Table(variant) => { -// if variant.is_empty() { -// Err(des::Error::custom( -// "wanted exactly 1 element, found 0 elements", -// )) -// } else if variant.len() != 1 { -// Err(des::Error::custom( -// "wanted exactly 1 element, more than 1 element", -// )) -// } else { -// let deserializer = MapDeserializer::new(variant); -// visitor.visit_enum(deserializer) -// } -// } -// _ => Err(des::Error::invalid_type( -// des::Unexpected::UnitVariant, -// &"string only", -// )), -// } -// } - -// // `None` is interpreted as a missing field so be sure to implement `Some` -// // 
as a present field. -// fn deserialize_option(self, visitor: V) -> Result -// where -// V: des::Visitor<'de>, -// { -// visitor.visit_some(self) -// } - -// fn deserialize_newtype_struct( -// self, -// _name: &'static str, -// visitor: V, -// ) -> Result -// where -// V: des::Visitor<'de>, -// { -// visitor.visit_newtype_struct(self) -// } - -// fn deserialize_struct( -// self, -// name: &'static str, -// _fields: &'static [&'static str], -// visitor: V, -// ) -> Result -// where -// V: des::Visitor<'de>, -// { -// if name == super::span_tags::NAME { -// let mut sd = SpanDeserializer::new(self); -// visitor.visit_map(&mut sd) -// } else { -// self.deserialize_any(visitor) -// } -// } - -// serde::forward_to_deserialize_any! { -// bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq -// bytes byte_buf map unit_struct tuple_struct tuple ignored_any identifier -// } -// } - -// struct SeqDeserializer<'de> { -// iter: std::vec::IntoIter>, -// } - -// impl<'de> SeqDeserializer<'de> { -// fn new(vec: Vec>) -> Self { -// SeqDeserializer { -// iter: vec.into_iter(), -// } -// } -// } - -// impl<'de> des::SeqAccess<'de> for SeqDeserializer<'de> { -// type Error = DesErr; - -// fn next_element_seed(&mut self, seed: T) -> Result, Self::Error> -// where -// T: des::DeserializeSeed<'de>, -// { -// match self.iter.next() { -// Some(value) => seed.deserialize(value).map(Some), -// None => Ok(None), -// } -// } - -// fn size_hint(&self) -> Option { -// match self.iter.size_hint() { -// (lower, Some(upper)) if lower == upper => Some(upper), -// _ => None, -// } -// } -// } - -// struct MapDeserializer<'de> { -// iter: as IntoIterator>::IntoIter, -// value: Option>, -// } - -// impl<'de> MapDeserializer<'de> { -// fn new(map: Table<'de>) -> Self { -// MapDeserializer { -// iter: map.into_iter(), -// value: None, -// } -// } -// } - -// impl<'de> des::MapAccess<'de> for MapDeserializer<'de> { -// type Error = DesErr; - -// fn next_key_seed(&mut self, seed: T) -> 
Result, Self::Error> -// where -// T: des::DeserializeSeed<'de>, -// { -// match self.iter.next() { -// Some((key, value)) => { -// self.value = Some(value); -// seed.deserialize(Value { -// value: ValueInner::String(key.into()), -// span: Default::default(), -// }) -// .map(Some) -// } -// None => Ok(None), -// } -// } - -// fn next_value_seed(&mut self, seed: T) -> Result -// where -// T: des::DeserializeSeed<'de>, -// { -// match self.value.take() { -// Some(value) => seed.deserialize(value), -// None => Err(des::Error::custom("value is missing")), -// } -// } - -// fn size_hint(&self) -> Option { -// match self.iter.size_hint() { -// (lower, Some(upper)) if lower == upper => Some(upper), -// _ => None, -// } -// } -// } - -// impl<'de> des::EnumAccess<'de> for MapDeserializer<'de> { -// type Error = DesErr; -// type Variant = MapEnumDeserializer<'de>; - -// fn variant_seed(mut self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> -// where -// V: des::DeserializeSeed<'de>, -// { -// use des::Error; -// let (key, value) = match self.iter.next() { -// Some(pair) => pair, -// None => { -// return Err(Error::custom( -// "expected table with exactly 1 entry, found empty table", -// )); -// } -// }; - -// let val = seed.deserialize(key.into_deserializer())?; -// let variant = MapEnumDeserializer { value }; - -// Ok((val, variant)) -// } -// } - -// struct SpanDeserializer<'de> { -// value: Option>, -// key: usize, -// } - -// impl<'de> SpanDeserializer<'de> { -// fn new(value: Value<'de>) -> Self { -// Self { -// value: Some(value), -// key: 0, -// } -// } -// } - -// impl<'de> des::MapAccess<'de> for SpanDeserializer<'de> { -// type Error = DesErr; - -// fn next_key_seed(&mut self, seed: K) -> Result, Self::Error> -// where -// K: des::DeserializeSeed<'de>, -// { -// if self.key < super::span_tags::FIELDS.len() { -// seed.deserialize(Value { -// value: ValueInner::String(super::span_tags::FIELDS[self.key].into()), -// span: Default::default(), -// }) -// 
.map(Some) -// } else { -// Ok(None) -// } -// } - -// fn next_value_seed(&mut self, seed: V) -> Result -// where -// V: des::DeserializeSeed<'de>, -// { -// let res = match self.key { -// 0 => seed.deserialize(Value { -// value: ValueInner::Integer(self.value.as_ref().unwrap().span.start as _), -// span: Default::default(), -// }), -// 1 => seed.deserialize(Value { -// value: ValueInner::Integer(self.value.as_ref().unwrap().span.end as _), -// span: Default::default(), -// }), -// 2 => seed.deserialize(self.value.take().unwrap().into_deserializer()), -// _ => unreachable!(), -// }; - -// self.key += 1; -// res -// } - -// fn size_hint(&self) -> Option { -// Some(super::span_tags::FIELDS.len() - self.key) -// } -// } - -// pub struct MapEnumDeserializer<'de> { -// value: Value<'de>, -// } - -// impl<'de> des::VariantAccess<'de> for MapEnumDeserializer<'de> { -// type Error = DesErr; - -// fn unit_variant(self) -> Result<(), Self::Error> { -// use des::Error; -// match self.value.0.value { -// ValueInner::Array(values) => { -// if values.is_empty() { -// Ok(()) -// } else { -// Err(Error::custom("expected empty array")) -// } -// } -// ValueInner::Table(values) => { -// if values.is_empty() { -// Ok(()) -// } else { -// Err(Error::custom("expected empty table")) -// } -// } -// e => Err(Error::custom(format!( -// "expected table, found {}", -// e.type_str() -// ))), -// } -// } - -// fn newtype_variant_seed(self, seed: T) -> Result -// where -// T: des::DeserializeSeed<'de>, -// { -// seed.deserialize(self.value.into_deserializer()) -// } - -// fn tuple_variant(self, len: usize, visitor: V) -> Result -// where -// V: des::Visitor<'de>, -// { -// use des::Error; -// match self.value.0.value { -// ValueInner::Array(values) => { -// if values.len() == len { -// serde::de::Deserializer::deserialize_seq(values.into_deserializer(), visitor) -// } else { -// Err(Error::custom(format!("expected tuple with length {}", len))) -// } -// } -// ValueInner::Table(values) => { -// 
let tuple_values: Result, _> = values -// .into_iter() -// .enumerate() -// .map(|(index, (key, value))| match key.parse::() { -// Ok(key_index) if key_index == index => Ok(value), -// Ok(_) | Err(_) => Err(Error::custom(format!( -// "expected table key `{}`, but was `{}`", -// index, key -// ))), -// }) -// .collect(); -// let tuple_values = tuple_values?; - -// if tuple_values.len() == len { -// serde::de::Deserializer::deserialize_seq( -// tuple_values.into_deserializer(), -// visitor, -// ) -// } else { -// Err(Error::custom(format!("expected tuple with length {}", len))) -// } -// } -// e => Err(Error::custom(format!( -// "expected table, found {}", -// e.type_str() -// ))), -// } -// } - -// fn struct_variant( -// self, -// fields: &'static [&'static str], -// visitor: V, -// ) -> Result -// where -// V: des::Visitor<'de>, -// { -// des::Deserializer::deserialize_struct( -// self.value.into_deserializer(), -// "", // TODO: this should be the variant name -// fields, -// visitor, -// ) -// } -// } - -// impl<'de> des::IntoDeserializer<'de, DesErr> for Value<'de> { -// type Deserializer = Self; - -// fn into_deserializer(self) -> Self { -// self -// } -// } - -// pub fn deserialize_spanned(doc: &str) -> anyhow::Result { -// let root = de::from_str(doc)?; -// Ok(T::deserialize(root.into_deserializer())?) 
-// } - -#[test] -fn oh_god_please_work() { - let s = r#" -[[fruit]] -name = "apple" - -[fruit.physical] -color = "red" -shape = "round" - -[[fruit.variety]] -name = "red delicious" - -[[fruit.variety]] -name = "granny smith" - -[[fruit]] -name = "banana" - -[[fruit.variety]] -name = "plantain" -"#; - let table = de::from_str(s).unwrap(); - - panic!("{table:?}"); -} diff --git a/toml-file-derive/Cargo.toml b/toml-file-derive/Cargo.toml new file mode 100644 index 0000000..5b43258 --- /dev/null +++ b/toml-file-derive/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "toml-file-derive" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +proc-macro2.workspace = true +quote.workspace = true +syn.workspace = true diff --git a/toml-file-derive/src/de.rs b/toml-file-derive/src/de.rs new file mode 100644 index 0000000..f1a1de0 --- /dev/null +++ b/toml-file-derive/src/de.rs @@ -0,0 +1,5 @@ +use proc_macro2::TokenStream; + +pub(super) fn expand(_input: &mut syn::DeriveInput) -> syn::Result { + unimplemented!() +} diff --git a/toml-file-derive/src/lib.rs b/toml-file-derive/src/lib.rs new file mode 100644 index 0000000..0ed41fb --- /dev/null +++ b/toml-file-derive/src/lib.rs @@ -0,0 +1,9 @@ +mod de; + +#[proc_macro_derive(Deserialize, attributes(toml))] +pub fn derive_deserialize(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let mut input = syn::parse_macro_input!(input as syn::DeriveInput); + de::expand(&mut input) + .unwrap_or_else(syn::Error::into_compile_error) + .into() +} diff --git a/toml-file/Cargo.toml b/toml-file/Cargo.toml new file mode 100644 index 0000000..ec1b6ed --- /dev/null +++ b/toml-file/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "toml-file" +version = "0.1.0" +edition = "2021" + +[features] +serde = ["dep:serde"] +reporting = ["dep:codespan-reporting"] + +[dependencies] +codespan-reporting = { version = "0.11", optional = true } +identconv = "0.2" +serde = { version = "1.0", optional = true } +smallvec = 
"1.13" diff --git a/src/de.rs b/toml-file/src/de.rs similarity index 75% rename from src/de.rs rename to toml-file/src/de.rs index 3c391d2..26c4a5a 100644 --- a/src/de.rs +++ b/toml-file/src/de.rs @@ -1,21 +1,20 @@ -use super::{ +use crate::{ error::{Error, ErrorKind}, tokens::{Error as TokenError, Token, Tokenizer}, - Key, Span, Value, ValueInner, + value::{self, Key, Value, ValueInner}, + Span, }; use smallvec::SmallVec; use std::{ borrow::Cow, - collections::{btree_map::Entry, BTreeMap, BTreeSet}, - fmt::Display, - iter, str, vec, + collections::{btree_map::Entry, BTreeMap}, }; type DeStr<'de> = Cow<'de, str>; type TablePair<'de> = (Key<'de>, Val<'de>); type InlineVec = SmallVec<[T; 5]>; -pub fn from_str(s: &str) -> Result, Error> { +pub fn parse(s: &str) -> Result, Error> { let mut de = Deserializer::new(s); let mut tables = de.tables()?; @@ -30,12 +29,16 @@ pub fn from_str(s: &str) -> Result, Error> { table_pindices: &table_pindices, de: &de, values: None, + max: tables.len(), }; - let mut root = super::Table::new(); + let mut root = value::Table::new(); deserialize_table(root_ctx, &mut tables, &mut root)?; - Ok(root) + Ok(Value::with_span( + ValueInner::Table(root), + Span::new(0, s.len()), + )) } struct Deserializer<'a> { @@ -47,7 +50,7 @@ struct Ctx<'de, 'b> { depth: usize, cur: usize, cur_parent: usize, - //max: usize, + max: usize, table_indices: &'b BTreeMap>, Vec>, table_pindices: &'b BTreeMap>, Vec>, de: &'b Deserializer<'de>, @@ -57,22 +60,34 @@ struct Ctx<'de, 'b> { impl<'de, 'b> Ctx<'de, 'b> { #[inline] - fn error(&self, at: usize, kind: ErrorKind) -> Error { - self.de.error(at, kind) + fn error(&self, start: usize, end: Option, kind: ErrorKind) -> Error { + self.de.error(start, end, kind) } } +macro_rules! 
printc { + ($c:expr, $($arg:tt)*) => {{ + let ctx = $c; + for _ in 0..ctx.depth { + eprint!(" "); + } + + eprint!("{}:{} {}:{} ", file!(), line!(), ctx.cur_parent, ctx.cur); + + eprintln!($($arg)*); + }}; +} + fn deserialize_table<'de, 'b>( mut ctx: Ctx<'de, 'b>, tables: &'b mut [Table<'de>], - table: &mut super::Table<'de>, + table: &mut value::Table<'de>, ) -> Result { - let max = tables.len(); - - while ctx.cur_parent < max && ctx.cur < max { + while ctx.cur_parent < ctx.max && ctx.cur < ctx.max { if let Some(values) = ctx.values.take() { for (key, val) in values { - table_insert(table, key, val)?; + printc!(&ctx, "{} => {val:?}", key.name); + table_insert(table, key, val, ctx.de)?; } } @@ -90,7 +105,7 @@ fn deserialize_table<'de, 'b>( } entries[start..].iter().find_map(|i| { let i = *i; - (i < max && tables[i].values.is_some()).then_some(i) + (i < ctx.max && tables[i].values.is_some()).then_some(i) }) }) }; @@ -99,29 +114,38 @@ fn deserialize_table<'de, 'b>( break; }; - ctx.cur = dbg!(pos); + ctx.cur = pos; + printc!(&ctx, "next table"); // Test to see if we're duplicating our parent's table, and if so // then this is an error in the toml format if ctx.cur_parent != pos { - if tables[ctx.cur_parent].header == tables[pos].header { - let at = tables[pos].at; - let name = tables[pos].header.iter().fold(String::new(), |mut s, k| { + let cur = &tables[pos]; + let parent = &tables[ctx.cur_parent]; + if parent.header == cur.header { + let name = cur.header.iter().fold(String::new(), |mut s, k| { if !s.is_empty() { s.push('.'); } s.push_str(&k.name); s }); - return Err(ctx.error(at, ErrorKind::DuplicateTable(name))); + + let first = Span::new(parent.at, parent.end); + + return Err(ctx.error( + cur.at, + Some(cur.end), + ErrorKind::DuplicateTable { name, first }, + )); } // If we're here we know we should share the same prefix, and if // the longer table was defined first then we want to narrow // down our parent's length if possible to ensure that we catch // duplicate 
tables defined afterwards. - let parent_len = tables[ctx.cur_parent].header.len(); - let cur_len = tables[pos].header.len(); + let parent_len = parent.header.len(); + let cur_len = cur.header.len(); if cur_len < parent_len { ctx.cur_parent = pos; } @@ -134,27 +158,31 @@ fn deserialize_table<'de, 'b>( // decoding. if ctx.depth != ttable.header.len() { let key = ttable.header[ctx.depth].clone(); - dbg!(table.keys().map(|k| k.name.as_ref()).collect::>()); - if table.contains_key(dbg!(&key)) { - return Err(Error::from_kind( - Some(key.span.start), - ErrorKind::DuplicateKey(key.name.to_string()), + printc!(&ctx, "need next table '{}'", key.name); + if let Some((k, _)) = table.get_key_value(&key) { + return Err(ctx.error( + key.span.start, + Some(key.span.end), + ErrorKind::DuplicateKey { + key: key.name.to_string(), + first: k.span, + }, )); } - let array = dbg!(ttable.array && ctx.depth == ttable.header.len() - 1); - + let array = ttable.array && ctx.depth == ttable.header.len() - 1; ctx.cur += 1; - dbg!(ctx.cur); + printc!(&ctx, "before"); let cctx = Ctx { - depth: ctx.depth + 1, //if array { 0 } else { 1 }, + depth: ctx.depth + if array { 0 } else { 1 }, + max: ctx.max, cur: 0, - cur_parent: dbg!(pos), + cur_parent: pos, table_indices: ctx.table_indices, table_pindices: ctx.table_pindices, de: ctx.de, - values: array.then(|| ttable.values.take().unwrap()), + values: None, //array.then(|| ttable.values.take().unwrap()), }; let value = if array { @@ -162,18 +190,12 @@ fn deserialize_table<'de, 'b>( deserialize_array(cctx, tables, &mut arr)?; ValueInner::Array(arr) } else { - let mut tab = super::Table::new(); + let mut tab = value::Table::new(); deserialize_table(cctx, tables, &mut tab)?; ValueInner::Table(tab) }; - table.insert( - key, - Value { - value, - span: Span::new(0, 0), - }, - ); + table.insert(key, Value::new(value)); continue; } @@ -182,16 +204,18 @@ fn deserialize_table<'de, 'b>( // [[foo.bar]] // [[foo]] if ttable.array { - return Err(ctx.error(ttable.at, 
ErrorKind::RedefineAsArray)); + return Err(ctx.error(ttable.at, Some(ttable.end), ErrorKind::RedefineAsArray)); } + printc!(&ctx, "taking values"); ctx.values = ttable.values.take(); } + printc!(&ctx, "done"); Ok(ctx.cur_parent) } -fn to_value(val: Val<'_>) -> Result, Error> { +fn to_value<'de>(val: Val<'de>, de: &Deserializer<'de>) -> Result, Error> { let value = match val.e { E::String(s) => ValueInner::String(s), E::Boolean(b) => ValueInner::Boolean(b), @@ -200,62 +224,59 @@ fn to_value(val: Val<'_>) -> Result, Error> { E::Array(arr) => { let mut varr = Vec::new(); for val in arr { - varr.push(to_value(val)?); + varr.push(to_value(val, de)?); } ValueInner::Array(varr) } E::DottedTable(tab) | E::InlineTable(tab) => { - let mut ntable = super::Table::new(); + let mut ntable = value::Table::new(); for (k, v) in tab { - table_insert(&mut ntable, k, v)?; + table_insert(&mut ntable, k, v, de)?; } ValueInner::Table(ntable) } }; - Ok(Value { - value, - span: Span::new(val.start, val.end), - }) + Ok(Value::with_span(value, Span::new(val.start, val.end))) } fn table_insert<'de>( - table: &mut super::Table<'de>, + table: &mut value::Table<'de>, key: Key<'de>, val: Val<'de>, + de: &Deserializer<'de>, ) -> Result<(), Error> { match table.entry(key.clone()) { - Entry::Occupied(occ) => { - return Err(Error::from_kind( - Some(key.span.start), - ErrorKind::DuplicateKey(key.name.to_string()), - )); - } + Entry::Occupied(occ) => Err(de.error( + key.span.start, + Some(key.span.end), + ErrorKind::DuplicateKey { + key: key.name.to_string(), + first: occ.key().span, + }, + )), Entry::Vacant(vac) => { - vac.insert(to_value(val)?); + vac.insert(to_value(val, de)?); + Ok(()) } } - - Ok(()) } fn deserialize_array<'de, 'b>( mut ctx: Ctx<'de, 'b>, tables: &'b mut [Table<'de>], - arr: &mut Vec>, + arr: &mut Vec>, ) -> Result { - let max = tables.len(); - if let Some(values) = ctx.values.take() { for (key, val) in values { - dbg!(key); - arr.push(to_value(val)?); + printc!(&ctx, "{} => 
{val:?}", key.name); + arr.push(to_value(val, ctx.de)?); } } - while ctx.cur_parent < max { + while ctx.cur_parent < ctx.max { let header_stripped = tables[ctx.cur_parent] .header .iter() @@ -272,21 +293,37 @@ fn deserialize_array<'de, 'b>( } entries[start..] .iter() - .filter_map(|i| if *i < max { Some(*i) } else { None }) + .filter_map(|i| if *i < ctx.max { Some(*i) } else { None }) .map(|i| (i, &tables[i])) .find(|(_, table)| table.array) .map(|p| p.0) }) - .unwrap_or(max); + .unwrap_or(ctx.max); + + printc!(&ctx, "array enter"); + + let actx = Ctx { + values: Some( + tables[ctx.cur_parent] + .values + .take() + .expect("no array values"), + ), + max: next, + depth: ctx.depth + 1, + cur: 0, + cur_parent: ctx.cur_parent, + table_indices: ctx.table_indices, + table_pindices: ctx.table_pindices, + de: ctx.de, + }; - if let Some(values) = tables[ctx.cur_parent].values.take() { - for (key, val) in values { - dbg!(key); - arr.push(to_value(val)?); - } - } + let mut table = value::Table::new(); + deserialize_table(actx, tables, &mut table)?; + arr.push(Value::new(ValueInner::Table(table))); - ctx.cur_parent = dbg!(next); + ctx.cur_parent = next; + printc!(&ctx, "array advance"); } Ok(ctx.cur_parent) @@ -354,6 +391,7 @@ fn build_table_pindices<'de>(tables: &[Table<'de>]) -> BTreeMap { at: usize, + end: usize, header: InlineVec>, values: Option>>, array: bool, @@ -371,6 +409,7 @@ impl<'a> Deserializer<'a> { let mut tables = Vec::new(); let mut cur_table = Table { at: 0, + end: 0, header: InlineVec::new(), values: None, array: false, @@ -380,6 +419,7 @@ impl<'a> Deserializer<'a> { match line { Line::Table { at, + end, mut header, array, } => { @@ -388,13 +428,13 @@ impl<'a> Deserializer<'a> { } cur_table = Table { at, + end, header: InlineVec::new(), values: Some(Vec::new()), array, }; loop { - let part = header.next().map_err(|e| self.token_error(e)); - match part? { + match header.next().map_err(|e| self.token_error(e))? 
{ Some(part) => cur_table.header.push(part), None => break, } @@ -439,8 +479,10 @@ impl<'a> Deserializer<'a> { let array = self.eat(Token::LeftBracket)?; let ret = Header::new(self.tokens.clone(), array); self.tokens.skip_to_newline(); + let end = self.tokens.current(); Ok(Line::Table { at: start, + end, header: ret, array, }) @@ -498,6 +540,7 @@ impl<'a> Deserializer<'a> { Some(token) => { return Err(self.error( at, + Some(token.0.end), ErrorKind::Wanted { expected: "a value", found: token.1.describe(), @@ -517,7 +560,7 @@ impl<'a> Deserializer<'a> { let first_char = key.chars().next().expect("key should not be empty here"); match first_char { '-' | '0'..='9' => self.number(span, key), - _ => Err(self.error(at, ErrorKind::UnquotedString)), + _ => Err(self.error(at, Some(span.end), ErrorKind::UnquotedString)), } } @@ -549,7 +592,7 @@ impl<'a> Deserializer<'a> { end, }) } - _ => Err(self.error(at, ErrorKind::NumberInvalid)), + _ => Err(self.error(at, Some(end), ErrorKind::InvalidNumber)), } } else if s == "inf" { Ok(Val { @@ -580,11 +623,11 @@ impl<'a> Deserializer<'a> { } } - fn number_leading_plus(&mut self, Span { start, .. }: Span) -> Result, Error> { + fn number_leading_plus(&mut self, Span { start, end }: Span) -> Result, Error> { let start_token = self.tokens.current(); match self.next()? { Some((Span { end, .. 
}, Token::Keylike(s))) => self.number(Span { start, end }, s), - _ => Err(self.error(start_token, ErrorKind::NumberInvalid)), + _ => Err(self.error(start_token, Some(end), ErrorKind::InvalidNumber)), } } @@ -594,10 +637,10 @@ impl<'a> Deserializer<'a> { let (prefix, suffix) = self.parse_integer(s, allow_sign, allow_leading_zeros, radix)?; let start = self.tokens.substr_offset(s); if !suffix.is_empty() { - return Err(self.error(start, ErrorKind::NumberInvalid)); + return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber)); } i64::from_str_radix(prefix.replace('_', "").trim_start_matches('+'), radix) - .map_err(|_e| self.error(start, ErrorKind::NumberInvalid)) + .map_err(|_e| self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber)) } fn parse_integer( @@ -613,6 +656,7 @@ impl<'a> Deserializer<'a> { let mut first_zero = false; let mut underscore = false; let mut end = s.len(); + let send = start + s.len(); for (i, c) in s.char_indices() { let at = i + start; if i == 0 && (c == '+' || c == '-') && allow_sign { @@ -623,11 +667,11 @@ impl<'a> Deserializer<'a> { first_zero = true; } else if c.is_digit(radix) { if !first && first_zero && !allow_leading_zeros { - return Err(self.error(at, ErrorKind::NumberInvalid)); + return Err(self.error(at, Some(send), ErrorKind::InvalidNumber)); } underscore = false; } else if c == '_' && first { - return Err(self.error(at, ErrorKind::NumberInvalid)); + return Err(self.error(at, Some(send), ErrorKind::InvalidNumber)); } else if c == '_' && !underscore { underscore = true; } else { @@ -637,7 +681,7 @@ impl<'a> Deserializer<'a> { first = false; } if first || underscore { - return Err(self.error(start, ErrorKind::NumberInvalid)); + return Err(self.error(start, Some(send), ErrorKind::InvalidNumber)); } Ok((&s[..end], &s[end..])) } @@ -649,7 +693,7 @@ impl<'a> Deserializer<'a> { let mut fraction = None; if let Some(after) = after_decimal { if !suffix.is_empty() { - return Err(self.error(start, 
ErrorKind::NumberInvalid)); + return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber)); } let (a, b) = self.parse_integer(after, false, true, 10)?; fraction = Some(a); @@ -662,17 +706,23 @@ impl<'a> Deserializer<'a> { self.eat(Token::Plus)?; match self.next()? { Some((_, Token::Keylike(s))) => self.parse_integer(s, false, true, 10)?, - _ => return Err(self.error(start, ErrorKind::NumberInvalid)), + _ => { + return Err(self.error( + start, + Some(start + s.len()), + ErrorKind::InvalidNumber, + )) + } } } else { self.parse_integer(&suffix[1..], true, true, 10)? }; if !b.is_empty() { - return Err(self.error(start, ErrorKind::NumberInvalid)); + return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber)); } exponent = Some(a); } else if !suffix.is_empty() { - return Err(self.error(start, ErrorKind::NumberInvalid)); + return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber)); } let mut number = integral @@ -690,12 +740,12 @@ impl<'a> Deserializer<'a> { } number .parse() - .map_err(|_e| self.error(start, ErrorKind::NumberInvalid)) + .map_err(|_e| self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber)) .and_then(|n: f64| { if n.is_finite() { Ok(n) } else { - Err(self.error(start, ErrorKind::NumberInvalid)) + Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber)) } }) } @@ -809,8 +859,8 @@ impl<'a> Deserializer<'a> { )) => { return self.add_dotted_key(key_parts, value, v); } - Some(&mut (_, Val { start, .. })) => { - return Err(self.error(start, ErrorKind::DottedKeyInvalidType)); + Some(&mut (_, Val { start, end, .. 
})) => { + return Err(self.error(start, Some(end), ErrorKind::DottedKeyInvalidType)); } None => {} } @@ -880,36 +930,52 @@ impl<'a> Deserializer<'a> { } fn eof(&self) -> Error { - self.error(self.input.len(), ErrorKind::UnexpectedEof) + self.error(self.input.len(), None, ErrorKind::UnexpectedEof) } fn token_error(&self, error: TokenError) -> Error { match error { TokenError::InvalidCharInString(at, ch) => { - self.error(at, ErrorKind::InvalidCharInString(ch)) + self.error(at, None, ErrorKind::InvalidCharInString(ch)) + } + TokenError::InvalidEscape(at, ch) => self.error(at, None, ErrorKind::InvalidEscape(ch)), + TokenError::InvalidEscapeValue(at, len, v) => { + self.error(at, Some(at + len), ErrorKind::InvalidEscapeValue(v)) + } + TokenError::InvalidHexEscape(at, ch) => { + self.error(at, None, ErrorKind::InvalidHexEscape(ch)) } - TokenError::InvalidEscape(at, ch) => self.error(at, ErrorKind::InvalidEscape(ch)), - TokenError::InvalidEscapeValue(at, v) => { - self.error(at, ErrorKind::InvalidEscapeValue(v)) + TokenError::NewlineInString(at) => { + self.error(at, None, ErrorKind::InvalidCharInString('\n')) } - TokenError::InvalidHexEscape(at, ch) => self.error(at, ErrorKind::InvalidHexEscape(ch)), - TokenError::NewlineInString(at) => self.error(at, ErrorKind::NewlineInString), - TokenError::Unexpected(at, ch) => self.error(at, ErrorKind::Unexpected(ch)), - TokenError::UnterminatedString(at) => self.error(at, ErrorKind::UnterminatedString), - TokenError::NewlineInTableKey(at) => self.error(at, ErrorKind::NewlineInTableKey), + TokenError::Unexpected(at, ch) => self.error(at, None, ErrorKind::Unexpected(ch)), + TokenError::UnterminatedString(at) => { + self.error(at, None, ErrorKind::UnterminatedString) + } + TokenError::NewlineInTableKey(at) => self.error(at, None, ErrorKind::NewlineInTableKey), TokenError::Wanted { at, expected, found, - } => self.error(at, ErrorKind::Wanted { expected, found }), - TokenError::MultilineStringKey(at) => self.error(at, 
ErrorKind::MultilineStringKey), + } => self.error( + at, + Some(at + found.len()), + ErrorKind::Wanted { expected, found }, + ), + TokenError::MultilineStringKey(at, end) => { + self.error(at, Some(end), ErrorKind::MultilineStringKey) + } } } - fn error(&self, at: usize, kind: ErrorKind) -> Error { - let mut err = Error::from_kind(Some(at), kind); - err.fix_linecol(|at| self.to_linecol(at)); - err + fn error(&self, start: usize, end: Option, kind: ErrorKind) -> Error { + let span = Span::new(start, end.unwrap_or(start + 1)); + let line_info = Some(self.to_linecol(start)); + Error { + span, + kind, + line_info, + } } /// Converts a byte offset from an error message to a (line, column) pair @@ -930,59 +996,59 @@ impl<'a> Deserializer<'a> { } } -impl Error { - pub(crate) fn line_col(&self) -> Option<(usize, usize)> { - self.line.map(|line| (line, self.col)) - } +// impl Error { +// pub(crate) fn line_col(&self) -> Option<(usize, usize)> { +// self.line.map(|line| (line, self.col)) +// } - fn from_kind(at: Option, kind: ErrorKind) -> Self { - Error { - kind, - line: None, - col: 0, - at, - message: String::new(), - key: Vec::new(), - } - } +// fn from_kind(at: Option, kind: ErrorKind) -> Self { +// Error { +// kind, +// line: None, +// col: 0, +// at, +// message: String::new(), +// key: Vec::new(), +// } +// } - fn custom(at: Option, s: String) -> Self { - Error { - kind: ErrorKind::Custom, - line: None, - col: 0, - at, - message: s, - key: Vec::new(), - } - } +// fn custom(at: Option, s: String) -> Self { +// Error { +// kind: ErrorKind::Custom, +// line: None, +// col: 0, +// at, +// message: s, +// key: Vec::new(), +// } +// } - pub(crate) fn add_key_context(&mut self, key: &str) { - self.key.insert(0, key.to_string()); - } +// pub(crate) fn add_key_context(&mut self, key: &str) { +// self.key.insert(0, key.to_string()); +// } - fn fix_offset(&mut self, f: F) - where - F: FnOnce() -> Option, - { - // An existing offset is always better positioned than anything we 
might - // want to add later. - if self.at.is_none() { - self.at = f(); - } - } +// fn fix_offset(&mut self, f: F) +// where +// F: FnOnce() -> Option, +// { +// // An existing offset is always better positioned than anything we might +// // want to add later. +// if self.at.is_none() { +// self.at = f(); +// } +// } - fn fix_linecol(&mut self, f: F) - where - F: FnOnce(usize) -> (usize, usize), - { - if let Some(at) = self.at { - let (line, col) = f(at); - self.line = Some(line); - self.col = col; - } - } -} +// fn fix_linecol(&mut self, f: F) +// where +// F: FnOnce(usize) -> (usize, usize), +// { +// if let Some(at) = self.at { +// let (line, col) = f(at); +// self.line = Some(line); +// self.col = col; +// } +// } +// } impl std::convert::From for std::io::Error { fn from(e: Error) -> Self { @@ -993,6 +1059,7 @@ impl std::convert::From for std::io::Error { enum Line<'a> { Table { at: usize, + end: usize, header: Header<'a>, array: bool, }, @@ -1055,6 +1122,7 @@ enum E<'a> { } impl<'a> E<'a> { + #[allow(dead_code)] fn type_name(&self) -> &'static str { match *self { E::String(..) 
=> "string", diff --git a/toml-file/src/de_helpers.rs b/toml-file/src/de_helpers.rs new file mode 100644 index 0000000..bc762f4 --- /dev/null +++ b/toml-file/src/de_helpers.rs @@ -0,0 +1,140 @@ +use crate::{ + value::{self, Table, Value, ValueInner}, + DeserError, Deserialize, Error, ErrorKind, +}; + +pub struct TableHelper<'de> { + table: value::Table<'de>, + errors: Vec, +} + +impl<'de> TableHelper<'de> { + pub fn new(value: &mut Value<'de>) -> Result { + let table = match value.take() { + ValueInner::Table(table) => table, + other => { + return Err(Error { + kind: ErrorKind::Wanted { + expected: "a table", + found: other.type_str(), + }, + span: value.span, + line_info: None, + } + .into()); + } + }; + + Ok(Self { + errors: Vec::new(), + table, + }) + } + + pub fn required(&mut self, name: &'static str) -> T { + let Some(mut val) = self.table.remove(&name.into()) else { + self.errors.push(Error { + kind: ErrorKind::MissingField(name), + span: Default::default(), + line_info: None, + }); + return T::default(); + }; + + match T::deserialize(&mut val) { + Ok(val) => val, + Err(mut err) => { + self.errors.append(&mut err.errors); + T::default() + } + } + } + + pub fn optional(&mut self, name: &'static str) -> Option { + let Some(mut val) = self.table.remove(&name.into()) else { + return None; + }; + + match T::deserialize(&mut val) { + Ok(val) => Some(val), + Err(mut err) => { + self.errors.append(&mut err.errors); + None + } + } + } + + pub fn finalize(mut self, fail_on_unknown_fields: bool) -> Result<(), DeserError> { + if fail_on_unknown_fields && !self.table.is_empty() { + let keys = self + .table + .into_keys() + .map(|key| (key.name.into(), key.span)) + .collect(); + + self.errors.push(Error { + span: Default::default(), + kind: ErrorKind::UnexpectedKeys { keys }, + line_info: None, + }) + } + + if self.errors.is_empty() { + Ok(()) + } else { + Err(DeserError { + errors: self.errors, + }) + } + } +} + +impl Deserialize for String { + fn deserialize<'de>(value: 
&mut Value<'de>) -> Result { + match value.take() { + ValueInner::String(s) => Ok(s.into()), + other => Err(Error { + kind: ErrorKind::Wanted { + expected: "a string", + found: other.type_str(), + }, + span: value.span, + line_info: None, + } + .into()), + } + } +} + +macro_rules! integer { + ($num:ty) => { + impl Deserialize for $num { + fn deserialize<'de>(value: &mut Value<'de>) -> Result { + match value.take() { + ValueInner::Integer(i) => { + let i = i.try_into().map_err(|_| { + DeserError::from(Error { + kind: ErrorKind::InvalidNumber, + span: value.span, + line_info: None, + }) + })?; + + Ok(i) + } + other => Err(Error { + kind: ErrorKind::Wanted { + expected: stringify!($num), + found: other.type_str(), + }, + span: value.span, + line_info: None, + } + .into()), + } + } + } + }; +} + +integer!(u32); diff --git a/toml-file/src/error.rs b/toml-file/src/error.rs new file mode 100644 index 0000000..06d5b1c --- /dev/null +++ b/toml-file/src/error.rs @@ -0,0 +1,294 @@ +use crate::Span; +use std::fmt::{self, Debug, Display}; + +/// Error that can occur when deserializing TOML. +#[derive(Debug)] +pub struct Error { + pub kind: ErrorKind, + pub span: Span, + pub line_info: Option<(usize, usize)>, +} + +impl std::error::Error for Error {} + +/// Errors that can occur when deserializing a type. +#[derive(Debug)] +pub enum ErrorKind { + /// EOF was reached when looking for a value. + UnexpectedEof, + + /// An invalid character not allowed in a string was found. + InvalidCharInString(char), + + /// An invalid character was found as an escape. + InvalidEscape(char), + + /// An invalid character was found in a hex escape. + InvalidHexEscape(char), + + /// An invalid escape value was specified in a hex escape in a string. + /// + /// Valid values are in the plane of unicode codepoints. + InvalidEscapeValue(u32), + + /// An unexpected character was encountered, typically when looking for a + /// value. 
+ Unexpected(char), + + /// An unterminated string was found where EOF was found before the ending + /// EOF mark. + UnterminatedString, + + /// A newline was found in a table key. + NewlineInTableKey, + + /// A number failed to parse. + InvalidNumber, + + /// Wanted one sort of token, but found another. + Wanted { + /// Expected token type. + expected: &'static str, + /// Actually found token type. + found: &'static str, + }, + + /// A duplicate table definition was found. + DuplicateTable { name: String, first: Span }, + + /// Duplicate key in table. + DuplicateKey { + /// The duplicate key + key: String, + /// The span where the first key is located + first: Span, + }, + + /// A previously defined table was redefined as an array. + RedefineAsArray, + + /// Multiline strings are not allowed for key. + MultilineStringKey, + + /// A custom error which could be generated when deserializing a particular + /// type. + Custom(String), + + /// A tuple with a certain number of elements was expected but something + /// else was found. + ExpectedTuple(usize), + + /// Expected table keys to be in increasing tuple index order, but something + /// else was found. + ExpectedTupleIndex { + /// Expected index. + expected: usize, + /// Key that was specified. + found: String, + }, + + /// An empty table was expected but entries were found. + ExpectedEmptyTable, + + /// Dotted key attempted to extend something that is not a table. + DottedKeyInvalidType, + + /// An unexpected key was encountered. + /// + /// Used when deserializing a struct with a limited set of fields. + UnexpectedKeys { + /// The unexpected keys. + keys: Vec<(String, Span)>, + }, + + /// Unquoted string was found when quoted one was expected. + UnquotedString, + + /// A required + MissingField(&'static str), +} + +impl Display for ErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::UnexpectedEof => f.write_str("unexpected-eof"), + Self::Custom(..) 
=> f.write_str("custom"), + Self::DottedKeyInvalidType => f.write_str("dotted-key-invalid-type"), + Self::DuplicateKey { .. } => f.write_str("duplicate-key"), + Self::DuplicateTable { .. } => f.write_str("duplicate-table"), + Self::ExpectedEmptyTable => f.write_str("expected-empty-table"), + Self::ExpectedTuple(..) => f.write_str("expected-tuple"), + Self::ExpectedTupleIndex { .. } => f.write_str("expected-tuple-index"), + Self::UnexpectedKeys { .. } => f.write_str("unexpected-keys"), + Self::UnquotedString => f.write_str("unquoted-string"), + Self::MultilineStringKey => f.write_str("multiline-string-key"), + Self::RedefineAsArray => f.write_str("redefine-as-array"), + Self::InvalidCharInString(..) => f.write_str("invalid-char-in-string"), + Self::InvalidEscape(..) => f.write_str("invalid-escape"), + Self::InvalidEscapeValue(..) => f.write_str("invalid-escape-value"), + Self::InvalidHexEscape(..) => f.write_str("invalid-hex-escape"), + //Self::NewlineInString => f.write_str("newline-in-string"), + Self::Unexpected(..) => f.write_str("unexpected"), + Self::UnterminatedString => f.write_str("unterminated-string"), + Self::NewlineInTableKey => f.write_str("newline-in-table-key"), + Self::InvalidNumber => f.write_str("invalid-number"), + Self::Wanted { .. } => f.write_str("wanted"), + Self::MissingField(..) => f.write_str("missing-field"), + } + } +} + +struct Escape(char); + +impl fmt::Display for Escape { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use std::fmt::Write as _; + + if self.0.is_whitespace() { + for esc in self.0.escape_default() { + f.write_char(esc)?; + } + Ok(()) + } else { + f.write_char(self.0) + } + } +} + +impl Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.kind { + ErrorKind::UnexpectedEof => f.write_str("unexpected eof encountered")?, + ErrorKind::InvalidCharInString(c) => { + write!(f, "invalid character in string: `{}`", Escape(*c))? 
+ } + ErrorKind::InvalidEscape(c) => { + write!(f, "invalid escape character in string: `{}`", Escape(*c))? + } + ErrorKind::InvalidHexEscape(c) => write!( + f, + "invalid hex escape character in string: `{}`", + Escape(*c) + )?, + ErrorKind::InvalidEscapeValue(c) => write!(f, "invalid escape value: `{c}`")?, + ErrorKind::Unexpected(c) => write!(f, "unexpected character found: `{}`", Escape(*c))?, + ErrorKind::UnterminatedString => f.write_str("unterminated string")?, + ErrorKind::NewlineInTableKey => f.write_str("found newline in table key")?, + ErrorKind::Wanted { expected, found } => { + write!(f, "expected {expected}, found {found}")?; + } + ErrorKind::InvalidNumber => f.write_str("invalid number")?, + ErrorKind::DuplicateTable { name, .. } => { + write!(f, "redefinition of table `{name}`")?; + } + ErrorKind::DuplicateKey { key, .. } => { + write!(f, "duplicate key: `{key}`")?; + } + ErrorKind::RedefineAsArray => f.write_str("table redefined as array")?, + ErrorKind::MultilineStringKey => { + f.write_str("multiline strings are not allowed for key")? + } + ErrorKind::Custom(message) => f.write_str(message)?, + ErrorKind::ExpectedTuple(l) => write!(f, "expected table with length {l}")?, + ErrorKind::ExpectedTupleIndex { expected, found } => { + write!(f, "expected table key `{expected}`, but was `{found}`")? + } + ErrorKind::ExpectedEmptyTable => f.write_str("expected empty table")?, + ErrorKind::DottedKeyInvalidType => { + f.write_str("dotted key attempted to extend non-table type")?; + } + ErrorKind::UnexpectedKeys { keys } => { + write!(f, "unexpected keys in table: `{keys:?}`")? + } + ErrorKind::UnquotedString => { + f.write_str("invalid TOML value, did you mean to use a quoted string?")? 
+ } + ErrorKind::MissingField(field) => f.write_str("missing field '{field}' in table")?, + } + + // if !self.key.is_empty() { + // write!(f, " for key `")?; + // for (i, k) in self.key.iter().enumerate() { + // if i > 0 { + // write!(f, ".")?; + // } + // write!(f, "{}", k)?; + // } + // write!(f, "`")?; + // } + + // if let Some(line) = self.line { + // write!(f, " at line {} column {}", line + 1, self.col + 1)?; + // } + + Ok(()) + } +} + +#[cfg(feature = "reporting")] +impl Error { + pub fn to_diagnostic( + &self, + fid: FileId, + ) -> codespan_reporting::diagnostic::Diagnostic { + let diag = + codespan_reporting::diagnostic::Diagnostic::error().with_code(self.kind.to_string()); + + use codespan_reporting::diagnostic::Label; + + let diag = match &self.kind { + ErrorKind::DuplicateKey { first, .. } => diag.with_labels(vec![ + Label::secondary(fid, *first).with_message("first key instance"), + Label::primary(fid, self.span).with_message("duplicate key"), + ]), + ErrorKind::Unexpected(c) => diag.with_labels(vec![Label::primary(fid, self.span) + .with_message(format!("unexpected character '{}'", Escape(*c)))]), + ErrorKind::InvalidCharInString(c) => { + diag.with_labels(vec![Label::primary(fid, self.span) + .with_message(format!("invalid character '{}' in string", Escape(*c)))]) + } + ErrorKind::InvalidEscape(c) => diag.with_labels(vec![Label::primary(fid, self.span) + .with_message(format!( + "invalid escape character '{}' in string", + Escape(*c) + ))]), + ErrorKind::InvalidEscapeValue(_) => diag + .with_labels(vec![ + Label::primary(fid, self.span).with_message("invalid escape value") + ]), + ErrorKind::InvalidNumber => diag.with_labels(vec![ + Label::primary(fid, self.span).with_message("unable to parse number") + ]), + ErrorKind::Wanted { expected, .. 
} => diag + .with_labels(vec![ + Label::primary(fid, self.span).with_message(format!("expected {expected}")) + ]), + ErrorKind::MultilineStringKey => diag.with_labels(vec![ + Label::primary(fid, self.span).with_message("multiline keys are not allowed") + ]), + ErrorKind::UnterminatedString => diag + .with_labels(vec![Label::primary(fid, self.span) + .with_message("eof reached before string terminator")]), + ErrorKind::DuplicateTable { first, .. } => diag.with_labels(vec![ + Label::secondary(fid, *first).with_message("first table instance"), + Label::primary(fid, self.span).with_message("duplicate table"), + ]), + ErrorKind::InvalidHexEscape(c) => diag + .with_labels(vec![Label::primary(fid, self.span) + .with_message(format!("invalid hex escape '{}'", Escape(*c)))]), + ErrorKind::UnquotedString => diag.with_labels(vec![ + Label::primary(fid, self.span).with_message("string is not quoted") + ]), + ErrorKind::UnexpectedKeys { keys } => diag + .with_message(format!("found {} unexpected keys", keys.len())) + .with_labels( + keys.iter() + .map(|(_name, span)| Label::secondary(fid, *span)) + .collect(), + ), + kind => unimplemented!("{kind}"), + }; + + diag + } +} diff --git a/toml-file/src/lib.rs b/toml-file/src/lib.rs new file mode 100644 index 0000000..d5eb351 --- /dev/null +++ b/toml-file/src/lib.rs @@ -0,0 +1,26 @@ +pub mod de; +pub mod de_helpers; +mod error; +pub mod span; +pub mod tokens; +pub mod value; + +pub use de::parse; +pub use error::{Error, ErrorKind}; +pub use span::Span; + +pub struct DeserError { + pub errors: Vec, +} + +impl From for DeserError { + fn from(value: Error) -> Self { + Self { + errors: vec![value], + } + } +} + +pub trait Deserialize: Sized { + fn deserialize<'de>(value: &mut value::Value<'de>) -> Result; +} diff --git a/toml-file/src/span.rs b/toml-file/src/span.rs new file mode 100644 index 0000000..cb00f37 --- /dev/null +++ b/toml-file/src/span.rs @@ -0,0 +1,148 @@ +#[derive(Copy, Clone, PartialEq, Eq, Default, Debug)] +pub struct 
Span { + pub start: usize, + pub end: usize, +} + +impl Span { + #[inline] + pub fn new(start: usize, end: usize) -> Self { + Self { start, end } + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.start == 0 && self.end == 0 + } +} + +impl From for (usize, usize) { + fn from(Span { start, end }: Span) -> (usize, usize) { + (start, end) + } +} + +impl From> for Span { + fn from(s: std::ops::Range) -> Self { + Self { + start: s.start, + end: s.end, + } + } +} + +impl From for std::ops::Range { + fn from(s: Span) -> Self { + Self { + start: s.start, + end: s.end, + } + } +} + +pub struct Spanned { + pub value: T, + pub span: Span, +} + +impl Spanned { + #[inline] + pub const fn new(value: T, span: Span) -> Self { + Self { value, span } + } + + /// Converts Self into its inner value + #[inline] + pub fn take(self) -> T { + self.value + } +} + +impl Default for Spanned +where + T: Default, +{ + fn default() -> Self { + Self { + value: Default::default(), + span: Span::default(), + } + } +} + +impl AsRef for Spanned { + fn as_ref(&self) -> &T { + &self.value + } +} + +impl std::fmt::Debug for Spanned +where + T: std::fmt::Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.value) + } +} + +impl Clone for Spanned +where + T: Clone, +{ + fn clone(&self) -> Self { + Self { + value: self.value.clone(), + span: self.span.clone(), + } + } +} + +impl PartialOrd for Spanned +where + T: PartialOrd, +{ + fn partial_cmp(&self, o: &Spanned) -> Option { + self.value.partial_cmp(&o.value) + } +} + +impl Ord for Spanned +where + T: Ord, +{ + fn cmp(&self, o: &Spanned) -> std::cmp::Ordering { + self.value.cmp(&o.value) + } +} + +impl PartialEq for Spanned +where + T: PartialEq, +{ + fn eq(&self, o: &Spanned) -> bool { + self.value == o.value + } +} + +impl Eq for Spanned where T: Eq {} + +impl PartialEq for Spanned +where + T: PartialEq, +{ + fn eq(&self, o: &T) -> bool { + &self.value == o + } +} + +impl crate::Deserialize 
for Spanned +where + T: crate::Deserialize, +{ + #[inline] + fn deserialize<'de>(value: &mut crate::value::Value<'de>) -> Result { + let span = value.span; + let value = T::deserialize(value)?; + Ok(Self { span, value }) + } +} diff --git a/src/tokens.rs b/toml-file/src/tokens.rs similarity index 98% rename from src/tokens.rs rename to toml-file/src/tokens.rs index 98a32fb..1a4b59c 100644 --- a/src/tokens.rs +++ b/toml-file/src/tokens.rs @@ -1,5 +1,4 @@ -use super::Key; -use crate::Span; +use crate::{value::Key, Span}; use std::{borrow::Cow, char, str}; #[derive(Eq, PartialEq, Debug)] @@ -31,12 +30,12 @@ pub enum Error { InvalidCharInString(usize, char), InvalidEscape(usize, char), InvalidHexEscape(usize, char), - InvalidEscapeValue(usize, u32), + InvalidEscapeValue(usize, usize, u32), NewlineInString(usize), Unexpected(usize, char), UnterminatedString(usize), NewlineInTableKey(usize), - MultilineStringKey(usize), + MultilineStringKey(usize, usize), Wanted { at: usize, expected: &'static str, @@ -173,7 +172,7 @@ impl<'a> Tokenizer<'a> { )) => { let offset = self.substr_offset(src); if multiline { - return Err(Error::MultilineStringKey(offset)); + return Err(Error::MultilineStringKey(offset, offset + val.len())); } match src.find('\n') { None => Ok(Key { span, name: val }), @@ -414,7 +413,7 @@ impl<'a> Tokenizer<'a> { let val = u32::from_str_radix(&buf, 16).unwrap(); match char::from_u32(val) { Some(ch) => Ok(ch), - None => Err(Error::InvalidEscapeValue(i, val)), + None => Err(Error::InvalidEscapeValue(i, len, val)), } } diff --git a/toml-file/src/value.rs b/toml-file/src/value.rs new file mode 100644 index 0000000..2c2d93c --- /dev/null +++ b/toml-file/src/value.rs @@ -0,0 +1,111 @@ +use crate::Span; +use std::{borrow::Cow, fmt}; + +#[cfg(feature = "serde")] +mod impl_serde; + +pub struct Value<'de> { + value: Option>, + pub span: Span, +} + +impl<'de> Value<'de> { + #[inline] + pub fn new(value: ValueInner<'de>) -> Self { + Self::with_span(value, Span::default()) 
+ } + + #[inline] + pub fn with_span(value: ValueInner<'de>, span: Span) -> Self { + Self { + value: Some(value), + span, + } + } + + #[inline] + pub fn take(&mut self) -> ValueInner<'de> { + self.value.take().expect("the value has already been taken") + } +} + +impl<'de> AsRef> for Value<'de> { + fn as_ref(&self) -> &ValueInner<'de> { + self.value + .as_ref() + .expect("the value has already been taken") + } +} + +impl<'de> fmt::Debug for Value<'de> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self.value) + } +} + +#[derive(Clone)] +pub struct Key<'de> { + pub name: Cow<'de, str>, + pub span: Span, +} + +impl<'de> From<&'de str> for Key<'de> { + fn from(k: &'de str) -> Self { + Self { + name: Cow::Borrowed(k), + span: Span::default(), + } + } +} + +impl<'de> fmt::Debug for Key<'de> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name) + } +} + +impl<'de> Ord for Key<'de> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.name.cmp(&other.name) + } +} + +impl<'de> PartialOrd for Key<'de> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl<'de> PartialEq for Key<'de> { + fn eq(&self, other: &Self) -> bool { + self.name.eq(&other.name) + } +} + +impl<'de> Eq for Key<'de> {} + +pub type Table<'de> = std::collections::BTreeMap, Value<'de>>; +pub type Array<'de> = Vec>; + +#[derive(Debug)] +pub enum ValueInner<'de> { + String(Cow<'de, str>), + Integer(i64), + Float(f64), + Boolean(bool), + Array(Array<'de>), + Table(Table<'de>), +} + +impl<'de> ValueInner<'de> { + pub fn type_str(&self) -> &'static str { + match self { + Self::String(..) => "string", + Self::Integer(..) => "integer", + Self::Float(..) => "float", + Self::Boolean(..) => "boolean", + Self::Array(..) => "array", + Self::Table(..) 
=> "table", + } + } +} diff --git a/toml-file/src/value/impl_serde.rs b/toml-file/src/value/impl_serde.rs new file mode 100644 index 0000000..75e2a30 --- /dev/null +++ b/toml-file/src/value/impl_serde.rs @@ -0,0 +1,33 @@ +use crate::value::{Value, ValueInner}; +use serde::{ + self, + ser::{SerializeMap, SerializeSeq}, +}; + +impl<'de> serde::Serialize for Value<'de> { + fn serialize(&self, ser: S) -> Result + where + S: serde::Serializer, + { + match self.as_ref() { + ValueInner::String(s) => ser.serialize_str(&s), + ValueInner::Integer(i) => ser.serialize_i64(*i), + ValueInner::Float(f) => ser.serialize_f64(*f), + ValueInner::Boolean(b) => ser.serialize_bool(*b), + ValueInner::Array(arr) => { + let mut seq = ser.serialize_seq(Some(arr.len()))?; + for ele in arr { + seq.serialize_element(ele)?; + } + seq.end() + } + ValueInner::Table(tab) => { + let mut map = ser.serialize_map(Some(tab.len()))?; + for (k, v) in tab { + map.serialize_entry(&k.name, v)?; + } + map.end() + } + } + } +} From 65d8a47862edab2ca14ca0ba1b191090cdba7464 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 12 Feb 2024 10:15:18 +0100 Subject: [PATCH 03/16] Checkpoint --- integ-tests/data/basic_arrays.toml | 7 + integ-tests/data/flattened.toml | 7 + integ-tests/src/lib.rs | 71 ++++- integ-tests/tests/de.rs | 166 ++++++++++- .../tests/snapshots/de__basic_arrays.snap | 52 ++++ .../tests/snapshots/de__flattened.snap | 71 +++++ .../tests/snapshots/de__missing_required.snap | 7 + toml-file/src/de.rs | 24 +- toml-file/src/de_helpers.rs | 261 ++++++++++++++---- toml-file/src/error.rs | 71 ++++- toml-file/src/lib.rs | 19 +- toml-file/src/span.rs | 26 +- toml-file/src/tokens.rs | 20 +- toml-file/src/value.rs | 45 ++- toml-file/src/value/impl_serde.rs | 12 + 15 files changed, 755 insertions(+), 104 deletions(-) create mode 100644 integ-tests/data/basic_arrays.toml create mode 100644 integ-tests/data/flattened.toml create mode 100644 integ-tests/tests/snapshots/de__basic_arrays.snap create mode 100644 
integ-tests/tests/snapshots/de__flattened.snap create mode 100644 integ-tests/tests/snapshots/de__missing_required.snap diff --git a/integ-tests/data/basic_arrays.toml b/integ-tests/data/basic_arrays.toml new file mode 100644 index 0000000..2fe5690 --- /dev/null +++ b/integ-tests/data/basic_arrays.toml @@ -0,0 +1,7 @@ +packages = [ + "first", + { name = "second" }, + { name = "third", version = "3.0.0" }, + { crate = "fourth:0.1" }, + "last", +] diff --git a/integ-tests/data/flattened.toml b/integ-tests/data/flattened.toml new file mode 100644 index 0000000..db7607a --- /dev/null +++ b/integ-tests/data/flattened.toml @@ -0,0 +1,7 @@ +flattened = [ + "simple", + { crate = "single:0.2.0" }, + { name = "split", version = ">=0.2" }, + { name = "split2" }, + { crate = "single2", reason = "additional data" }, +] diff --git a/integ-tests/src/lib.rs b/integ-tests/src/lib.rs index d511976..1ea0c29 100644 --- a/integ-tests/src/lib.rs +++ b/integ-tests/src/lib.rs @@ -42,12 +42,16 @@ macro_rules! valid_de { .expect(concat!("failed to load ", stringify!($name), ".toml")); let mut valid_toml = toml_file::parse(&toml_str).expect("failed to parse toml"); - match $kind::deserialize(&mut valid_toml) { + match <$kind>::deserialize(&mut valid_toml) { Ok(de) => { insta::assert_debug_snapshot!(de); } Err(err) => { - $crate::unexpected!($name, err.errors, toml_str); + $crate::unexpected!( + $name, + err.errors.into_iter().map(|d| d.to_diagnostic(())), + &toml_str + ); } } } @@ -62,7 +66,58 @@ macro_rules! valid_de { insta::assert_debug_snapshot!(de); } Err(err) => { - $crate::unexpected!($name, err.errors, $toml); + $crate::unexpected!( + $name, + err.errors.into_iter().map(|d| d.to_diagnostic(())), + $toml + ); + } + } + } + }; +} + +/// Loads a valid toml file, deserializes it to the specified type and asserts +/// the appropriate errors are produced +#[macro_export] +macro_rules! 
invalid_de { + ($name:ident, $kind:ty) => { + #[test] + fn $name() { + let toml_str = + std::fs::read_to_string(dbg!(concat!("data/", stringify!($name), ".toml"))) + .expect(concat!("failed to load ", stringify!($name), ".toml")); + let mut valid_toml = toml_file::parse(&toml_str).expect("failed to parse toml"); + + match <$kind>::deserialize(&mut valid_toml) { + Ok(de) => { + panic!("expected errors but deserialized '{de:#?}' successfully"); + } + Err(err) => { + $crate::error_snapshot!( + $name, + err.errors.into_iter().map(|d| d.to_diagnostic(())), + &toml_str + ); + } + } + } + }; + ($name:ident, $kind:ty, $toml:literal) => { + #[test] + fn $name() { + let mut valid_toml = toml_file::parse($toml).expect("failed to parse toml"); + + match <$kind>::deserialize(&mut valid_toml) { + Ok(de) => { + panic!("expected errors but deserialized '{de:#?}' successfully"); + } + Err(err) => { + $crate::error_snapshot!( + $name, + err.errors.into_iter().map(|d| d.to_diagnostic(())), + $toml + ); } } } @@ -71,11 +126,13 @@ macro_rules! valid_de { pub type File<'s> = codespan_reporting::files::SimpleFile<&'static str, &'s str>; -pub fn emit_error(f: &File, error: impl IntoIterator) -> String { +pub fn emit_error( + f: &File, + error: impl IntoIterator>, +) -> String { let mut output = codespan_reporting::term::termcolor::NoColor::new(Vec::new()); - for err in error { - let diag = err.to_diagnostic(()); + for diag in error { codespan_reporting::term::emit( &mut output, &codespan_reporting::term::Config::default(), @@ -116,7 +173,7 @@ macro_rules! 
invalid { #[test] fn $name() { let error = toml_file::parse($toml).unwrap_err(); - $crate::error_snapshot!($name, Some(error), $toml); + $crate::error_snapshot!($name, Some(error.to_diagnostic(())), $toml); } }; } diff --git a/integ-tests/tests/de.rs b/integ-tests/tests/de.rs index c811317..531f95d 100644 --- a/integ-tests/tests/de.rs +++ b/integ-tests/tests/de.rs @@ -1,5 +1,12 @@ -use integ_tests::valid_de; -use toml_file::{de_helpers::*, value::Value, DeserError, Deserialize, Error}; +#![allow(dead_code)] + +use integ_tests::{invalid_de, valid_de}; +use toml_file::{ + de_helpers::*, + span::Spanned, + value::{Value, ValueInner}, + DeserError, Deserialize, Span, +}; #[derive(Debug)] struct Boop { @@ -7,17 +14,164 @@ struct Boop { os: Option, } -impl Deserialize for Boop { - fn deserialize<'de>(value: &mut Value<'de>) -> Result { +impl<'de> Deserialize<'de> for Boop { + fn deserialize(value: &mut Value<'de>) -> Result { let mut mh = TableHelper::new(value)?; - let s = mh.required("s"); + let s = mh.required("s")?; let os = mh.optional("os"); - mh.finalize(true)?; + mh.finalize(None)?; Ok(Self { s, os }) } } valid_de!(basic_table, Boop, "s = 'boop string'\nos = 20"); +invalid_de!(missing_required, Boop, "os = 20"); + +#[derive(Debug)] +struct Package { + name: String, + version: Option, + span: Span, +} + +impl<'de> Deserialize<'de> for Package { + fn deserialize(value: &mut Value<'de>) -> Result { + fn from_str(s: std::borrow::Cow<'_, str>) -> (String, Option) { + if let Some((name, version)) = s.split_once(':') { + (name.to_owned(), Some(version.to_owned())) + } else { + (s.into(), None) + } + } + + match value.take() { + ValueInner::String(s) => { + let (name, version) = from_str(s); + + Ok(Self { + name, + version, + span: value.span, + }) + } + ValueInner::Table(tab) => { + let mut th = TableHelper::from(tab); + + if let Some(mut val) = th.table.remove(&"crate".into()) { + let (name, version) = match val.take() { + ValueInner::String(s) => from_str(s), + found 
=> { + th.errors + .push(expected("a package string", found, val.span)); + th.finalize(Some(value))?; + unreachable!(); + } + }; + + th.finalize(Some(value))?; + + Ok(Self { + name, + version, + span: val.span, + }) + } else { + let name = th.required_s("name")?; + let version = th.optional("version"); + + th.finalize(Some(value))?; + + Ok(Self { + name: name.value, + version, + span: name.span, + }) + } + } + other => Err(expected("a string or table", other, value.span).into()), + } + } +} + +#[derive(Debug)] +struct Array { + packages: Vec, +} + +impl<'de> Deserialize<'de> for Array { + fn deserialize(value: &mut Value<'de>) -> Result { + let mut th = TableHelper::new(value)?; + let packages = th.required("packages")?; + th.finalize(Some(value))?; + Ok(Self { packages }) + } +} + +valid_de!(basic_arrays, Array); + +#[derive(Debug)] +enum UntaggedPackage { + Simple { + spec: Package, + }, + Split { + name: Spanned, + version: Option, + }, +} + +#[derive(Debug)] +pub struct PackageSpecOrExtended { + spec: Package, + inner: Option, +} + +impl<'de, T> Deserialize<'de> for PackageSpecOrExtended +where + T: Deserialize<'de>, +{ + fn deserialize(value: &mut Value<'de>) -> Result { + let spec = Package::deserialize(value)?; + + let inner = if value.has_keys() { + Some(T::deserialize(value)?) 
+ } else { + None + }; + + Ok(Self { spec, inner }) + } +} + +#[derive(Debug)] +struct Reason { + reason: String, +} + +impl<'de> Deserialize<'de> for Reason { + fn deserialize(value: &mut Value<'de>) -> Result { + let mut th = TableHelper::new(value)?; + let reason = th.required("reason")?; + th.finalize(None)?; + Ok(Self { reason }) + } +} + +#[derive(Debug)] +struct Flattened { + flattened: Vec>, +} + +impl<'de> Deserialize<'de> for Flattened { + fn deserialize(value: &mut Value<'de>) -> Result { + let mut th = TableHelper::new(value)?; + let flattened = th.required("flattened")?; + th.finalize(Some(value))?; + Ok(Self { flattened }) + } +} + +valid_de!(flattened, Flattened); diff --git a/integ-tests/tests/snapshots/de__basic_arrays.snap b/integ-tests/tests/snapshots/de__basic_arrays.snap new file mode 100644 index 0000000..11cd871 --- /dev/null +++ b/integ-tests/tests/snapshots/de__basic_arrays.snap @@ -0,0 +1,52 @@ +--- +source: integ-tests/tests/de.rs +expression: de +--- +Array { + packages: [ + Package { + name: "first", + version: None, + span: Span { + start: 17, + end: 24, + }, + }, + Package { + name: "second", + version: None, + span: Span { + start: 39, + end: 47, + }, + }, + Package { + name: "third", + version: Some( + "3.0.0", + ), + span: Span { + start: 64, + end: 71, + }, + }, + Package { + name: "fourth", + version: Some( + "0.1", + ), + span: Span { + start: 108, + end: 120, + }, + }, + Package { + name: "last", + version: None, + span: Span { + start: 128, + end: 134, + }, + }, + ], +} diff --git a/integ-tests/tests/snapshots/de__flattened.snap b/integ-tests/tests/snapshots/de__flattened.snap new file mode 100644 index 0000000..2f82602 --- /dev/null +++ b/integ-tests/tests/snapshots/de__flattened.snap @@ -0,0 +1,71 @@ +--- +source: integ-tests/tests/de.rs +expression: de +--- +Flattened { + flattened: [ + PackageSpecOrExtended { + spec: Package { + name: "simple", + version: None, + span: Span { + start: 18, + end: 26, + }, + }, + inner: 
None, + }, + PackageSpecOrExtended { + spec: Package { + name: "single", + version: Some( + "0.2.0", + ), + span: Span { + start: 42, + end: 56, + }, + }, + inner: None, + }, + PackageSpecOrExtended { + spec: Package { + name: "split", + version: Some( + ">=0.2", + ), + span: Span { + start: 73, + end: 80, + }, + }, + inner: None, + }, + PackageSpecOrExtended { + spec: Package { + name: "split2", + version: None, + span: Span { + start: 116, + end: 124, + }, + }, + inner: None, + }, + PackageSpecOrExtended { + spec: Package { + name: "single2", + version: None, + span: Span { + start: 142, + end: 151, + }, + }, + inner: Some( + Reason { + reason: "additional data", + }, + ), + }, + ], +} diff --git a/integ-tests/tests/snapshots/de__missing_required.snap b/integ-tests/tests/snapshots/de__missing_required.snap new file mode 100644 index 0000000..7d0a637 --- /dev/null +++ b/integ-tests/tests/snapshots/de__missing_required.snap @@ -0,0 +1,7 @@ +--- +source: integ-tests/tests/de.rs +expression: error +--- +error[missing-field]: missing field 's' + + diff --git a/toml-file/src/de.rs b/toml-file/src/de.rs index 26c4a5a..18fb818 100644 --- a/toml-file/src/de.rs +++ b/toml-file/src/de.rs @@ -67,14 +67,14 @@ impl<'de, 'b> Ctx<'de, 'b> { macro_rules! 
printc { ($c:expr, $($arg:tt)*) => {{ - let ctx = $c; - for _ in 0..ctx.depth { - eprint!(" "); - } + // let ctx = $c; + // for _ in 0..ctx.depth { + // eprint!(" "); + // } - eprint!("{}:{} {}:{} ", file!(), line!(), ctx.cur_parent, ctx.cur); + // eprint!("{}:{} {}:{} ", file!(), line!(), ctx.cur_parent, ctx.cur); - eprintln!($($arg)*); + // eprintln!($($arg)*); }}; } @@ -269,12 +269,12 @@ fn deserialize_array<'de, 'b>( tables: &'b mut [Table<'de>], arr: &mut Vec>, ) -> Result { - if let Some(values) = ctx.values.take() { - for (key, val) in values { - printc!(&ctx, "{} => {val:?}", key.name); - arr.push(to_value(val, ctx.de)?); - } - } + // if let Some(values) = ctx.values.take() { + // for (key, val) in values { + // //printc!(&ctx, "{} => {val:?}", key.name); + // arr.push(to_value(val, ctx.de)?); + // } + // } while ctx.cur_parent < ctx.max { let header_stripped = tables[ctx.cur_parent] diff --git a/toml-file/src/de_helpers.rs b/toml-file/src/de_helpers.rs index bc762f4..b1fc1c2 100644 --- a/toml-file/src/de_helpers.rs +++ b/toml-file/src/de_helpers.rs @@ -1,28 +1,58 @@ use crate::{ + span::Spanned, value::{self, Table, Value, ValueInner}, - DeserError, Deserialize, Error, ErrorKind, + DeserError, Deserialize, Error, ErrorKind, Span, }; +use std::{fmt::Display, str::FromStr}; + +#[inline] +pub fn expected(expected: &'static str, found: ValueInner<'_>, span: Span) -> Error { + Error { + kind: ErrorKind::Wanted { + expected, + found: found.type_str(), + }, + span, + line_info: None, + } +} + +#[inline] +pub fn parse(value: &mut Value<'_>) -> Result +where + T: FromStr, + E: Display, +{ + let s = value.take_string(None)?; + match s.parse() { + Ok(v) => Ok(v), + Err(err) => Err(Error { + kind: ErrorKind::Custom(format!("failed to parse string: {err}")), + span: value.span, + line_info: None, + }), + } +} pub struct TableHelper<'de> { - table: value::Table<'de>, - errors: Vec, + pub table: Table<'de>, + pub errors: Vec, +} + +impl<'de> From> for TableHelper<'de> { 
+ fn from(table: Table<'de>) -> Self { + Self { + table, + errors: Vec::new(), + } + } } impl<'de> TableHelper<'de> { pub fn new(value: &mut Value<'de>) -> Result { let table = match value.take() { ValueInner::Table(table) => table, - other => { - return Err(Error { - kind: ErrorKind::Wanted { - expected: "a table", - found: other.type_str(), - }, - span: value.span, - line_info: None, - } - .into()); - } + other => return Err(expected("a table", other, value.span).into()), }; Ok(Self { @@ -31,32 +61,71 @@ impl<'de> TableHelper<'de> { }) } - pub fn required(&mut self, name: &'static str) -> T { + #[inline] + pub fn contains(&self, name: &'static str) -> bool { + self.table.contains_key(&name.into()) + } + + #[inline] + pub fn take(&mut self, name: &'static str) -> Option<(value::Key<'de>, Value<'de>)> { + self.table.remove_entry(&name.into()) + } + + #[inline] + pub fn required>(&mut self, name: &'static str) -> Result { + Ok(self.required_s(name)?.value) + } + + pub fn required_s>( + &mut self, + name: &'static str, + ) -> Result, Error> { let Some(mut val) = self.table.remove(&name.into()) else { - self.errors.push(Error { + let missing = Error { kind: ErrorKind::MissingField(name), span: Default::default(), line_info: None, - }); - return T::default(); + }; + self.errors.push(missing.clone()); + return Err(missing); + }; + + Spanned::::deserialize(&mut val).map_err(|mut errs| { + let err = errs.errors.last().unwrap().clone(); + self.errors.append(&mut errs.errors); + err + }) + } + + pub fn with_default>( + &mut self, + name: &'static str, + def: impl FnOnce() -> T, + ) -> (T, Span) { + let Some(mut val) = self.table.remove(&name.into()) else { + return (def(), Span::default()); }; match T::deserialize(&mut val) { - Ok(val) => val, + Ok(v) => (v, val.span), Err(mut err) => { self.errors.append(&mut err.errors); - T::default() + (def(), Span::default()) } } } - pub fn optional(&mut self, name: &'static str) -> Option { + pub fn optional>(&mut self, name: &'static 
str) -> Option { + self.optional_s(name).map(|v| v.value) + } + + pub fn optional_s>(&mut self, name: &'static str) -> Option> { let Some(mut val) = self.table.remove(&name.into()) else { return None; }; - match T::deserialize(&mut val) { - Ok(val) => Some(val), + match Spanned::::deserialize(&mut val) { + Ok(v) => Some(v), Err(mut err) => { self.errors.append(&mut err.errors); None @@ -64,8 +133,51 @@ impl<'de> TableHelper<'de> { } } - pub fn finalize(mut self, fail_on_unknown_fields: bool) -> Result<(), DeserError> { - if fail_on_unknown_fields && !self.table.is_empty() { + pub fn parse(&mut self, name: &'static str) -> T + where + T: FromStr + Default, + E: Display, + { + let Some(mut val) = self.table.remove(&name.into()) else { + self.errors.push(Error { + kind: ErrorKind::MissingField(name), + span: Default::default(), + line_info: None, + }); + return T::default(); + }; + + match parse(&mut val) { + Ok(v) => v, + Err(err) => { + self.errors.push(err); + T::default() + } + } + } + + pub fn parse_opt(&mut self, name: &'static str) -> Option + where + T: FromStr, + E: Display, + { + let Some(mut val) = self.table.remove(&name.into()) else { + return None; + }; + + match parse(&mut val) { + Ok(v) => Some(v), + Err(err) => { + self.errors.push(err); + None + } + } + } + + pub fn finalize(mut self, original: Option<&mut Value<'de>>) -> Result<(), DeserError> { + if let Some(original) = original { + original.set(ValueInner::Table(self.table)); + } else if !self.table.is_empty() { let keys = self .table .into_keys() @@ -89,27 +201,34 @@ impl<'de> TableHelper<'de> { } } -impl Deserialize for String { - fn deserialize<'de>(value: &mut Value<'de>) -> Result { +impl<'de> Deserialize<'de> for String { + fn deserialize(value: &mut Value<'de>) -> Result { + value + .take_string(None) + .map(|s| s.into()) + .map_err(DeserError::from) + } +} + +impl<'de> Deserialize<'de> for std::borrow::Cow<'de, str> { + fn deserialize(value: &mut Value<'de>) -> Result { + 
value.take_string(None).map_err(DeserError::from) + } +} + +impl<'de> Deserialize<'de> for bool { + fn deserialize(value: &mut Value<'de>) -> Result { match value.take() { - ValueInner::String(s) => Ok(s.into()), - other => Err(Error { - kind: ErrorKind::Wanted { - expected: "a string", - found: other.type_str(), - }, - span: value.span, - line_info: None, - } - .into()), + ValueInner::Boolean(b) => Ok(b), + other => Err(expected("a bool", other, value.span).into()), } } } macro_rules! integer { ($num:ty) => { - impl Deserialize for $num { - fn deserialize<'de>(value: &mut Value<'de>) -> Result { + impl<'de> Deserialize<'de> for $num { + fn deserialize(value: &mut Value<'de>) -> Result { match value.take() { ValueInner::Integer(i) => { let i = i.try_into().map_err(|_| { @@ -122,19 +241,65 @@ macro_rules! integer { Ok(i) } - other => Err(Error { - kind: ErrorKind::Wanted { - expected: stringify!($num), - found: other.type_str(), - }, - span: value.span, - line_info: None, - } - .into()), + other => Err(expected(stringify!($num), other, value.span).into()), } } } }; } +integer!(u8); +integer!(u16); integer!(u32); +integer!(u64); +integer!(i8); +integer!(i16); +integer!(i32); +integer!(i64); +integer!(usize); +integer!(isize); + +impl<'de> Deserialize<'de> for f32 { + fn deserialize(value: &mut Value<'de>) -> Result { + match value.take() { + ValueInner::Float(f) => Ok(f as f32), + other => Err(expected("a float", other, value.span).into()), + } + } +} + +impl<'de> Deserialize<'de> for f64 { + fn deserialize(value: &mut Value<'de>) -> Result { + match value.take() { + ValueInner::Float(f) => Ok(f), + other => Err(expected("a float", other, value.span).into()), + } + } +} + +impl<'de, T> Deserialize<'de> for Vec +where + T: Deserialize<'de>, +{ + fn deserialize(value: &mut value::Value<'de>) -> Result { + match value.take() { + ValueInner::Array(arr) => { + let mut errors = Vec::new(); + let mut s = Vec::new(); + for mut v in arr { + match T::deserialize(&mut v) { + 
Ok(v) => s.push(v), + Err(mut err) => errors.append(&mut err.errors), + } + } + + if errors.is_empty() { + Ok(s) + } else { + Err(DeserError { errors }) + } + } + other => Err(expected("an array", other, value.span).into()), + } + } +} diff --git a/toml-file/src/error.rs b/toml-file/src/error.rs index 06d5b1c..cfb22f5 100644 --- a/toml-file/src/error.rs +++ b/toml-file/src/error.rs @@ -2,7 +2,7 @@ use crate::Span; use std::fmt::{self, Debug, Display}; /// Error that can occur when deserializing TOML. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Error { pub kind: ErrorKind, pub span: Span, @@ -11,8 +11,18 @@ pub struct Error { impl std::error::Error for Error {} +impl From<(ErrorKind, Span)> for Error { + fn from((kind, span): (ErrorKind, Span)) -> Self { + Self { + kind, + span, + line_info: None, + } + } +} + /// Errors that can occur when deserializing a type. -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum ErrorKind { /// EOF was reached when looking for a value. UnexpectedEof, @@ -104,8 +114,17 @@ pub enum ErrorKind { /// Unquoted string was found when quoted one was expected. UnquotedString, - /// A required + /// A required field is missing from a table MissingField(&'static str), + + /// A field in the table is deprecated and the new key should be used instead + Deprecated { + old: &'static str, + new: &'static str, + }, + + /// An unexpected value was encountered + UnexpectedValue { expected: &'static [&'static str] }, } impl Display for ErrorKind { @@ -134,6 +153,8 @@ impl Display for ErrorKind { Self::InvalidNumber => f.write_str("invalid-number"), Self::Wanted { .. } => f.write_str("wanted"), Self::MissingField(..) => f.write_str("missing-field"), + Self::Deprecated { .. } => f.write_str("deprecated"), + Self::UnexpectedValue { .. } => f.write_str("unexpected-value"), } } } @@ -203,7 +224,11 @@ impl Display for Error { ErrorKind::UnquotedString => { f.write_str("invalid TOML value, did you mean to use a quoted string?")? 
} - ErrorKind::MissingField(field) => f.write_str("missing field '{field}' in table")?, + ErrorKind::MissingField(field) => write!(f, "missing field '{field}' in table")?, + ErrorKind::Deprecated { old, new } => { + write!(f, "field '{old}' is deprecated, '{new}' has replaced it")? + } + ErrorKind::UnexpectedValue { expected } => write!(f, "expected '{expected:?}'")?, } // if !self.key.is_empty() { @@ -286,9 +311,47 @@ impl Error { .map(|(_name, span)| Label::secondary(fid, *span)) .collect(), ), + ErrorKind::MissingField(field) => diag.with_message(format!("missing field '{field}'")), + ErrorKind::Deprecated { new, .. } => diag + .with_message(format!( + "deprecated field enountered, '{new}' should be used instead" + )) + .with_labels(vec![ + Label::primary(fid, self.span).with_message("deprecated field") + ]), + ErrorKind::UnexpectedValue { expected } => diag + .with_message(format!("expected '{expected:?}'")) + .with_labels(vec![ + Label::primary(fid, self.span).with_message("unexpected value") + ]), kind => unimplemented!("{kind}"), }; diag } } + +#[derive(Debug)] +pub struct DeserError { + pub errors: Vec, +} + +impl std::error::Error for DeserError {} + +impl From for DeserError { + fn from(value: Error) -> Self { + Self { + errors: vec![value], + } + } +} + +impl fmt::Display for DeserError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for err in &self.errors { + writeln!(f, "{err}")?; + } + + Ok(()) + } +} diff --git a/toml-file/src/lib.rs b/toml-file/src/lib.rs index d5eb351..9760345 100644 --- a/toml-file/src/lib.rs +++ b/toml-file/src/lib.rs @@ -6,21 +6,12 @@ pub mod tokens; pub mod value; pub use de::parse; -pub use error::{Error, ErrorKind}; +pub use error::{DeserError, Error, ErrorKind}; pub use span::Span; -pub struct DeserError { - pub errors: Vec, +pub trait Deserialize<'de>: Sized { + fn deserialize(value: &mut value::Value<'de>) -> Result; } -impl From for DeserError { - fn from(value: Error) -> Self { - Self { - errors: 
vec![value], - } - } -} - -pub trait Deserialize: Sized { - fn deserialize<'de>(value: &mut value::Value<'de>) -> Result; -} +pub trait DeserializeOwned: for<'de> Deserialize<'de> {} +impl DeserializeOwned for T where T: for<'de> Deserialize<'de> {} diff --git a/toml-file/src/span.rs b/toml-file/src/span.rs index cb00f37..4994900 100644 --- a/toml-file/src/span.rs +++ b/toml-file/src/span.rs @@ -47,7 +47,15 @@ pub struct Spanned { impl Spanned { #[inline] - pub const fn new(value: T, span: Span) -> Self { + pub const fn new(value: T) -> Self { + Self { + value, + span: Span { start: 0, end: 0 }, + } + } + + #[inline] + pub const fn with_span(value: T, span: Span) -> Self { Self { value, span } } @@ -56,6 +64,16 @@ impl Spanned { pub fn take(self) -> T { self.value } + + pub fn map(self) -> Spanned + where + V: From, + { + Spanned { + value: self.value.into(), + span: self.span, + } + } } impl Default for Spanned @@ -135,12 +153,12 @@ where } } -impl crate::Deserialize for Spanned +impl<'de, T> crate::Deserialize<'de> for Spanned where - T: crate::Deserialize, + T: crate::Deserialize<'de>, { #[inline] - fn deserialize<'de>(value: &mut crate::value::Value<'de>) -> Result { + fn deserialize(value: &mut crate::value::Value<'de>) -> Result { let span = value.span; let value = T::deserialize(value)?; Ok(Self { span, value }) diff --git a/toml-file/src/tokens.rs b/toml-file/src/tokens.rs index 1a4b59c..02e9091 100644 --- a/toml-file/src/tokens.rs +++ b/toml-file/src/tokens.rs @@ -360,8 +360,12 @@ impl<'a> Tokenizer<'a> { Some((_, 'r')) => val.push('\r'), Some((_, 't')) => val.push('\t'), Some((i, c @ ('u' | 'U'))) => { - let len = if c == 'u' { 4 } else { 8 }; - val.push(me.hex(start, i, len)?); + let c = if c == 'u' { + me.hex::<4>(start, i) + } else { + me.hex::<8>(start, i) + }; + val.push(c?); } Some((i, c @ (' ' | '\t' | '\n'))) if multi => { if c != '\n' { @@ -401,19 +405,19 @@ impl<'a> Tokenizer<'a> { }) } - fn hex(&mut self, start: usize, i: usize, len: usize) -> 
Result { - let mut buf = String::with_capacity(len); - for _ in 0..len { + fn hex(&mut self, start: usize, i: usize) -> Result { + let mut buf = [0; N]; + for i in 0..N { match self.one() { - Some((_, ch)) if ch as u32 <= 0x7F && ch.is_ascii_hexdigit() => buf.push(ch), + Some((_, ch)) if ch as u32 <= 0x7F && ch.is_ascii_hexdigit() => buf[i] = ch as u8, Some((i, ch)) => return Err(Error::InvalidHexEscape(i, ch)), None => return Err(Error::UnterminatedString(start)), } } - let val = u32::from_str_radix(&buf, 16).unwrap(); + let val = u32::from_str_radix(std::str::from_utf8(&buf).unwrap(), 16).unwrap(); match char::from_u32(val) { Some(ch) => Ok(ch), - None => Err(Error::InvalidEscapeValue(i, len, val)), + None => Err(Error::InvalidEscapeValue(i, N, val)), } } diff --git a/toml-file/src/value.rs b/toml-file/src/value.rs index 2c2d93c..84052ad 100644 --- a/toml-file/src/value.rs +++ b/toml-file/src/value.rs @@ -1,4 +1,4 @@ -use crate::Span; +use crate::{Error, ErrorKind, Span}; use std::{borrow::Cow, fmt}; #[cfg(feature = "serde")] @@ -27,6 +27,49 @@ impl<'de> Value<'de> { pub fn take(&mut self) -> ValueInner<'de> { self.value.take().expect("the value has already been taken") } + + #[inline] + pub fn set(&mut self, value: ValueInner<'de>) { + self.value = Some(value); + } + + /// Returns true if the value is a table and is non-empty + #[inline] + pub fn has_keys(&self) -> bool { + self.value.as_ref().map_or(false, |val| { + if let ValueInner::Table(table) = val { + !table.is_empty() + } else { + false + } + }) + } + + #[inline] + pub fn has_key(&self, key: &str) -> bool { + self.value.as_ref().map_or(false, |val| { + if let ValueInner::Table(table) = val { + table.contains_key(&key.into()) + } else { + false + } + }) + } + + #[inline] + pub fn take_string(&mut self, msg: Option<&'static str>) -> Result, Error> { + match self.take() { + ValueInner::String(s) => Ok(s), + other => Err(Error { + kind: ErrorKind::Wanted { + expected: msg.unwrap_or("a string"), + found: 
other.type_str(), + }, + span: self.span, + line_info: None, + }), + } + } } impl<'de> AsRef> for Value<'de> { diff --git a/toml-file/src/value/impl_serde.rs b/toml-file/src/value/impl_serde.rs index 75e2a30..a682b45 100644 --- a/toml-file/src/value/impl_serde.rs +++ b/toml-file/src/value/impl_serde.rs @@ -31,3 +31,15 @@ impl<'de> serde::Serialize for Value<'de> { } } } + +impl serde::Serialize for crate::span::Spanned +where + T: serde::Serialize, +{ + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.value.serialize(serializer) + } +} From efeb080e2c6dfbe8ee4d6772d0f04d4505b6889f Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 12 Feb 2024 11:23:47 +0100 Subject: [PATCH 04/16] Print diagnostics with spans for valid tests --- integ-tests/src/lib.rs | 78 +++++++++++++++++++++++++++++++------ integ-tests/tests/de.rs | 16 ++------ integ-tests/tests/tokens.rs | 24 +++++++----- 3 files changed, 84 insertions(+), 34 deletions(-) diff --git a/integ-tests/src/lib.rs b/integ-tests/src/lib.rs index 1ea0c29..29166c3 100644 --- a/integ-tests/src/lib.rs +++ b/integ-tests/src/lib.rs @@ -4,11 +4,12 @@ macro_rules! valid { ($name:ident) => { #[test] fn $name() { - let toml_str = - std::fs::read_to_string(dbg!(concat!("data/", stringify!($name), ".toml"))) - .expect(concat!("failed to load ", stringify!($name), ".toml")); + let toml_str = std::fs::read_to_string(concat!("data/", stringify!($name), ".toml")) + .expect(concat!("failed to load ", stringify!($name), ".toml")); let valid_toml = toml_file::parse(&toml_str).expect("failed to parse toml"); insta::assert_json_snapshot!(valid_toml); + + $crate::emit_spans!($name, valid_toml, &toml_str); } }; ($name:ident, $toml:literal) => { @@ -16,6 +17,8 @@ macro_rules! valid { fn $name() { let valid_toml = toml_file::parse($toml).expect("failed to parse toml"); insta::assert_json_snapshot!(valid_toml); + + $crate::emit_spans!($name, valid_toml, $toml); } }; } @@ -24,7 +27,7 @@ macro_rules! 
valid { macro_rules! unexpected { ($name:ident, $err:expr, $toml:expr) => {{ let file = $crate::File::new(stringify!($name), $toml); - let error = $crate::emit_error(&file, $err); + let error = $crate::emit_diags(&file, $err); panic!("unexpected toml deserialization errors:\n{error}"); }}; @@ -37,9 +40,8 @@ macro_rules! valid_de { ($name:ident, $kind:ty) => { #[test] fn $name() { - let toml_str = - std::fs::read_to_string(dbg!(concat!("data/", stringify!($name), ".toml"))) - .expect(concat!("failed to load ", stringify!($name), ".toml")); + let toml_str = std::fs::read_to_string(concat!("data/", stringify!($name), ".toml")) + .expect(concat!("failed to load ", stringify!($name), ".toml")); let mut valid_toml = toml_file::parse(&toml_str).expect("failed to parse toml"); match <$kind>::deserialize(&mut valid_toml) { @@ -84,9 +86,8 @@ macro_rules! invalid_de { ($name:ident, $kind:ty) => { #[test] fn $name() { - let toml_str = - std::fs::read_to_string(dbg!(concat!("data/", stringify!($name), ".toml"))) - .expect(concat!("failed to load ", stringify!($name), ".toml")); + let toml_str = std::fs::read_to_string(concat!("data/", stringify!($name), ".toml")) + .expect(concat!("failed to load ", stringify!($name), ".toml")); let mut valid_toml = toml_file::parse(&toml_str).expect("failed to parse toml"); match <$kind>::deserialize(&mut valid_toml) { @@ -126,7 +127,7 @@ macro_rules! invalid_de { pub type File<'s> = codespan_reporting::files::SimpleFile<&'static str, &'s str>; -pub fn emit_error( +pub fn emit_diags( f: &File, error: impl IntoIterator>, ) -> String { @@ -151,11 +152,64 @@ pub fn emit_error( macro_rules! 
error_snapshot { ($name:ident, $err:expr, $toml:expr) => { let file = $crate::File::new(stringify!($name), $toml); - let error = $crate::emit_error(&file, $err); + let error = $crate::emit_diags(&file, $err); insta::assert_snapshot!(error); }; } +use codespan_reporting::diagnostic::Diagnostic; + +pub fn collect_spans( + key: &str, + val: &toml_file::value::Value<'_>, + diags: &mut Vec>, +) { + use codespan_reporting::diagnostic::Label; + use toml_file::value::ValueInner; + + let code = match val.as_ref() { + ValueInner::String(_s) => "string", + ValueInner::Integer(_s) => "integer", + ValueInner::Float(_s) => "float", + ValueInner::Boolean(_s) => "bool", + ValueInner::Array(arr) => { + for (i, v) in arr.iter().enumerate() { + collect_spans(&format!("{key}_{i}"), v, diags); + } + + "array" + } + ValueInner::Table(tab) => { + for (k, v) in tab { + collect_spans(&format!("{key}_{}", k.name), v, diags); + } + + "table" + } + }; + + diags.push( + Diagnostic::note() + .with_code(code) + .with_message(key) + .with_labels(vec![Label::primary((), val.span)]), + ); +} + +#[macro_export] +macro_rules! emit_spans { + ($name:ident, $val:expr, $toml:expr) => { + let file = $crate::File::new(stringify!($name), $toml); + + let mut spans = Vec::new(); + + $crate::collect_spans("root", &$val, &mut spans); + + let spans = $crate::emit_diags(&file, spans); + insta::assert_snapshot!(spans); + }; +} + /// Loads an invalid toml file and does a snapshot assertion on the error #[macro_export] macro_rules! 
invalid { diff --git a/integ-tests/tests/de.rs b/integ-tests/tests/de.rs index 531f95d..8ab4659 100644 --- a/integ-tests/tests/de.rs +++ b/integ-tests/tests/de.rs @@ -5,7 +5,7 @@ use toml_file::{ de_helpers::*, span::Spanned, value::{Value, ValueInner}, - DeserError, Deserialize, Span, + DeserError, Deserialize, }; #[derive(Debug)] @@ -34,7 +34,6 @@ invalid_de!(missing_required, Boop, "os = 20"); struct Package { name: String, version: Option, - span: Span, } impl<'de> Deserialize<'de> for Package { @@ -51,11 +50,7 @@ impl<'de> Deserialize<'de> for Package { ValueInner::String(s) => { let (name, version) = from_str(s); - Ok(Self { - name, - version, - span: value.span, - }) + Ok(Self { name, version }) } ValueInner::Table(tab) => { let mut th = TableHelper::from(tab); @@ -73,11 +68,7 @@ impl<'de> Deserialize<'de> for Package { th.finalize(Some(value))?; - Ok(Self { - name, - version, - span: val.span, - }) + Ok(Self { name, version }) } else { let name = th.required_s("name")?; let version = th.optional("version"); @@ -87,7 +78,6 @@ impl<'de> Deserialize<'de> for Package { Ok(Self { name: name.value, version, - span: name.span, }) } } diff --git a/integ-tests/tests/tokens.rs b/integ-tests/tests/tokens.rs index 79e9287..8411aa7 100644 --- a/integ-tests/tests/tokens.rs +++ b/integ-tests/tests/tokens.rs @@ -11,17 +11,23 @@ fn err(input: &str, err: Error) { #[test] fn strings() { - fn t(input: &str, val: &str, multiline: bool) { + fn t(input: &str, eval: &str, emultiline: bool) { let mut t = Tokenizer::new(input); let (_, token) = t.next().unwrap().unwrap(); - assert_eq!( - token, - Token::String { - src: input, - val: Cow::Borrowed(val), - multiline, - } - ); + + if let Token::String { + src, + val, + multiline, + .. 
+ } = token + { + assert_eq!(src, input); + assert_eq!(val, Cow::Borrowed(eval)); + assert_eq!(multiline, emultiline); + } else { + panic!("not a string"); + } assert!(t.next().unwrap().is_none()); } From 7eedba66644032edb961681ce4e1cf1f40f15891 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 12 Feb 2024 11:23:55 +0100 Subject: [PATCH 05/16] Fix string spans --- .../tests/snapshots/de__basic_arrays.snap | 20 -- .../tests/snapshots/de__flattened.snap | 20 -- .../snapshots/parser__blank_literal-2.snap | 17 ++ .../tests/snapshots/parser__booleans-2.snap | 25 ++ .../tests/snapshots/parser__crlf-2.snap | 82 +++++ .../snapshots/parser__empty_string-2.snap | 17 ++ .../snapshots/parser__empty_table-2.snap | 19 ++ .../tests/snapshots/parser__fruit-2.snap | 113 +++++++ .../snapshots/parser__inline_tables-2.snap | 143 +++++++++ .../tests/snapshots/parser__key_names-2.snap | 101 ++++++ .../snapshots/parser__key_no_space-2.snap | 17 ++ .../parser__literal_eats_crlf-2.snap | 34 +++ .../tests/snapshots/parser__many_blank-2.snap | 22 ++ .../tests/snapshots/parser__strings-2.snap | 289 ++++++++++++++++++ .../snapshots/parser__table_names-2.snap | 59 ++++ .../snapshots/parser__tables_in_arrays-2.snap | 47 +++ .../snapshots/parser__underscores-2.snap | 46 +++ .../snapshots/valid__arrays__empty-2.snap | 41 +++ .../valid__arrays__ints_and_arrays-2.snap | 35 +++ .../snapshots/valid__arrays__nested-2.snap | 41 +++ .../snapshots/valid__arrays__no_spaces-2.snap | 35 +++ .../tests/snapshots/valid__arrays__one-2.snap | 37 +++ .../valid__arrays__strings_and_ints-2.snap | 29 ++ .../tests/snapshots/valid__comments-2.snap | 54 ++++ .../tests/snapshots/valid__evil-2.snap | 111 +++++++ .../snapshots/valid__numbers__integers-2.snap | 89 ++++++ .../valid__tables__array_many-2.snap | 77 +++++ ...tables__implicit_and_explicit_after-2.snap | 47 +++ ...ables__implicit_and_explicit_before-2.snap | 47 +++ .../valid__tables__implicit_array-2.snap | 36 +++ .../valid__tables__implicit_groups-2.snap | 
36 +++ .../valid__tables__nested_arrays-2.snap | 107 +++++++ .../snapshots/valid__tables__sub_empty-2.snap | 24 ++ toml-file/src/tokens.rs | 71 +++-- 34 files changed, 1923 insertions(+), 65 deletions(-) create mode 100644 integ-tests/tests/snapshots/parser__blank_literal-2.snap create mode 100644 integ-tests/tests/snapshots/parser__booleans-2.snap create mode 100644 integ-tests/tests/snapshots/parser__crlf-2.snap create mode 100644 integ-tests/tests/snapshots/parser__empty_string-2.snap create mode 100644 integ-tests/tests/snapshots/parser__empty_table-2.snap create mode 100644 integ-tests/tests/snapshots/parser__fruit-2.snap create mode 100644 integ-tests/tests/snapshots/parser__inline_tables-2.snap create mode 100644 integ-tests/tests/snapshots/parser__key_names-2.snap create mode 100644 integ-tests/tests/snapshots/parser__key_no_space-2.snap create mode 100644 integ-tests/tests/snapshots/parser__literal_eats_crlf-2.snap create mode 100644 integ-tests/tests/snapshots/parser__many_blank-2.snap create mode 100644 integ-tests/tests/snapshots/parser__strings-2.snap create mode 100644 integ-tests/tests/snapshots/parser__table_names-2.snap create mode 100644 integ-tests/tests/snapshots/parser__tables_in_arrays-2.snap create mode 100644 integ-tests/tests/snapshots/parser__underscores-2.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__empty-2.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__ints_and_arrays-2.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__nested-2.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__no_spaces-2.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__one-2.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__strings_and_ints-2.snap create mode 100644 integ-tests/tests/snapshots/valid__comments-2.snap create mode 100644 integ-tests/tests/snapshots/valid__evil-2.snap create mode 100644 integ-tests/tests/snapshots/valid__numbers__integers-2.snap 
create mode 100644 integ-tests/tests/snapshots/valid__tables__array_many-2.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_after-2.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_before-2.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__implicit_array-2.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__implicit_groups-2.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__nested_arrays-2.snap create mode 100644 integ-tests/tests/snapshots/valid__tables__sub_empty-2.snap diff --git a/integ-tests/tests/snapshots/de__basic_arrays.snap b/integ-tests/tests/snapshots/de__basic_arrays.snap index 11cd871..2996079 100644 --- a/integ-tests/tests/snapshots/de__basic_arrays.snap +++ b/integ-tests/tests/snapshots/de__basic_arrays.snap @@ -7,46 +7,26 @@ Array { Package { name: "first", version: None, - span: Span { - start: 17, - end: 24, - }, }, Package { name: "second", version: None, - span: Span { - start: 39, - end: 47, - }, }, Package { name: "third", version: Some( "3.0.0", ), - span: Span { - start: 64, - end: 71, - }, }, Package { name: "fourth", version: Some( "0.1", ), - span: Span { - start: 108, - end: 120, - }, }, Package { name: "last", version: None, - span: Span { - start: 128, - end: 134, - }, }, ], } diff --git a/integ-tests/tests/snapshots/de__flattened.snap b/integ-tests/tests/snapshots/de__flattened.snap index 2f82602..4893745 100644 --- a/integ-tests/tests/snapshots/de__flattened.snap +++ b/integ-tests/tests/snapshots/de__flattened.snap @@ -8,10 +8,6 @@ Flattened { spec: Package { name: "simple", version: None, - span: Span { - start: 18, - end: 26, - }, }, inner: None, }, @@ -21,10 +17,6 @@ Flattened { version: Some( "0.2.0", ), - span: Span { - start: 42, - end: 56, - }, }, inner: None, }, @@ -34,10 +26,6 @@ Flattened { version: Some( ">=0.2", ), - span: Span { - start: 73, - end: 80, - }, }, inner: None, }, @@ -45,10 +33,6 
@@ Flattened { spec: Package { name: "split2", version: None, - span: Span { - start: 116, - end: 124, - }, }, inner: None, }, @@ -56,10 +40,6 @@ Flattened { spec: Package { name: "single2", version: None, - span: Span { - start: 142, - end: 151, - }, }, inner: Some( Reason { diff --git a/integ-tests/tests/snapshots/parser__blank_literal-2.snap b/integ-tests/tests/snapshots/parser__blank_literal-2.snap new file mode 100644 index 0000000..9bab284 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__blank_literal-2.snap @@ -0,0 +1,17 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[string]: root_foo + ┌─ blank_literal:1:7 + │ +1 │ foo = '' + │ ^ + +note[table]: root + ┌─ blank_literal:1:1 + │ +1 │ foo = '' + │ ^^^^^^^^ + + diff --git a/integ-tests/tests/snapshots/parser__booleans-2.snap b/integ-tests/tests/snapshots/parser__booleans-2.snap new file mode 100644 index 0000000..03338e0 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__booleans-2.snap @@ -0,0 +1,25 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[bool]: root_false + ┌─ booleans:2:11 + │ +2 │ "false" = false + │ ^^^^^ + +note[bool]: root_true + ┌─ booleans:1:10 + │ +1 │ "true" = true + │ ^^^^ + +note[table]: root + ┌─ booleans:1:1 + │ +1 │ ╭ "true" = true +2 │ │ "false" = false +3 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/parser__crlf-2.snap b/integ-tests/tests/snapshots/parser__crlf-2.snap new file mode 100644 index 0000000..80b4e23 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__crlf-2.snap @@ -0,0 +1,82 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[string]: root_lib_0_description + ┌─ crlf:11:18 + │ +11 │ description = """A Rust implementation of a TAR file reader and writer. This library does not + │ ╭──────────────────^ +12 │ │ currently handle compression, but it is abstract over all I/O readers and +13 │ │ writers. 
Additionally, great lengths are taken to ensure that the entire +14 │ │ contents are never required to be entirely resident in memory all at once. +15 │ │ """ + │ ╰^ + +note[string]: root_lib_0_name + ┌─ crlf:10:9 + │ +10 │ name = "splay" + │ ^^^^^ + +note[string]: root_lib_0_path + ┌─ crlf:9:9 + │ +9 │ path = "lib.rs" + │ ^^^^^^ + +note[table]: root_lib_0 + ┌─ crlf:1:1 + │ +1 │ [project] + │ ^ + +note[array]: root_lib + ┌─ crlf:1:1 + │ +1 │ [project] + │ ^ + +note[string]: root_project_authors_0 + ┌─ crlf:5:13 + │ +5 │ authors = ["alex@crichton.co"] + │ ^^^^^^^^^^^^^^^^ + +note[array]: root_project_authors + ┌─ crlf:5:11 + │ +5 │ authors = ["alex@crichton.co"] + │ ^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_project_name + ┌─ crlf:3:9 + │ +3 │ name = "splay" + │ ^^^^^ + +note[string]: root_project_version + ┌─ crlf:4:12 + │ +4 │ version = "0.1.0" + │ ^^^^^ + +note[table]: root_project + ┌─ crlf:1:1 + │ +1 │ [project] + │ ^ + +note[table]: root + ┌─ crlf:1:1 + │ + 1 │ ╭ [project] + 2 │ │ + 3 │ │ name = "splay" + 4 │ │ version = "0.1.0" + · │ +14 │ │ contents are never required to be entirely resident in memory all at once. 
+15 │ │ """ + │ ╰───^ + + diff --git a/integ-tests/tests/snapshots/parser__empty_string-2.snap b/integ-tests/tests/snapshots/parser__empty_string-2.snap new file mode 100644 index 0000000..478860f --- /dev/null +++ b/integ-tests/tests/snapshots/parser__empty_string-2.snap @@ -0,0 +1,17 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[string]: root_foo + ┌─ empty_string:1:7 + │ +1 │ foo = "" + │ ^ + +note[table]: root + ┌─ empty_string:1:1 + │ +1 │ foo = "" + │ ^^^^^^^^ + + diff --git a/integ-tests/tests/snapshots/parser__empty_table-2.snap b/integ-tests/tests/snapshots/parser__empty_table-2.snap new file mode 100644 index 0000000..5904ae0 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__empty_table-2.snap @@ -0,0 +1,19 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[table]: root_foo + ┌─ empty_table:1:1 + │ +1 │ + │ ^ + +note[table]: root + ┌─ empty_table:1:1 + │ +1 │ ╭ +2 │ │ [foo] +3 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/parser__fruit-2.snap b/integ-tests/tests/snapshots/parser__fruit-2.snap new file mode 100644 index 0000000..cfb0f99 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__fruit-2.snap @@ -0,0 +1,113 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[string]: root_fruit_0_name + ┌─ fruit:2:9 + │ +2 │ name = "apple" + │ ^^^^^ + +note[string]: root_fruit_0_physical_color + ┌─ fruit:5:10 + │ +5 │ color = "red" + │ ^^^ + +note[string]: root_fruit_0_physical_shape + ┌─ fruit:6:10 + │ +6 │ shape = "round" + │ ^^^^^ + +note[table]: root_fruit_0_physical + ┌─ fruit:1:1 + │ +1 │ [[fruit]] + │ ^ + +note[string]: root_fruit_0_variety_0_name + ┌─ fruit:9:9 + │ +9 │ name = "red delicious" + │ ^^^^^^^^^^^^^ + +note[table]: root_fruit_0_variety_0 + ┌─ fruit:1:1 + │ +1 │ [[fruit]] + │ ^ + +note[string]: root_fruit_0_variety_1_name + ┌─ fruit:12:9 + │ +12 │ name = "granny smith" + │ ^^^^^^^^^^^^ + +note[table]: root_fruit_0_variety_1 + ┌─ fruit:1:1 + │ +1 │ 
[[fruit]] + │ ^ + +note[array]: root_fruit_0_variety + ┌─ fruit:1:1 + │ +1 │ [[fruit]] + │ ^ + +note[table]: root_fruit_0 + ┌─ fruit:1:1 + │ +1 │ [[fruit]] + │ ^ + +note[string]: root_fruit_1_name + ┌─ fruit:15:9 + │ +15 │ name = "banana" + │ ^^^^^^ + +note[string]: root_fruit_1_variety_0_name + ┌─ fruit:18:9 + │ +18 │ name = "plantain" + │ ^^^^^^^^ + +note[table]: root_fruit_1_variety_0 + ┌─ fruit:1:1 + │ +1 │ [[fruit]] + │ ^ + +note[array]: root_fruit_1_variety + ┌─ fruit:1:1 + │ +1 │ [[fruit]] + │ ^ + +note[table]: root_fruit_1 + ┌─ fruit:1:1 + │ +1 │ [[fruit]] + │ ^ + +note[array]: root_fruit + ┌─ fruit:1:1 + │ +1 │ [[fruit]] + │ ^ + +note[table]: root + ┌─ fruit:1:1 + │ + 1 │ ╭ [[fruit]] + 2 │ │ name = "apple" + 3 │ │ + 4 │ │ [fruit.physical] + · │ +18 │ │ name = "plantain" +19 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/parser__inline_tables-2.snap b/integ-tests/tests/snapshots/parser__inline_tables-2.snap new file mode 100644 index 0000000..ecf2463 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__inline_tables-2.snap @@ -0,0 +1,143 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[table]: root_a + ┌─ inline_tables:1:5 + │ +1 │ a = {} + │ ^^ + +note[integer]: root_b_b + ┌─ inline_tables:2:8 + │ +2 │ b = {b=1} + │ ^ + +note[table]: root_b + ┌─ inline_tables:2:5 + │ +2 │ b = {b=1} + │ ^^^^^ + +note[integer]: root_c_b + ┌─ inline_tables:3:17 + │ +3 │ c = { b = 1 } + │ ^ + +note[table]: root_c + ┌─ inline_tables:3:5 + │ +3 │ c = { b = 1 } + │ ^^^^^^^^^^^^^^^^^^ + +note[integer]: root_d_a + ┌─ inline_tables:4:8 + │ +4 │ d = {a=1,b=2} + │ ^ + +note[integer]: root_d_b + ┌─ inline_tables:4:12 + │ +4 │ d = {a=1,b=2} + │ ^ + +note[table]: root_d + ┌─ inline_tables:4:5 + │ +4 │ d = {a=1,b=2} + │ ^^^^^^^^^ + +note[integer]: root_e_a + ┌─ inline_tables:5:8 + │ +5 │ e = {a=1,b=2,c={}} + │ ^ + +note[integer]: root_e_b + ┌─ inline_tables:5:12 + │ +5 │ e = {a=1,b=2,c={}} + │ ^ + +note[table]: root_e_c + ┌─ inline_tables:5:16 + │ +5 
│ e = {a=1,b=2,c={}} + │ ^^ + +note[table]: root_e + ┌─ inline_tables:5:5 + │ +5 │ e = {a=1,b=2,c={}} + │ ^^^^^^^^^^^^^^ + +note[array]: root_f_a + ┌─ inline_tables:6:8 + │ +6 │ f = {a=[ + │ ╭────────^ +7 │ │ ]} + │ ╰─^ + +note[table]: root_f + ┌─ inline_tables:6:5 + │ +6 │ f = {a=[ + │ ╭─────^ +7 │ │ ]} + │ ╰──^ + +note[array]: root_g_a + ┌─ inline_tables:8:10 + │ +8 │ g = {"a"=[ + │ ╭──────────^ +9 │ │ ]} + │ ╰─^ + +note[table]: root_g + ┌─ inline_tables:8:5 + │ +8 │ g = {"a"=[ + │ ╭─────^ +9 │ │ ]} + │ ╰──^ + +note[table]: root_h_0 + ┌─ inline_tables:11:5 + │ +11 │ {}, + │ ^^ + +note[table]: root_h_1 + ┌─ inline_tables:12:5 + │ +12 │ {}, + │ ^^ + +note[array]: root_h + ┌─ inline_tables:10:5 + │ +10 │ h = [ + │ ╭─────^ +11 │ │ {}, +12 │ │ {}, +13 │ │ ] + │ ╰─^ + +note[table]: root + ┌─ inline_tables:1:1 + │ + 1 │ ╭ a = {} + 2 │ │ b = {b=1} + 3 │ │ c = { b = 1 } + 4 │ │ d = {a=1,b=2} + · │ +12 │ │ {}, +13 │ │ ] + │ ╰─^ + + diff --git a/integ-tests/tests/snapshots/parser__key_names-2.snap b/integ-tests/tests/snapshots/parser__key_names-2.snap new file mode 100644 index 0000000..2bacc40 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__key_names-2.snap @@ -0,0 +1,101 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[integer]: root_! + ┌─ key_names:8:7 + │ +8 │ "!" 
= 3 + │ ^ + +note[integer]: root_" + ┌─ key_names:10:8 + │ +10 │ "\"" = 3 + │ ^ + +note[integer]: root_- + ┌─ key_names:5:5 + │ +5 │ - = 3 + │ ^ + +note[integer]: root_8 + ┌─ key_names:6:5 + │ +6 │ 8 = 8 + │ ^ + +note[integer]: root__ + ┌─ key_names:4:5 + │ +4 │ _ = 3 + │ ^ + +note[integer]: root_a + ┌─ key_names:7:7 + │ +7 │ "a" = 3 + │ ^ + +note[integer]: root_a^b + ┌─ key_names:9:9 + │ +9 │ "a^b" = 3 + │ ^ + +note[string]: root_character encoding + ┌─ key_names:11:25 + │ +11 │ "character encoding" = "value" + │ ^^^^^ + +note[integer]: root_foo + ┌─ key_names:1:7 + │ +1 │ foo = 3 + │ ^ + +note[integer]: root_foo_-2--3--r23f--4-f2-4 + ┌─ key_names:3:27 + │ +3 │ foo_-2--3--r23f--4-f2-4 = 3 + │ ^ + +note[integer]: root_foo_3 + ┌─ key_names:2:9 + │ +2 │ foo_3 = 3 + │ ^ + +note[integer]: root_key#name + ┌─ key_names:13:14 + │ +13 │ "key#name" = 5 + │ ^ + +note[integer]: root_~!@#$^&*()_+-`1234567890[]\|/?><.,;:' + ┌─ key_names:14:44 + │ +14 │ "~!@#$^&*()_+-`1234567890[]\\|/?><.,;:'" = 1 + │ ^ + +note[string]: root_ʎǝʞ + ┌─ key_names:12:10 + │ +12 │ 'ʎǝʞ' = "value" + │ ^^^^^ + +note[table]: root + ┌─ key_names:1:1 + │ + 1 │ ╭ foo = 3 + 2 │ │ foo_3 = 3 + 3 │ │ foo_-2--3--r23f--4-f2-4 = 3 + 4 │ │ _ = 3 + · │ +14 │ │ "~!@#$^&*()_+-`1234567890[]\\|/?><.,;:'" = 1 +15 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/parser__key_no_space-2.snap b/integ-tests/tests/snapshots/parser__key_no_space-2.snap new file mode 100644 index 0000000..7f82355 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__key_no_space-2.snap @@ -0,0 +1,17 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[integer]: root_foo + ┌─ key_no_space:1:5 + │ +1 │ foo=42 + │ ^^ + +note[table]: root + ┌─ key_no_space:1:1 + │ +1 │ foo=42 + │ ^^^^^^ + + diff --git a/integ-tests/tests/snapshots/parser__literal_eats_crlf-2.snap b/integ-tests/tests/snapshots/parser__literal_eats_crlf-2.snap new file mode 100644 index 0000000..6dd0cab --- /dev/null +++ 
b/integ-tests/tests/snapshots/parser__literal_eats_crlf-2.snap @@ -0,0 +1,34 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[string]: root_bar + ┌─ literal_eats_crlf:3:10 + │ +3 │ bar = """\ + │ ╭──────────^ +4 │ │ +5 │ │ +6 │ │ a""" + │ ╰────^ + +note[string]: root_foo + ┌─ literal_eats_crlf:1:10 + │ +1 │ foo = """\ + │ ╭──────────^ +2 │ │ """ + │ ╰^ + +note[table]: root + ┌─ literal_eats_crlf:1:1 + │ +1 │ ╭ foo = """\ +2 │ │ """ +3 │ │ bar = """\ +4 │ │ +5 │ │ +6 │ │ a""" + │ ╰───────^ + + diff --git a/integ-tests/tests/snapshots/parser__many_blank-2.snap b/integ-tests/tests/snapshots/parser__many_blank-2.snap new file mode 100644 index 0000000..ed17669 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__many_blank-2.snap @@ -0,0 +1,22 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[string]: root_foo + ┌─ many_blank:2:1 + │ +2 │ ╭ +3 │ │ +4 │ │ """ + │ ╰^ + +note[table]: root + ┌─ many_blank:1:1 + │ +1 │ ╭ foo = """ +2 │ │ +3 │ │ +4 │ │ """ + │ ╰───^ + + diff --git a/integ-tests/tests/snapshots/parser__strings-2.snap b/integ-tests/tests/snapshots/parser__strings-2.snap new file mode 100644 index 0000000..421db90 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__strings-2.snap @@ -0,0 +1,289 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[string]: root_answer1 + ┌─ strings:63:12 + │ +63 │ answer1 = "\u000B" + │ ^^^^^^ + +note[string]: root_answer10 + ┌─ strings:67:13 + │ +67 │ answer10 = "\u03b4α" + │ ^^^^^^^ + +note[string]: root_answer11 + ┌─ strings:68:13 + │ +68 │ answer11 = "\U0000abc1" + │ ^^^^^^^^^^ + +note[string]: root_answer4 + ┌─ strings:64:12 + │ +64 │ answer4 = "\u03B4α" + │ ^^^^^^^ + +note[string]: root_answer8 + ┌─ strings:65:12 + │ +65 │ answer8 = "\U000003B4β" + │ ^^^^^^^^^^^ + +note[string]: root_answer9 + ┌─ strings:66:12 + │ +66 │ answer9 = "\uc0de" + │ ^^^^^^ + +note[string]: root_backslash + ┌─ strings:54:14 + │ +54 │ backslash = "This string has a \\ 
backslash character." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_backspace + ┌─ strings:47:14 + │ +47 │ backspace = "This string has a \b backspace character." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_bar + ┌─ strings:1:8 + │ +1 │ bar = "\U00000000" + │ ^^^^^^^^^^ + +note[string]: root_carriage + ┌─ strings:51:13 + │ +51 │ carriage = "This string has a \r carriage return character." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_delete + ┌─ strings:59:11 + │ +59 │ delete = "This string has a \u007F delete control code." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_empty + ┌─ strings:5:9 + │ +5 │ empty = "" + │ ^ + +note[string]: root_firstnl + ┌─ strings:36:1 + │ +36 │ This string has a ' quote character.''' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_formfeed + ┌─ strings:50:13 + │ +50 │ formfeed = "This string has a \f form feed character." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_key1 + ┌─ strings:3:9 + │ +3 │ key1 = "One\nTwo" + │ ^^^^^^^^ + +note[string]: root_key2 + ┌─ strings:4:11 + │ +4 │ key2 = """One\nTwo""" + │ ^^^^^^^^ + +note[string]: root_key3 + ┌─ strings:7:1 + │ +7 │ ╭ One +8 │ │ Two""" + │ ╰───^ + +note[string]: root_key4 + ┌─ strings:10:9 + │ +10 │ key4 = "The quick brown fox jumps over the lazy dog." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_key5 + ┌─ strings:12:1 + │ +12 │ ╭ The quick brown \ +13 │ │ +14 │ │ +15 │ │ fox jumps over \ +16 │ │ the lazy dog.""" + │ ╰─────────────^ + +note[string]: root_key6 + ┌─ strings:17:11 + │ +17 │ key6 = """\ + │ ╭───────────^ +18 │ │ The quick brown \ +19 │ │ fox jumps over \ +20 │ │ the lazy dog.\ +21 │ │ """ + │ ╰───^ + +note[string]: root_lbackslash + ┌─ strings:44:15 + │ +44 │ lbackslash = 'This string has a \\ backslash character.' 
+ │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_lbackspace + ┌─ strings:38:15 + │ +38 │ lbackspace = 'This string has a \b backspace character.' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_lcarriage + ┌─ strings:42:14 + │ +42 │ lcarriage = 'This string has a \r carriage return character.' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_lformfeed + ┌─ strings:41:14 + │ +41 │ lformfeed = 'This string has a \f form feed character.' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_lines + ┌─ strings:30:1 + │ +30 │ ╭ The first newline is +31 │ │ trimmed in raw strings. +32 │ │ All other whitespace +33 │ │ is preserved. +34 │ │ ''' + │ ╰^ + +note[string]: root_lnewline + ┌─ strings:40:13 + │ +40 │ lnewline = 'This string has a \n new line character.' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_lslash + ┌─ strings:43:11 + │ +43 │ lslash = 'This string has a \/ slash character.' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_ltab + ┌─ strings:39:9 + │ +39 │ ltab = 'This string has a \t tab character.' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_newline + ┌─ strings:49:12 + │ +49 │ newline = "This string has a \n new line character." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_notunicode1 + ┌─ strings:55:16 + │ +55 │ notunicode1 = "This string does not have a unicode \\u escape." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_notunicode2 + ┌─ strings:56:16 + │ +56 │ notunicode2 = "This string does not have a unicode \u005Cu escape." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_notunicode3 + ┌─ strings:57:16 + │ +57 │ notunicode3 = "This string does not have a unicode \\u0075 escape." 
+ │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_notunicode4 + ┌─ strings:58:16 + │ +58 │ notunicode4 = "This string does not have a unicode \\\u0075 escape." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_quote + ┌─ strings:52:10 + │ +52 │ quote = "This string has a \" quote character." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_quoted + ┌─ strings:25:11 + │ +25 │ quoted = 'Tom "Dubs" Preston-Werner' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_regex + ┌─ strings:26:10 + │ +26 │ regex = '<\i\c*\s*>' + │ ^^^^^^^^^^ + +note[string]: root_regex2 + ┌─ strings:28:13 + │ +28 │ regex2 = '''I [dw]on't need \d{2} apples''' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_slash + ┌─ strings:53:10 + │ +53 │ slash = "This string has a / slash character." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_tab + ┌─ strings:48:8 + │ +48 │ tab = "This string has a \t tab character." + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_unicode + ┌─ strings:2:12 + │ +2 │ unicode = "δ" + │ ^ + +note[string]: root_unitseparator + ┌─ strings:60:18 + │ +60 │ unitseparator = "This string has a \u001F unit separator control code." 
+ │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_winpath + ┌─ strings:23:12 + │ +23 │ winpath = 'C:\Users\nodejs\templates' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_winpath2 + ┌─ strings:24:13 + │ +24 │ winpath2 = '\\ServerX\admin$\system32\' + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[table]: root + ┌─ strings:1:1 + │ + 1 │ ╭ bar = "\U00000000" + 2 │ │ unicode = "δ" + 3 │ │ key1 = "One\nTwo" + 4 │ │ key2 = """One\nTwo""" + · │ +68 │ │ answer11 = "\U0000abc1" +69 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/parser__table_names-2.snap b/integ-tests/tests/snapshots/parser__table_names-2.snap new file mode 100644 index 0000000..c1a232e --- /dev/null +++ b/integ-tests/tests/snapshots/parser__table_names-2.snap @@ -0,0 +1,59 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[table]: root_" + ┌─ table_names:1:1 + │ +1 │ [a."b"] + │ ^ + +note[table]: root_"" + ┌─ table_names:1:1 + │ +1 │ [a."b"] + │ ^ + +note[table]: root_a_b + ┌─ table_names:1:1 + │ +1 │ [a."b"] + │ ^ + +note[table]: root_a + ┌─ table_names:1:1 + │ +1 │ [a."b"] + │ ^ + +note[table]: root_a.a + ┌─ table_names:1:1 + │ +1 │ [a."b"] + │ ^ + +note[table]: root_f f + ┌─ table_names:1:1 + │ +1 │ [a."b"] + │ ^ + +note[table]: root_f.f + ┌─ table_names:1:1 + │ +1 │ [a."b"] + │ ^ + +note[table]: root + ┌─ table_names:1:1 + │ +1 │ ╭ [a."b"] +2 │ │ ["f f"] +3 │ │ ["f.f"] +4 │ │ ["\""] +5 │ │ ['a.a'] +6 │ │ ['""'] +7 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/parser__tables_in_arrays-2.snap b/integ-tests/tests/snapshots/parser__tables_in_arrays-2.snap new file mode 100644 index 0000000..b9f451d --- /dev/null +++ b/integ-tests/tests/snapshots/parser__tables_in_arrays-2.snap @@ -0,0 +1,47 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[table]: root_foo_0_bar + ┌─ tables_in_arrays:1:1 + │ +1 │ [[foo]] + │ ^ + +note[table]: root_foo_0 + ┌─ tables_in_arrays:1:1 + │ +1 │ [[foo]] + │ ^ + +note[table]: 
root_foo_1_bar + ┌─ tables_in_arrays:1:1 + │ +1 │ [[foo]] + │ ^ + +note[table]: root_foo_1 + ┌─ tables_in_arrays:1:1 + │ +1 │ [[foo]] + │ ^ + +note[array]: root_foo + ┌─ tables_in_arrays:1:1 + │ +1 │ [[foo]] + │ ^ + +note[table]: root + ┌─ tables_in_arrays:1:1 + │ + 1 │ ╭ [[foo]] + 2 │ │ #… + 3 │ │ [foo.bar] + 4 │ │ #… + · │ + 9 │ │ #... +10 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/parser__underscores-2.snap b/integ-tests/tests/snapshots/parser__underscores-2.snap new file mode 100644 index 0000000..3bce9fd --- /dev/null +++ b/integ-tests/tests/snapshots/parser__underscores-2.snap @@ -0,0 +1,46 @@ +--- +source: integ-tests/tests/parser.rs +expression: spans +--- +note[integer]: root_hundred + ┌─ underscores:2:11 + │ +2 │ hundred = 1_0_0 + │ ^^^^^ + +note[integer]: root_ten + ┌─ underscores:1:7 + │ +1 │ ten = 1_0 + │ ^^^ + +note[integer]: root_thousand + ┌─ underscores:3:12 + │ +3 │ thousand = 1_000 + │ ^^^^^ + +note[integer]: root_thousand-neg + ┌─ underscores:5:15 + │ +5 │ thousand-neg =-1_000 + │ ^^^^^^ + +note[integer]: root_thousand-pos + ┌─ underscores:4:16 + │ +4 │ thousand-pos = +1_000 + │ ^^^^^^ + +note[table]: root + ┌─ underscores:1:1 + │ +1 │ ╭ ten = 1_0 +2 │ │ hundred = 1_0_0 +3 │ │ thousand = 1_000 +4 │ │ thousand-pos = +1_000 +5 │ │ thousand-neg =-1_000 +6 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/valid__arrays__empty-2.snap b/integ-tests/tests/snapshots/valid__arrays__empty-2.snap new file mode 100644 index 0000000..fd130eb --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__empty-2.snap @@ -0,0 +1,41 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[array]: root_thevoid_0_0_0_0 + ┌─ empty:1:15 + │ +1 │ thevoid = [[[[[]]]]] + │ ^^ + +note[array]: root_thevoid_0_0_0 + ┌─ empty:1:14 + │ +1 │ thevoid = [[[[[]]]]] + │ ^^^^ + +note[array]: root_thevoid_0_0 + ┌─ empty:1:13 + │ +1 │ thevoid = [[[[[]]]]] + │ ^^^^^^ + +note[array]: root_thevoid_0 + ┌─ empty:1:12 + │ +1 │ thevoid = [[[[[]]]]] + │ 
^^^^^^^^ + +note[array]: root_thevoid + ┌─ empty:1:11 + │ +1 │ thevoid = [[[[[]]]]] + │ ^^^^^^^^^^ + +note[table]: root + ┌─ empty:1:1 + │ +1 │ thevoid = [[[[[]]]]] + │ ^^^^^^^^^^^^^^^^^^^^ + + diff --git a/integ-tests/tests/snapshots/valid__arrays__ints_and_arrays-2.snap b/integ-tests/tests/snapshots/valid__arrays__ints_and_arrays-2.snap new file mode 100644 index 0000000..1ac51d6 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__ints_and_arrays-2.snap @@ -0,0 +1,35 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[integer]: root_arrays-and-ints_0 + ┌─ ints_and_arrays:1:21 + │ +1 │ arrays-and-ints = [1, ["Arrays are not integers."]] + │ ^ + +note[string]: root_arrays-and-ints_1_0 + ┌─ ints_and_arrays:1:26 + │ +1 │ arrays-and-ints = [1, ["Arrays are not integers."]] + │ ^^^^^^^^^^^^^^^^^^^^^^^^ + +note[array]: root_arrays-and-ints_1 + ┌─ ints_and_arrays:1:24 + │ +1 │ arrays-and-ints = [1, ["Arrays are not integers."]] + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[array]: root_arrays-and-ints + ┌─ ints_and_arrays:1:20 + │ +1 │ arrays-and-ints = [1, ["Arrays are not integers."]] + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[table]: root + ┌─ ints_and_arrays:1:1 + │ +1 │ arrays-and-ints = [1, ["Arrays are not integers."]] + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + diff --git a/integ-tests/tests/snapshots/valid__arrays__nested-2.snap b/integ-tests/tests/snapshots/valid__arrays__nested-2.snap new file mode 100644 index 0000000..aef80d4 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__nested-2.snap @@ -0,0 +1,41 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[string]: root_nest_0_0 + ┌─ nested:1:11 + │ +1 │ nest = [["a"], ["b"]] + │ ^ + +note[array]: root_nest_0 + ┌─ nested:1:9 + │ +1 │ nest = [["a"], ["b"]] + │ ^^^^^ + +note[string]: root_nest_1_0 + ┌─ nested:1:18 + │ +1 │ nest = [["a"], ["b"]] + │ ^ + +note[array]: root_nest_1 + ┌─ nested:1:16 + │ +1 │ nest = [["a"], ["b"]] + │ 
^^^^^ + +note[array]: root_nest + ┌─ nested:1:8 + │ +1 │ nest = [["a"], ["b"]] + │ ^^^^^^^^^^^^^^ + +note[table]: root + ┌─ nested:1:1 + │ +1 │ nest = [["a"], ["b"]] + │ ^^^^^^^^^^^^^^^^^^^^^ + + diff --git a/integ-tests/tests/snapshots/valid__arrays__no_spaces-2.snap b/integ-tests/tests/snapshots/valid__arrays__no_spaces-2.snap new file mode 100644 index 0000000..96b509c --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__no_spaces-2.snap @@ -0,0 +1,35 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[integer]: root_ints_0 + ┌─ no_spaces:1:9 + │ +1 │ ints = [1,2,3] + │ ^ + +note[integer]: root_ints_1 + ┌─ no_spaces:1:11 + │ +1 │ ints = [1,2,3] + │ ^ + +note[integer]: root_ints_2 + ┌─ no_spaces:1:13 + │ +1 │ ints = [1,2,3] + │ ^ + +note[array]: root_ints + ┌─ no_spaces:1:8 + │ +1 │ ints = [1,2,3] + │ ^^^^^^^ + +note[table]: root + ┌─ no_spaces:1:1 + │ +1 │ ints = [1,2,3] + │ ^^^^^^^^^^^^^^ + + diff --git a/integ-tests/tests/snapshots/valid__arrays__one-2.snap b/integ-tests/tests/snapshots/valid__arrays__one-2.snap new file mode 100644 index 0000000..b0fb572 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__one-2.snap @@ -0,0 +1,37 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[string]: root_people_0_first_name + ┌─ one:2:15 + │ +2 │ first_name = "Bruce" + │ ^^^^^ + +note[string]: root_people_0_last_name + ┌─ one:3:14 + │ +3 │ last_name = "Springsteen" + │ ^^^^^^^^^^^ + +note[table]: root_people_0 + ┌─ one:1:1 + │ +1 │ [[people]] + │ ^ + +note[array]: root_people + ┌─ one:1:1 + │ +1 │ [[people]] + │ ^ + +note[table]: root + ┌─ one:1:1 + │ +1 │ ╭ [[people]] +2 │ │ first_name = "Bruce" +3 │ │ last_name = "Springsteen" + │ ╰─────────────────────────^ + + diff --git a/integ-tests/tests/snapshots/valid__arrays__strings_and_ints-2.snap b/integ-tests/tests/snapshots/valid__arrays__strings_and_ints-2.snap new file mode 100644 index 0000000..f32b604 --- /dev/null +++ 
b/integ-tests/tests/snapshots/valid__arrays__strings_and_ints-2.snap @@ -0,0 +1,29 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[string]: root_strings-and-ints_0 + ┌─ strings_and_ints:1:22 + │ +1 │ strings-and-ints = ["hi", 42] + │ ^^ + +note[integer]: root_strings-and-ints_1 + ┌─ strings_and_ints:1:27 + │ +1 │ strings-and-ints = ["hi", 42] + │ ^^ + +note[array]: root_strings-and-ints + ┌─ strings_and_ints:1:20 + │ +1 │ strings-and-ints = ["hi", 42] + │ ^^^^^^^^^^ + +note[table]: root + ┌─ strings_and_ints:1:1 + │ +1 │ strings-and-ints = ["hi", 42] + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + diff --git a/integ-tests/tests/snapshots/valid__comments-2.snap b/integ-tests/tests/snapshots/valid__comments-2.snap new file mode 100644 index 0000000..1929ca1 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__comments-2.snap @@ -0,0 +1,54 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[integer]: root_group_answer + ┌─ comments:8:10 + │ +8 │ answer = 42 # Comment + │ ^^ + +note[integer]: root_group_more_0 + ┌─ comments:17:3 + │ +17 │ 42, 42, # Comments within arrays are fun. + │ ^^ + +note[integer]: root_group_more_1 + ┌─ comments:17:7 + │ +17 │ 42, 42, # Comments within arrays are fun. + │ ^^ + +note[array]: root_group_more + ┌─ comments:11:8 + │ +11 │ more = [ # Comment + │ ╭────────^ +12 │ │ # What about multiple # comments? +13 │ │ # Can you handle it? +14 │ │ # + · │ +23 │ │ # ] Did I fool you? +24 │ │ ] # Hopefully not. + │ ╰─^ + +note[table]: root_group + ┌─ comments:1:1 + │ +1 │ # Top comment. + │ ^ + +note[table]: root + ┌─ comments:1:1 + │ + 1 │ ╭ # Top comment. + 2 │ │ # Top comment. + 3 │ │ # Top comment. + 4 │ │ + · │ +24 │ │ ] # Hopefully not. 
+25 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/valid__evil-2.snap b/integ-tests/tests/snapshots/valid__evil-2.snap new file mode 100644 index 0000000..c3d484c --- /dev/null +++ b/integ-tests/tests/snapshots/valid__evil-2.snap @@ -0,0 +1,111 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[string]: root_the_hard_another_test_string + ┌─ evil:12:28 + │ +12 │ another_test_string = " Same thing, but with a string #" + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_the_hard_bit#_multi_line_array_0 + ┌─ evil:19:14 + │ +19 │ "]", + │ ^ + +note[array]: root_the_hard_bit#_multi_line_array + ┌─ evil:18:28 + │ +18 │ multi_line_array = [ + │ ╭────────────────────────────^ +19 │ │ "]", +20 │ │ # ] Oh yes I did +21 │ │ ] + │ ╰─────────────^ + +note[string]: root_the_hard_bit#_what? + ┌─ evil:17:20 + │ +17 │ "what?" = "You don't think some user won't do that?" + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[table]: root_the_hard_bit# + ┌─ evil:1:1 + │ +1 │ # Test file for TOML + │ ^ + +note[string]: root_the_hard_harder_test_string + ┌─ evil:13:27 + │ +13 │ harder_test_string = " And when \"'s are in the string, along with # \"" # "and comments are there too" + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_the_hard_test_array_0 + ┌─ evil:9:21 + │ +9 │ test_array = [ "] ", " # "] # ] There you go, parse this! + │ ^^ + +note[string]: root_the_hard_test_array_1 + ┌─ evil:9:27 + │ +9 │ test_array = [ "] ", " # "] # ] There you go, parse this! + │ ^^^ + +note[array]: root_the_hard_test_array + ┌─ evil:9:18 + │ +9 │ test_array = [ "] ", " # "] # ] There you go, parse this! 
+ │ ^^^^^^^^^^^^^^ + +note[string]: root_the_hard_test_array2_0 + ┌─ evil:10:22 + │ +10 │ test_array2 = [ "Test #11 ]proved that", "Experiment #9 was a success" ] + │ ^^^^^^^^^^^^^^^^^^^^^ + +note[string]: root_the_hard_test_array2_1 + ┌─ evil:10:47 + │ +10 │ test_array2 = [ "Test #11 ]proved that", "Experiment #9 was a success" ] + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[array]: root_the_hard_test_array2 + ┌─ evil:10:19 + │ +10 │ test_array2 = [ "Test #11 ]proved that", "Experiment #9 was a success" ] + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[table]: root_the_hard + ┌─ evil:1:1 + │ +1 │ # Test file for TOML + │ ^ + +note[string]: root_the_test_string + ┌─ evil:6:16 + │ +6 │ test_string = "You'll hate me after this - #" # " Annoying, isn't it? + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[table]: root_the + ┌─ evil:1:1 + │ +1 │ # Test file for TOML + │ ^ + +note[table]: root + ┌─ evil:1:1 + │ + 1 │ ╭ # Test file for TOML + 2 │ │ # Only this one tries to emulate a TOML file written by a user of the kind of parser writers probably hate + 3 │ │ # This part you'll really hate + 4 │ │ + · │ +21 │ │ ] +22 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/valid__numbers__integers-2.snap b/integ-tests/tests/snapshots/valid__numbers__integers-2.snap new file mode 100644 index 0000000..54b582b --- /dev/null +++ b/integ-tests/tests/snapshots/valid__numbers__integers-2.snap @@ -0,0 +1,89 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[integer]: root_answer + ┌─ integers:1:10 + │ +1 │ answer = 42 + │ ^^ + +note[integer]: root_bin1 + ┌─ integers:17:8 + │ +17 │ bin1 = 0b11010110 + │ ^^^^^^^^^^ + +note[integer]: root_hex1 + ┌─ integers:8:8 + │ +8 │ hex1 = 0xDEADBEEF + │ ^^^^^^^^^^ + +note[integer]: root_hex2 + ┌─ integers:9:8 + │ +9 │ hex2 = 0xdeadbeef + │ ^^^^^^^^^^ + +note[integer]: root_hex3 + ┌─ integers:10:8 + │ +10 │ hex3 = 0xdead_beef + │ ^^^^^^^^^^^ + +note[integer]: root_long-answer + ┌─ integers:19:15 + │ +19 │ 
long-answer = 9223372036854775807 + │ ^^^^^^^^^^^^^^^^^^^ + +note[integer]: root_long-neganswer + ┌─ integers:20:18 + │ +20 │ long-neganswer = -9223372036854775808 + │ ^^^^^^^^^^^^^^^^^^^^ + +note[integer]: root_neg_zero + ┌─ integers:4:12 + │ +4 │ neg_zero = -0 + │ ^^ + +note[integer]: root_neganswer + ┌─ integers:2:13 + │ +2 │ neganswer = -42 + │ ^^^ + +note[integer]: root_oct1 + ┌─ integers:13:8 + │ +13 │ oct1 = 0o01234567 + │ ^^^^^^^^^^ + +note[integer]: root_oct2 + ┌─ integers:14:8 + │ +14 │ oct2 = 0o755 # useful for Unix file permissions + │ ^^^^^ + +note[integer]: root_pos_zero + ┌─ integers:5:12 + │ +5 │ pos_zero = +0 + │ ^^ + +note[table]: root + ┌─ integers:1:1 + │ + 1 │ ╭ answer = 42 + 2 │ │ neganswer = -42 + 3 │ │ + 4 │ │ neg_zero = -0 + · │ +20 │ │ long-neganswer = -9223372036854775808 +21 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/valid__tables__array_many-2.snap b/integ-tests/tests/snapshots/valid__tables__array_many-2.snap new file mode 100644 index 0000000..747e9a4 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__array_many-2.snap @@ -0,0 +1,77 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[string]: root_people_0_first_name + ┌─ array_many:3:15 + │ +3 │ first_name = "Bruce" + │ ^^^^^ + +note[string]: root_people_0_last_name + ┌─ array_many:4:14 + │ +4 │ last_name = "Springsteen" + │ ^^^^^^^^^^^ + +note[table]: root_people_0 + ┌─ array_many:1:1 + │ +1 │ + │ ^ + +note[string]: root_people_1_first_name + ┌─ array_many:7:15 + │ +7 │ first_name = "Eric" + │ ^^^^ + +note[string]: root_people_1_last_name + ┌─ array_many:8:14 + │ +8 │ last_name = "Clapton" + │ ^^^^^^^ + +note[table]: root_people_1 + ┌─ array_many:1:1 + │ +1 │ + │ ^ + +note[string]: root_people_2_first_name + ┌─ array_many:11:15 + │ +11 │ first_name = "Bob" + │ ^^^ + +note[string]: root_people_2_last_name + ┌─ array_many:12:14 + │ +12 │ last_name = "Seger" + │ ^^^^^ + +note[table]: root_people_2 + ┌─ array_many:1:1 + │ +1 │ + │ ^ + 
+note[array]: root_people + ┌─ array_many:1:1 + │ +1 │ + │ ^ + +note[table]: root + ┌─ array_many:1:1 + │ + 1 │ ╭ + 2 │ │ [[people]] + 3 │ │ first_name = "Bruce" + 4 │ │ last_name = "Springsteen" + · │ +12 │ │ last_name = "Seger" +13 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_after-2.snap b/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_after-2.snap new file mode 100644 index 0000000..6ac9fb9 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_after-2.snap @@ -0,0 +1,47 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[integer]: root_a_b_c_answer + ┌─ implicit_and_explicit_after:3:10 + │ +3 │ answer = 42 + │ ^^ + +note[table]: root_a_b_c + ┌─ implicit_and_explicit_after:1:1 + │ +1 │ + │ ^ + +note[table]: root_a_b + ┌─ implicit_and_explicit_after:1:1 + │ +1 │ + │ ^ + +note[integer]: root_a_better + ┌─ implicit_and_explicit_after:6:10 + │ +6 │ better = 43 + │ ^^ + +note[table]: root_a + ┌─ implicit_and_explicit_after:1:1 + │ +1 │ + │ ^ + +note[table]: root + ┌─ implicit_and_explicit_after:1:1 + │ +1 │ ╭ +2 │ │ [a.b.c] +3 │ │ answer = 42 +4 │ │ +5 │ │ [a] +6 │ │ better = 43 +7 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_before-2.snap b/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_before-2.snap new file mode 100644 index 0000000..9d8576f --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__implicit_and_explicit_before-2.snap @@ -0,0 +1,47 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[integer]: root_a_b_c_answer + ┌─ implicit_and_explicit_before:6:10 + │ +6 │ answer = 42 + │ ^^ + +note[table]: root_a_b_c + ┌─ implicit_and_explicit_before:1:1 + │ +1 │ + │ ^ + +note[table]: root_a_b + ┌─ implicit_and_explicit_before:1:1 + │ +1 │ + │ ^ + +note[integer]: root_a_better + ┌─ implicit_and_explicit_before:3:10 + │ +3 │ better = 43 + │ ^^ + 
+note[table]: root_a + ┌─ implicit_and_explicit_before:1:1 + │ +1 │ + │ ^ + +note[table]: root + ┌─ implicit_and_explicit_before:1:1 + │ +1 │ ╭ +2 │ │ [a] +3 │ │ better = 43 +4 │ │ +5 │ │ [a.b.c] +6 │ │ answer = 42 +7 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/valid__tables__implicit_array-2.snap b/integ-tests/tests/snapshots/valid__tables__implicit_array-2.snap new file mode 100644 index 0000000..52df972 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__implicit_array-2.snap @@ -0,0 +1,36 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[string]: root_albums_songs_0_name + ┌─ implicit_array:2:9 + │ +2 │ name = "Glory Days" + │ ^^^^^^^^^^ + +note[table]: root_albums_songs_0 + ┌─ implicit_array:1:1 + │ +1 │ [[albums.songs]] + │ ^ + +note[array]: root_albums_songs + ┌─ implicit_array:1:1 + │ +1 │ [[albums.songs]] + │ ^ + +note[table]: root_albums + ┌─ implicit_array:1:1 + │ +1 │ [[albums.songs]] + │ ^ + +note[table]: root + ┌─ implicit_array:1:1 + │ +1 │ ╭ [[albums.songs]] +2 │ │ name = "Glory Days" + │ ╰───────────────────^ + + diff --git a/integ-tests/tests/snapshots/valid__tables__implicit_groups-2.snap b/integ-tests/tests/snapshots/valid__tables__implicit_groups-2.snap new file mode 100644 index 0000000..cfb2e66 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__implicit_groups-2.snap @@ -0,0 +1,36 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[integer]: root_a_b_c_answer + ┌─ implicit_groups:2:10 + │ +2 │ answer = 42 + │ ^^ + +note[table]: root_a_b_c + ┌─ implicit_groups:1:1 + │ +1 │ [a.b.c] + │ ^ + +note[table]: root_a_b + ┌─ implicit_groups:1:1 + │ +1 │ [a.b.c] + │ ^ + +note[table]: root_a + ┌─ implicit_groups:1:1 + │ +1 │ [a.b.c] + │ ^ + +note[table]: root + ┌─ implicit_groups:1:1 + │ +1 │ ╭ [a.b.c] +2 │ │ answer = 42 + │ ╰───────────^ + + diff --git a/integ-tests/tests/snapshots/valid__tables__nested_arrays-2.snap 
b/integ-tests/tests/snapshots/valid__tables__nested_arrays-2.snap new file mode 100644 index 0000000..b0829a5 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__nested_arrays-2.snap @@ -0,0 +1,107 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[string]: root_albums_0_name + ┌─ nested_arrays:3:9 + │ +3 │ name = "Born to Run" + │ ^^^^^^^^^^^ + +note[string]: root_albums_0_songs_0_name + ┌─ nested_arrays:6:11 + │ +6 │ name = "Jungleland" + │ ^^^^^^^^^^ + +note[table]: root_albums_0_songs_0 + ┌─ nested_arrays:1:1 + │ +1 │ + │ ^ + +note[string]: root_albums_0_songs_1_name + ┌─ nested_arrays:9:11 + │ +9 │ name = "Meeting Across the River" + │ ^^^^^^^^^^^^^^^^^^^^^^^^ + +note[table]: root_albums_0_songs_1 + ┌─ nested_arrays:1:1 + │ +1 │ + │ ^ + +note[array]: root_albums_0_songs + ┌─ nested_arrays:1:1 + │ +1 │ + │ ^ + +note[table]: root_albums_0 + ┌─ nested_arrays:1:1 + │ +1 │ + │ ^ + +note[string]: root_albums_1_name + ┌─ nested_arrays:12:9 + │ +12 │ name = "Born in the USA" + │ ^^^^^^^^^^^^^^^ + +note[string]: root_albums_1_songs_0_name + ┌─ nested_arrays:15:11 + │ +15 │ name = "Glory Days" + │ ^^^^^^^^^^ + +note[table]: root_albums_1_songs_0 + ┌─ nested_arrays:1:1 + │ +1 │ + │ ^ + +note[string]: root_albums_1_songs_1_name + ┌─ nested_arrays:18:11 + │ +18 │ name = "Dancing in the Dark" + │ ^^^^^^^^^^^^^^^^^^^ + +note[table]: root_albums_1_songs_1 + ┌─ nested_arrays:1:1 + │ +1 │ + │ ^ + +note[array]: root_albums_1_songs + ┌─ nested_arrays:1:1 + │ +1 │ + │ ^ + +note[table]: root_albums_1 + ┌─ nested_arrays:1:1 + │ +1 │ + │ ^ + +note[array]: root_albums + ┌─ nested_arrays:1:1 + │ +1 │ + │ ^ + +note[table]: root + ┌─ nested_arrays:1:1 + │ + 1 │ ╭ + 2 │ │ [[albums]] + 3 │ │ name = "Born to Run" + 4 │ │ + · │ +18 │ │ name = "Dancing in the Dark" +19 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/valid__tables__sub_empty-2.snap b/integ-tests/tests/snapshots/valid__tables__sub_empty-2.snap new file mode 100644 index 0000000..0bb920f 
--- /dev/null +++ b/integ-tests/tests/snapshots/valid__tables__sub_empty-2.snap @@ -0,0 +1,24 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[table]: root_a_b + ┌─ sub_empty:1:1 + │ +1 │ [a] + │ ^ + +note[table]: root_a + ┌─ sub_empty:1:1 + │ +1 │ [a] + │ ^ + +note[table]: root + ┌─ sub_empty:1:1 + │ +1 │ ╭ [a] +2 │ │ [a.b] + │ ╰─────^ + + diff --git a/toml-file/src/tokens.rs b/toml-file/src/tokens.rs index 02e9091..69f2bb3 100644 --- a/toml-file/src/tokens.rs +++ b/toml-file/src/tokens.rs @@ -87,18 +87,9 @@ impl<'a> Tokenizer<'a> { Some((start, '}')) => (start, Token::RightBrace), Some((start, '[')) => (start, Token::LeftBracket), Some((start, ']')) => (start, Token::RightBracket), - Some((start, '\'')) => { - return self - .literal_string(start) - .map(|t| Some((self.step_span(start), t))) - } - Some((start, '"')) => { - return self - .basic_string(start) - .map(|t| Some((self.step_span(start), t))) - } + Some((start, '\'')) => return self.literal_string(start).map(|(s, t)| Some((s, t))), + Some((start, '"')) => return self.basic_string(start).map(|(s, t)| Some((s, t))), Some((start, ch)) if is_keylike(ch) => (start, self.keylike(start)), - Some((start, ch)) => return Err(Error::Unexpected(start, ch)), None => return Ok(None), }; @@ -168,6 +159,7 @@ impl<'a> Tokenizer<'a> { src, val, multiline, + .. 
}, )) => { let offset = self.substr_offset(src); @@ -261,6 +253,11 @@ impl<'a> Tokenizer<'a> { Token::Comment(&self.input[start..self.current()]) } + /// String spans are treated slightly differently, as we only want the + /// characters in the string, not the quotes, as once the user gets the + /// string and its span they won't know the actual begin/end which can + /// be needed for doing substring indices (eg reporting error messages + /// when parsing a string) fn read_string( &mut self, delim: char, @@ -272,17 +269,22 @@ impl<'a> Tokenizer<'a> { usize, char, ) -> Result<(), Error>, - ) -> Result, Error> { + ) -> Result<(Span, Token<'a>), Error> { let mut multiline = false; if self.eatc(delim) { if self.eatc(delim) { multiline = true; } else { - return Ok(Token::String { - src: &self.input[start..start + 2], - val: Cow::Borrowed(""), - multiline: false, - }); + return Ok(( + // Point the caret at the beginning of the quote, that looks + // better than the end quote + (start..start + 1).into(), + Token::String { + src: &self.input[start..start + 2], + val: Cow::Borrowed(""), + multiline: false, + }, + )); } } let mut val = MaybeString::NotEscaped(self.current()); @@ -305,7 +307,7 @@ impl<'a> Tokenizer<'a> { } } Some((mut i, ch)) if ch == delim => { - if multiline { + let span = if multiline { if !self.eatc(delim) { val.push(delim); continue; @@ -323,12 +325,31 @@ impl<'a> Tokenizer<'a> { val.push(delim); i += 1; } + + // Also skip the first newline after the opening delimiters + let maybe_nl = self.input.as_bytes()[start + 3]; + let start_off = if maybe_nl == b'\n' { + 4 + } else if maybe_nl == b'\r' { + 5 + } else { + 3 + }; + + start + start_off..self.current() - 3 + } else { + start + 1..self.current() - 1 } - return Ok(Token::String { - src: &self.input[start..self.current()], - val: val.into_cow(&self.input[..i]), - multiline, - }); + .into(); + + return Ok(( + span, + Token::String { + src: &self.input[start..self.current()], + val: 
val.into_cow(&self.input[..i]), + multiline, + }, + )); } Some((i, c)) => new_ch(self, &mut val, multiline, i, c)?, None => return Err(Error::UnterminatedString(start)), @@ -336,7 +357,7 @@ impl<'a> Tokenizer<'a> { } } - fn literal_string(&mut self, start: usize) -> Result, Error> { + fn literal_string(&mut self, start: usize) -> Result<(Span, Token<'a>), Error> { self.read_string('\'', start, &mut |_me, val, _multi, i, ch| { if ch == '\u{09}' || ('\u{20}' <= ch && ch <= '\u{10ffff}' && ch != '\u{7f}') { val.push(ch); @@ -347,7 +368,7 @@ impl<'a> Tokenizer<'a> { }) } - fn basic_string(&mut self, start: usize) -> Result, Error> { + fn basic_string(&mut self, start: usize) -> Result<(Span, Token<'a>), Error> { self.read_string('"', start, &mut |me, val, multi, i, ch| match ch { '\\' => { val.make_owned(&me.input[..i]); From 346e407e05583e36759ba3e1b338e004d6acc705 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Mon, 12 Feb 2024 11:54:35 +0100 Subject: [PATCH 06/16] Fix float spans --- integ-tests/data/floats.toml | 2 +- .../valid__arrays__heterogenous-2.snap | 71 ++++++++++++ .../valid__arrays__ints_and_floats-2.snap | 29 +++++ .../snapshots/valid__numbers__floats-2.snap | 101 ++++++++++++++++++ .../snapshots/valid__numbers__floats.snap | 2 +- toml-file/src/de.rs | 6 +- 6 files changed, 206 insertions(+), 5 deletions(-) create mode 100644 integ-tests/tests/snapshots/valid__arrays__heterogenous-2.snap create mode 100644 integ-tests/tests/snapshots/valid__arrays__ints_and_floats-2.snap create mode 100644 integ-tests/tests/snapshots/valid__numbers__floats-2.snap diff --git a/integ-tests/data/floats.toml b/integ-tests/data/floats.toml index c3f9e10..0d4d549 100644 --- a/integ-tests/data/floats.toml +++ b/integ-tests/data/floats.toml @@ -5,7 +5,7 @@ normal-neg-exp = 1.0e-0 neg-exp = 1E-0 multi-dec-exp = 1.001e-0 two-exp = 2e10 -two-pos-exp = 2e+10 +two-pos-exp = 2e+11 two-neg-exp = 2e-10 twenty = 2_0.0 twenty-exp = 2_0.0_0e1_0 diff --git 
a/integ-tests/tests/snapshots/valid__arrays__heterogenous-2.snap b/integ-tests/tests/snapshots/valid__arrays__heterogenous-2.snap new file mode 100644 index 0000000..b723bf0 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__heterogenous-2.snap @@ -0,0 +1,71 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[integer]: root_mixed_0_0 + ┌─ heterogenous:1:11 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^ + +note[integer]: root_mixed_0_1 + ┌─ heterogenous:1:14 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^ + +note[array]: root_mixed_0 + ┌─ heterogenous:1:10 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^^^^^^ + +note[string]: root_mixed_1_0 + ┌─ heterogenous:1:20 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^ + +note[string]: root_mixed_1_1 + ┌─ heterogenous:1:25 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^ + +note[array]: root_mixed_1 + ┌─ heterogenous:1:18 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^^^^^^^^^^ + +note[float]: root_mixed_2_0 + ┌─ heterogenous:1:31 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^^^ + +note[float]: root_mixed_2_1 + ┌─ heterogenous:1:36 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^^^ + +note[array]: root_mixed_2 + ┌─ heterogenous:1:30 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^^^^^^^^^^ + +note[array]: root_mixed + ┌─ heterogenous:1:9 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note[table]: root + ┌─ heterogenous:1:1 + │ +1 │ mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + diff --git a/integ-tests/tests/snapshots/valid__arrays__ints_and_floats-2.snap b/integ-tests/tests/snapshots/valid__arrays__ints_and_floats-2.snap new file mode 100644 index 0000000..47959fd --- /dev/null +++ b/integ-tests/tests/snapshots/valid__arrays__ints_and_floats-2.snap @@ -0,0 +1,29 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- 
+note[integer]: root_ints-and-floats_0 + ┌─ ints_and_floats:1:20 + │ +1 │ ints-and-floats = [1, 1.1] + │ ^ + +note[float]: root_ints-and-floats_1 + ┌─ ints_and_floats:1:23 + │ +1 │ ints-and-floats = [1, 1.1] + │ ^^^ + +note[array]: root_ints-and-floats + ┌─ ints_and_floats:1:19 + │ +1 │ ints-and-floats = [1, 1.1] + │ ^^^^^^^^ + +note[table]: root + ┌─ ints_and_floats:1:1 + │ +1 │ ints-and-floats = [1, 1.1] + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^ + + diff --git a/integ-tests/tests/snapshots/valid__numbers__floats-2.snap b/integ-tests/tests/snapshots/valid__numbers__floats-2.snap new file mode 100644 index 0000000..de600b5 --- /dev/null +++ b/integ-tests/tests/snapshots/valid__numbers__floats-2.snap @@ -0,0 +1,101 @@ +--- +source: integ-tests/tests/valid.rs +expression: spans +--- +note[float]: root_longpi + ┌─ floats:13:10 + │ +13 │ longpi = 3.141592653589793 + │ ^^^^^^^^^^^^^^^^^ + +note[float]: root_multi-dec-exp + ┌─ floats:6:17 + │ +6 │ multi-dec-exp = 1.001e-0 + │ ^^^^^^^^ + +note[float]: root_neg-exp + ┌─ floats:5:11 + │ +5 │ neg-exp = 1E-0 + │ ^^^^ + +note[float]: root_neglongpi + ┌─ floats:14:13 + │ +14 │ neglongpi = -3.141592653589793 + │ ^^^^^^^^^^^^^^^^^^ + +note[float]: root_normal + ┌─ floats:1:10 + │ +1 │ normal = 1.0 + │ ^^^ + +note[float]: root_normal-exp + ┌─ floats:2:14 + │ +2 │ normal-exp = 1.0e0 + │ ^^^^^ + +note[float]: root_normal-neg-exp + ┌─ floats:4:18 + │ +4 │ normal-neg-exp = 1.0e-0 + │ ^^^^^^ + +note[float]: root_normal-pos-exp + ┌─ floats:3:18 + │ +3 │ normal-pos-exp = 1.0e+0 + │ ^^^^^^ + +note[float]: root_twenty + ┌─ floats:10:10 + │ +10 │ twenty = 2_0.0 + │ ^^^^^ + +note[float]: root_twenty-exp + ┌─ floats:11:14 + │ +11 │ twenty-exp = 2_0.0_0e1_0 + │ ^^^^^^^^^^^ + +note[float]: root_twenty-punkt-ett + ┌─ floats:12:20 + │ +12 │ twenty-punkt-ett = 2_0.1_0e1_0 + │ ^^^^^^^^^^^ + +note[float]: root_two-exp + ┌─ floats:7:11 + │ +7 │ two-exp = 2e10 + │ ^^^^ + +note[float]: root_two-neg-exp + ┌─ floats:9:15 + │ +9 │ two-neg-exp = 2e-10 + │ ^^^^^ + 
+note[float]: root_two-pos-exp + ┌─ floats:8:15 + │ +8 │ two-pos-exp = 2e+11 + │ ^^^^^ + +note[table]: root + ┌─ floats:1:1 + │ + 1 │ ╭ normal = 1.0 + 2 │ │ normal-exp = 1.0e0 + 3 │ │ normal-pos-exp = 1.0e+0 + 4 │ │ normal-neg-exp = 1.0e-0 + · │ +14 │ │ neglongpi = -3.141592653589793 +15 │ │ + │ ╰^ + + diff --git a/integ-tests/tests/snapshots/valid__numbers__floats.snap b/integ-tests/tests/snapshots/valid__numbers__floats.snap index a3bcaa8..762b1f6 100644 --- a/integ-tests/tests/snapshots/valid__numbers__floats.snap +++ b/integ-tests/tests/snapshots/valid__numbers__floats.snap @@ -16,5 +16,5 @@ expression: valid_toml "twenty-punkt-ett": 201000000000.0, "two-exp": 20000000000.0, "two-neg-exp": 0.0000000002, - "two-pos-exp": 20000000000.0 + "two-pos-exp": 200000000000.0 } diff --git a/toml-file/src/de.rs b/toml-file/src/de.rs index 18fb818..6fea82f 100644 --- a/toml-file/src/de.rs +++ b/toml-file/src/de.rs @@ -580,16 +580,16 @@ impl<'a> Deserializer<'a> { self.float(s, None).map(|f| Val { e: E::Float(f), start, - end, + end: self.tokens.current(), }) } else if self.eat(Token::Period)? { let at = self.tokens.current(); match self.next()? { - Some((Span { start, end }, Token::Keylike(after))) => { + Some((Span { .. 
}, Token::Keylike(after))) => { self.float(s, Some(after)).map(|f| Val { e: E::Float(f), start, - end, + end: self.tokens.current(), }) } _ => Err(self.error(at, Some(end), ErrorKind::InvalidNumber)), From 5ad00c1dfc4a59be82e5ed5ce1d03f23a16fd8a3 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 14 Feb 2024 09:37:05 +0100 Subject: [PATCH 07/16] Rename --- Cargo.toml | 2 +- integ-tests/Cargo.toml | 3 +- toml-file-derive/Cargo.toml | 12 ------- toml-file-derive/src/de.rs | 5 --- toml-file-derive/src/lib.rs | 9 ------ {toml-file => toml-span}/Cargo.toml | 4 ++- {toml-file => toml-span}/src/de.rs | 0 {toml-file => toml-span}/src/de_helpers.rs | 31 +++++++++++++++---- {toml-file => toml-span}/src/error.rs | 21 +++++++++---- {toml-file => toml-span}/src/lib.rs | 0 {toml-file => toml-span}/src/span.rs | 0 {toml-file => toml-span}/src/tokens.rs | 0 {toml-file => toml-span}/src/value.rs | 0 .../src/value/impl_serde.rs | 0 14 files changed, 45 insertions(+), 42 deletions(-) delete mode 100644 toml-file-derive/Cargo.toml delete mode 100644 toml-file-derive/src/de.rs delete mode 100644 toml-file-derive/src/lib.rs rename {toml-file => toml-span}/Cargo.toml (68%) rename {toml-file => toml-span}/src/de.rs (100%) rename {toml-file => toml-span}/src/de_helpers.rs (91%) rename {toml-file => toml-span}/src/error.rs (94%) rename {toml-file => toml-span}/src/lib.rs (100%) rename {toml-file => toml-span}/src/span.rs (100%) rename {toml-file => toml-span}/src/tokens.rs (100%) rename {toml-file => toml-span}/src/value.rs (100%) rename {toml-file => toml-span}/src/value/impl_serde.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 1b7a52b..0f6e300 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ resolver = "2" members = ["integ-tests", "toml-file", "toml-file-derive"] [workspace.dependencies] -toml-file = { path = "toml-file" } +toml-span = { path = "toml-span" } proc-macro2 = "1.0" quote = "1.0" syn = "2.0" diff --git a/integ-tests/Cargo.toml b/integ-tests/Cargo.toml index 
4ca8e49..015b7e7 100644 --- a/integ-tests/Cargo.toml +++ b/integ-tests/Cargo.toml @@ -5,8 +5,7 @@ edition = "2021" publish = false [dependencies] -basic-toml = "0.1" codespan-reporting = "0.11" insta = { version = "1.34", features = ["json"] } pretty_assertions = "1.4" -toml-file = { workspace = true, features = ["reporting", "serde"] } +toml-span = { workspace = true, features = ["reporting", "serde"] } diff --git a/toml-file-derive/Cargo.toml b/toml-file-derive/Cargo.toml deleted file mode 100644 index 5b43258..0000000 --- a/toml-file-derive/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "toml-file-derive" -version = "0.1.0" -edition = "2021" - -[lib] -proc-macro = true - -[dependencies] -proc-macro2.workspace = true -quote.workspace = true -syn.workspace = true diff --git a/toml-file-derive/src/de.rs b/toml-file-derive/src/de.rs deleted file mode 100644 index f1a1de0..0000000 --- a/toml-file-derive/src/de.rs +++ /dev/null @@ -1,5 +0,0 @@ -use proc_macro2::TokenStream; - -pub(super) fn expand(_input: &mut syn::DeriveInput) -> syn::Result { - unimplemented!() -} diff --git a/toml-file-derive/src/lib.rs b/toml-file-derive/src/lib.rs deleted file mode 100644 index 0ed41fb..0000000 --- a/toml-file-derive/src/lib.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod de; - -#[proc_macro_derive(Deserialize, attributes(toml))] -pub fn derive_deserialize(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let mut input = syn::parse_macro_input!(input as syn::DeriveInput); - de::expand(&mut input) - .unwrap_or_else(syn::Error::into_compile_error) - .into() -} diff --git a/toml-file/Cargo.toml b/toml-span/Cargo.toml similarity index 68% rename from toml-file/Cargo.toml rename to toml-span/Cargo.toml index ec1b6ed..40fa174 100644 --- a/toml-file/Cargo.toml +++ b/toml-span/Cargo.toml @@ -1,6 +1,8 @@ [package] -name = "toml-file" +name = "toml-span" version = "0.1.0" +description = "Toml parser and deserializer that preserves span information" +license = "Apache-2.0 
OR MIT" edition = "2021" [features] diff --git a/toml-file/src/de.rs b/toml-span/src/de.rs similarity index 100% rename from toml-file/src/de.rs rename to toml-span/src/de.rs diff --git a/toml-file/src/de_helpers.rs b/toml-span/src/de_helpers.rs similarity index 91% rename from toml-file/src/de_helpers.rs rename to toml-span/src/de_helpers.rs index b1fc1c2..d767427 100644 --- a/toml-file/src/de_helpers.rs +++ b/toml-span/src/de_helpers.rs @@ -37,12 +37,16 @@ where pub struct TableHelper<'de> { pub table: Table<'de>, pub errors: Vec, + expected: Vec<&'static str>, + span: Span, } -impl<'de> From> for TableHelper<'de> { - fn from(table: Table<'de>) -> Self { +impl<'de> From<(Table<'de>, Span)> for TableHelper<'de> { + fn from((table, span): (Table<'de>, Span)) -> Self { Self { table, + span, + expected: Vec::new(), errors: Vec::new(), } } @@ -58,6 +62,8 @@ impl<'de> TableHelper<'de> { Ok(Self { errors: Vec::new(), table, + expected: Vec::new(), + span: value.span, }) } @@ -80,10 +86,12 @@ impl<'de> TableHelper<'de> { &mut self, name: &'static str, ) -> Result, Error> { + self.expected.push(name); + let Some(mut val) = self.table.remove(&name.into()) else { let missing = Error { kind: ErrorKind::MissingField(name), - span: Default::default(), + span: self.span, line_info: None, }; self.errors.push(missing.clone()); @@ -102,6 +110,8 @@ impl<'de> TableHelper<'de> { name: &'static str, def: impl FnOnce() -> T, ) -> (T, Span) { + self.expected.push(name); + let Some(mut val) = self.table.remove(&name.into()) else { return (def(), Span::default()); }; @@ -120,6 +130,8 @@ impl<'de> TableHelper<'de> { } pub fn optional_s>(&mut self, name: &'static str) -> Option> { + self.expected.push(name); + let Some(mut val) = self.table.remove(&name.into()) else { return None; }; @@ -138,10 +150,12 @@ impl<'de> TableHelper<'de> { T: FromStr + Default, E: Display, { + self.expected.push(name); + let Some(mut val) = self.table.remove(&name.into()) else { self.errors.push(Error { kind: 
ErrorKind::MissingField(name), - span: Default::default(), + span: self.span, line_info: None, }); return T::default(); @@ -161,6 +175,8 @@ impl<'de> TableHelper<'de> { T: FromStr, E: Display, { + self.expected.push(name); + let Some(mut val) = self.table.remove(&name.into()) else { return None; }; @@ -185,8 +201,11 @@ impl<'de> TableHelper<'de> { .collect(); self.errors.push(Error { - span: Default::default(), - kind: ErrorKind::UnexpectedKeys { keys }, + span: self.span, + kind: ErrorKind::UnexpectedKeys { + keys, + expected: self.expected.into_iter().map(String::from).collect(), + }, line_info: None, }) } diff --git a/toml-file/src/error.rs b/toml-span/src/error.rs similarity index 94% rename from toml-file/src/error.rs rename to toml-span/src/error.rs index cfb22f5..aab3839 100644 --- a/toml-file/src/error.rs +++ b/toml-span/src/error.rs @@ -109,6 +109,7 @@ pub enum ErrorKind { UnexpectedKeys { /// The unexpected keys. keys: Vec<(String, Span)>, + expected: Vec, }, /// Unquoted string was found when quoted one was expected. @@ -218,9 +219,10 @@ impl Display for Error { ErrorKind::DottedKeyInvalidType => { f.write_str("dotted key attempted to extend non-table type")?; } - ErrorKind::UnexpectedKeys { keys } => { - write!(f, "unexpected keys in table: `{keys:?}`")? - } + ErrorKind::UnexpectedKeys { keys, expected } => write!( + f, + "unexpected keys in table: `{keys:?}`\nexpected: {expected:?}" + )?, ErrorKind::UnquotedString => { f.write_str("invalid TOML value, did you mean to use a quoted string?")? 
} @@ -304,14 +306,21 @@ impl Error { ErrorKind::UnquotedString => diag.with_labels(vec![ Label::primary(fid, self.span).with_message("string is not quoted") ]), - ErrorKind::UnexpectedKeys { keys } => diag - .with_message(format!("found {} unexpected keys", keys.len())) + ErrorKind::UnexpectedKeys { keys, expected } => diag + .with_message(format!( + "found {} unexpected keys, expected: {expected:?}", + keys.len() + )) .with_labels( keys.iter() .map(|(_name, span)| Label::secondary(fid, *span)) .collect(), ), - ErrorKind::MissingField(field) => diag.with_message(format!("missing field '{field}'")), + ErrorKind::MissingField(field) => diag + .with_message(format!("missing field '{field}'")) + .with_labels(vec![ + Label::primary(fid, self.span).with_message("table with missing field") + ]), ErrorKind::Deprecated { new, .. } => diag .with_message(format!( "deprecated field enountered, '{new}' should be used instead" diff --git a/toml-file/src/lib.rs b/toml-span/src/lib.rs similarity index 100% rename from toml-file/src/lib.rs rename to toml-span/src/lib.rs diff --git a/toml-file/src/span.rs b/toml-span/src/span.rs similarity index 100% rename from toml-file/src/span.rs rename to toml-span/src/span.rs diff --git a/toml-file/src/tokens.rs b/toml-span/src/tokens.rs similarity index 100% rename from toml-file/src/tokens.rs rename to toml-span/src/tokens.rs diff --git a/toml-file/src/value.rs b/toml-span/src/value.rs similarity index 100% rename from toml-file/src/value.rs rename to toml-span/src/value.rs diff --git a/toml-file/src/value/impl_serde.rs b/toml-span/src/value/impl_serde.rs similarity index 100% rename from toml-file/src/value/impl_serde.rs rename to toml-span/src/value/impl_serde.rs From 0f6340f202ce2d5193688f9a1668a9c1f9429118 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Wed, 14 Feb 2024 09:37:23 +0100 Subject: [PATCH 08/16] Tests for missing fields --- integ-tests/tests/de.rs | 7 ++++++- integ-tests/tests/snapshots/de__missing_required.snap | 4 ++++ 
integ-tests/tests/snapshots/de__unknown_field.snap | 11 +++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 integ-tests/tests/snapshots/de__unknown_field.snap diff --git a/integ-tests/tests/de.rs b/integ-tests/tests/de.rs index 8ab4659..ff61031 100644 --- a/integ-tests/tests/de.rs +++ b/integ-tests/tests/de.rs @@ -29,6 +29,11 @@ impl<'de> Deserialize<'de> for Boop { valid_de!(basic_table, Boop, "s = 'boop string'\nos = 20"); invalid_de!(missing_required, Boop, "os = 20"); +invalid_de!( + unknown_field, + Boop, + "s = 'val'\nthis-field-is-not-known = 20" +); #[derive(Debug)] struct Package { @@ -53,7 +58,7 @@ impl<'de> Deserialize<'de> for Package { Ok(Self { name, version }) } ValueInner::Table(tab) => { - let mut th = TableHelper::from(tab); + let mut th = TableHelper::from((tab, value.span)); if let Some(mut val) = th.table.remove(&"crate".into()) { let (name, version) = match val.take() { diff --git a/integ-tests/tests/snapshots/de__missing_required.snap b/integ-tests/tests/snapshots/de__missing_required.snap index 7d0a637..2f633f1 100644 --- a/integ-tests/tests/snapshots/de__missing_required.snap +++ b/integ-tests/tests/snapshots/de__missing_required.snap @@ -3,5 +3,9 @@ source: integ-tests/tests/de.rs expression: error --- error[missing-field]: missing field 's' + ┌─ missing_required:1:1 + │ +1 │ os = 20 + │ ^^^^^^^ table with missing field diff --git a/integ-tests/tests/snapshots/de__unknown_field.snap b/integ-tests/tests/snapshots/de__unknown_field.snap new file mode 100644 index 0000000..8f7eea9 --- /dev/null +++ b/integ-tests/tests/snapshots/de__unknown_field.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/de.rs +expression: error +--- +error[unexpected-keys]: found 1 unexpected keys, expected: ["s", "os"] + ┌─ unknown_field:2:1 + │ +2 │ this-field-is-not-known = 20 + │ ----------------------- + + From fbf835a4e84bda568973d040410924c0e9b1186d Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 20 Feb 2024 12:42:05 +0100 
Subject: [PATCH 09/16] Rename --- Cargo.lock | 23 ++--------------------- Cargo.toml | 2 +- integ-tests/src/lib.rs | 20 ++++++++++---------- integ-tests/tests/de.rs | 2 +- integ-tests/tests/tokens.rs | 2 +- 5 files changed, 15 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 91c0c4b..c598f22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,15 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "basic-toml" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2db21524cad41c5591204d22d75e1970a2d1f71060214ca931dc7d5afe2c14e5" -dependencies = [ - "serde", -] - [[package]] name = "codespan-reporting" version = "0.11.1" @@ -82,11 +73,10 @@ dependencies = [ name = "integ-tests" version = "0.1.0" dependencies = [ - "basic-toml", "codespan-reporting", "insta", "pretty_assertions", - "toml-file", + "toml-span", ] [[package]] @@ -197,7 +187,7 @@ dependencies = [ ] [[package]] -name = "toml-file" +name = "toml-span" version = "0.1.0" dependencies = [ "codespan-reporting", @@ -206,15 +196,6 @@ dependencies = [ "smallvec", ] -[[package]] -name = "toml-file-derive" -version = "0.1.0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "unicode-ident" version = "1.0.12" diff --git a/Cargo.toml b/Cargo.toml index 0f6e300..dc2a541 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = ["integ-tests", "toml-file", "toml-file-derive"] +members = ["integ-tests", "toml-span"] [workspace.dependencies] toml-span = { path = "toml-span" } diff --git a/integ-tests/src/lib.rs b/integ-tests/src/lib.rs index 29166c3..f9b2a4b 100644 --- a/integ-tests/src/lib.rs +++ b/integ-tests/src/lib.rs @@ -6,7 +6,7 @@ macro_rules! 
valid { fn $name() { let toml_str = std::fs::read_to_string(concat!("data/", stringify!($name), ".toml")) .expect(concat!("failed to load ", stringify!($name), ".toml")); - let valid_toml = toml_file::parse(&toml_str).expect("failed to parse toml"); + let valid_toml = toml_span::parse(&toml_str).expect("failed to parse toml"); insta::assert_json_snapshot!(valid_toml); $crate::emit_spans!($name, valid_toml, &toml_str); @@ -15,7 +15,7 @@ macro_rules! valid { ($name:ident, $toml:literal) => { #[test] fn $name() { - let valid_toml = toml_file::parse($toml).expect("failed to parse toml"); + let valid_toml = toml_span::parse($toml).expect("failed to parse toml"); insta::assert_json_snapshot!(valid_toml); $crate::emit_spans!($name, valid_toml, $toml); @@ -42,7 +42,7 @@ macro_rules! valid_de { fn $name() { let toml_str = std::fs::read_to_string(concat!("data/", stringify!($name), ".toml")) .expect(concat!("failed to load ", stringify!($name), ".toml")); - let mut valid_toml = toml_file::parse(&toml_str).expect("failed to parse toml"); + let mut valid_toml = toml_span::parse(&toml_str).expect("failed to parse toml"); match <$kind>::deserialize(&mut valid_toml) { Ok(de) => { @@ -61,7 +61,7 @@ macro_rules! valid_de { ($name:ident, $kind:ty, $toml:literal) => { #[test] fn $name() { - let mut valid_toml = toml_file::parse($toml).expect("failed to parse toml"); + let mut valid_toml = toml_span::parse($toml).expect("failed to parse toml"); match <$kind>::deserialize(&mut valid_toml) { Ok(de) => { @@ -88,7 +88,7 @@ macro_rules! invalid_de { fn $name() { let toml_str = std::fs::read_to_string(concat!("data/", stringify!($name), ".toml")) .expect(concat!("failed to load ", stringify!($name), ".toml")); - let mut valid_toml = toml_file::parse(&toml_str).expect("failed to parse toml"); + let mut valid_toml = toml_span::parse(&toml_str).expect("failed to parse toml"); match <$kind>::deserialize(&mut valid_toml) { Ok(de) => { @@ -107,7 +107,7 @@ macro_rules! 
invalid_de { ($name:ident, $kind:ty, $toml:literal) => { #[test] fn $name() { - let mut valid_toml = toml_file::parse($toml).expect("failed to parse toml"); + let mut valid_toml = toml_span::parse($toml).expect("failed to parse toml"); match <$kind>::deserialize(&mut valid_toml) { Ok(de) => { @@ -161,11 +161,11 @@ use codespan_reporting::diagnostic::Diagnostic; pub fn collect_spans( key: &str, - val: &toml_file::value::Value<'_>, + val: &toml_span::value::Value<'_>, diags: &mut Vec>, ) { use codespan_reporting::diagnostic::Label; - use toml_file::value::ValueInner; + use toml_span::value::ValueInner; let code = match val.as_ref() { ValueInner::String(_s) => "string", @@ -219,14 +219,14 @@ macro_rules! invalid { let toml_str = std::fs::read_to_string(dbg!(concat!("data/", stringify!($name), ".toml"))) .expect(concat!("failed to load ", stringify!($name), ".toml")); - let error = toml_file::parse(toml_str).unwrap_err(); + let error = toml_span::parse(toml_str).unwrap_err(); $crate::error_snapshot!($name, error, &toml_str); } }; ($name:ident, $toml:expr) => { #[test] fn $name() { - let error = toml_file::parse($toml).unwrap_err(); + let error = toml_span::parse($toml).unwrap_err(); $crate::error_snapshot!($name, Some(error.to_diagnostic(())), $toml); } }; diff --git a/integ-tests/tests/de.rs b/integ-tests/tests/de.rs index ff61031..8bed55d 100644 --- a/integ-tests/tests/de.rs +++ b/integ-tests/tests/de.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] use integ_tests::{invalid_de, valid_de}; -use toml_file::{ +use toml_span::{ de_helpers::*, span::Spanned, value::{Value, ValueInner}, diff --git a/integ-tests/tests/tokens.rs b/integ-tests/tests/tokens.rs index 8411aa7..be83678 100644 --- a/integ-tests/tests/tokens.rs +++ b/integ-tests/tests/tokens.rs @@ -1,6 +1,6 @@ use pretty_assertions::assert_eq; use std::borrow::Cow; -use toml_file::tokens::{Error, Token, Tokenizer}; +use toml_span::tokens::{Error, Token, Tokenizer}; fn err(input: &str, err: Error) { let mut t = 
Tokenizer::new(input); From 718f40d4e7ea7f9cbeec5985fadce2fad8560e4d Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 20 Feb 2024 12:42:16 +0100 Subject: [PATCH 10/16] Add pointer API --- integ-tests/tests/de.rs | 22 +++++++ toml-span/src/lib.rs | 2 + toml-span/src/value.rs | 139 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+) diff --git a/integ-tests/tests/de.rs b/integ-tests/tests/de.rs index 8bed55d..1922074 100644 --- a/integ-tests/tests/de.rs +++ b/integ-tests/tests/de.rs @@ -170,3 +170,25 @@ impl<'de> Deserialize<'de> for Flattened { } valid_de!(flattened, Flattened); + +/// Just validates the Value::pointer/_mut methods work as expected +#[test] +fn pointers() { + let mut ba = toml_span::parse(include_str!("../data/basic_arrays.toml")).unwrap(); + + assert_eq!( + ba.pointer("/packages/2/version").unwrap().as_str().unwrap(), + "3.0.0" + ); + + assert_eq!( + ba.pointer_mut("/packages/3/crate") + .unwrap() + .take() + .as_str() + .unwrap(), + "fourth:0.1" + ); + + assert!(dbg!(ba.pointer("/packages/3/crate")).is_none()); +} diff --git a/toml-span/src/lib.rs b/toml-span/src/lib.rs index 9760345..785e377 100644 --- a/toml-span/src/lib.rs +++ b/toml-span/src/lib.rs @@ -1,3 +1,5 @@ +#![allow(missing_docs)] + pub mod de; pub mod de_helpers; mod error; diff --git a/toml-span/src/value.rs b/toml-span/src/value.rs index 84052ad..2ebd442 100644 --- a/toml-span/src/value.rs +++ b/toml-span/src/value.rs @@ -70,6 +70,91 @@ impl<'de> Value<'de> { }), } } + + #[inline] + pub fn as_str(&self) -> Option<&str> { + self.value.as_ref().and_then(|v| v.as_str()) + } + + #[inline] + pub fn as_table(&self) -> Option<&Table<'de>> { + self.value.as_ref().and_then(|v| v.as_table()) + } + + #[inline] + pub fn as_array(&self) -> Option<&Array<'de>> { + self.value.as_ref().and_then(|v| v.as_array()) + } + + #[inline] + pub fn as_integer(&self) -> Option { + self.value.as_ref().and_then(|v| v.as_integer()) + } + + #[inline] + pub fn as_float(&self) -> Option 
{ + self.value.as_ref().and_then(|v| v.as_float()) + } + + #[inline] + pub fn as_bool(&self) -> Option { + self.value.as_ref().and_then(|v| v.as_bool()) + } + + pub fn pointer(&self, pointer: &'de str) -> Option<&Self> { + if pointer.is_empty() { + return Some(self); + } else if !pointer.starts_with('/') { + return None; + } + + pointer + .split('/') + .skip(1) + // Don't support / or ~ in key names unless someone actually opens + // an issue about it + //.map(|x| x.replace("~1", "/").replace("~0", "~")) + .try_fold(self, |target, token| { + (match &target.value { + Some(ValueInner::Table(tab)) => tab.get(&token.into()), + Some(ValueInner::Array(list)) => parse_index(token).and_then(|x| list.get(x)), + _ => None, + }) + .filter(|v| v.value.is_some()) + }) + } + + pub fn pointer_mut(&mut self, pointer: &'de str) -> Option<&mut Self> { + if pointer.is_empty() { + return Some(self); + } else if !pointer.starts_with('/') { + return None; + } + + pointer + .split('/') + .skip(1) + // Don't support / or ~ in key names unless someone actually opens + // an issue about it + //.map(|x| x.replace("~1", "/").replace("~0", "~")) + .try_fold(self, |target, token| { + (match &mut target.value { + Some(ValueInner::Table(tab)) => tab.get_mut(&token.into()), + Some(ValueInner::Array(list)) => { + parse_index(token).and_then(|x| list.get_mut(x)) + } + _ => None, + }) + .filter(|v| v.value.is_some()) + }) + } +} + +fn parse_index(s: &str) -> Option { + if s.starts_with('+') || (s.starts_with('0') && s.len() != 1) { + return None; + } + s.parse().ok() } impl<'de> AsRef> for Value<'de> { @@ -151,4 +236,58 @@ impl<'de> ValueInner<'de> { Self::Table(..) 
=> "table", } } + + #[inline] + pub fn as_str(&self) -> Option<&str> { + if let Self::String(s) = self { + Some(s.as_ref()) + } else { + None + } + } + + #[inline] + pub fn as_table(&self) -> Option<&Table<'de>> { + if let ValueInner::Table(t) = self { + Some(t) + } else { + None + } + } + + #[inline] + pub fn as_array(&self) -> Option<&Array<'de>> { + if let ValueInner::Array(a) = self { + Some(a) + } else { + None + } + } + + #[inline] + pub fn as_integer(&self) -> Option { + if let ValueInner::Integer(i) = self { + Some(*i) + } else { + None + } + } + + #[inline] + pub fn as_float(&self) -> Option { + if let ValueInner::Float(f) = self { + Some(*f) + } else { + None + } + } + + #[inline] + pub fn as_bool(&self) -> Option { + if let ValueInner::Boolean(b) = self { + Some(*b) + } else { + None + } + } } From cff741244063cc0d2a5689a0403a9582a50661fe Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 20 Feb 2024 15:02:25 +0100 Subject: [PATCH 11/16] Update README --- Cargo.lock | 35 ---- README.md | 389 ++++++++++++++++++++++++++++++++++++++++--- toml-span/Cargo.toml | 1 - toml-span/src/lib.rs | 3 +- 4 files changed, 368 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c598f22..b2c4097 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,15 +24,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "convert_case" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "diff" version = "0.1.13" @@ -45,16 +36,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" -[[package]] -name = "identconv" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02df3085f97750c1f8deb1b56aeb168f242f303e363aee16284821c0a14ff90e" 
-dependencies = [ - "convert_case", - "litrs", -] - [[package]] name = "insta" version = "1.34.0" @@ -97,15 +78,6 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" -[[package]] -name = "litrs" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" -dependencies = [ - "proc-macro2", -] - [[package]] name = "pretty_assertions" version = "1.4.0" @@ -191,7 +163,6 @@ name = "toml-span" version = "0.1.0" dependencies = [ "codespan-reporting", - "identconv", "serde", "smallvec", ] @@ -202,12 +173,6 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-segmentation" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" - [[package]] name = "unicode-width" version = "0.1.11" diff --git a/README.md b/README.md index 0c48f41..7ea0aab 100644 --- a/README.md +++ b/README.md @@ -6,36 +6,377 @@
- -# `🌻 opensource-template` +# `↔️ toml-span` - -**Template for creating new open source repositories that follow the Embark open source guidelines** - - +**Span-preserving toml deserializer** [![Embark](https://img.shields.io/badge/embark-open%20source-blueviolet.svg)](https://embark.dev) [![Embark](https://img.shields.io/badge/discord-ark-%237289da.svg?logo=discord)](https://discord.gg/dAuKfZS) -[![Crates.io](https://img.shields.io/crates/v/rust-gpu.svg)](https://crates.io/crates/rust-gpu) -[![Docs](https://docs.rs/rust-gpu/badge.svg)](https://docs.rs/rust-gpu) -[![Git Docs](https://img.shields.io/badge/git%20main%20docs-published-blue)](https://embarkstudios.github.io/presser/presser/index.html) -[![dependency status](https://deps.rs/repo/github/EmbarkStudios/rust-gpu/status.svg)](https://deps.rs/repo/github/EmbarkStudios/rust-gpu) -[![Build status](https://github.com/EmbarkStudios/physx-rs/workflows/CI/badge.svg)](https://github.com/EmbarkStudios/physx-rs/actions) +[![Crates.io](https://img.shields.io/crates/v/rust-gpu.svg)](https://crates.io/crates/toml-span) +[![Docs](https://docs.rs/toml-span/badge.svg)](https://docs.rs/toml-span) +[![dependency status](https://deps.rs/repo/github/EmbarkStudios/toml-span/status.svg)](https://deps.rs/repo/github/EmbarkStudios/toml-span) +[![Build status](https://github.com/EmbarkStudios/toml-span/workflows/CI/badge.svg)](https://github.com/EmbarkStudios/toml-span/actions)
-## TEMPLATE INSTRUCTIONS +## Differences from `toml` + +First off I just want to be up front and clear about the differences/limitations of this crate versus `toml` + +1. No `serde` support for deserialization, there is a `serde` feature, but that only enables serialization of the `Value` and `Spanned` types. +1. No toml serialization. This crate is only intended to be a span preserving deserializer, there is no intention to provide serialization to toml, especially the advanced format preserving kind provided by `toml-edit`. +1. No datetime deserialization. It would be trivial to add support for this (behind an optional feature), I just have no use for it at the moment. PRs welcome. + +## Why does this crate exist? + +### The problem + +This crate was specifically made to suit the needs of [cargo-deny], namely, that it can always retrieve the span of any toml item that it wants to. While the [toml](https://docs.rs/toml/latest/toml/) crate can also produce span information via [toml::Spanned](https://docs.rs/toml/latest/toml/struct.Spanned.html) there is one rather significant limitation, namely, that it must pass through [serde](https://docs.rs/serde/latest/serde/). While in simple cases the `Spanned` type works quite well, eg. + +```rust +#[derive(serde::Deserialize)] +struct Simple { + /// This works just fine + simple_string: toml::Spanned, +} +``` + +As soon as you have a [more complicated scenario](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=aeb611bbe387538d2ebb6780055b3167), the mechanism that `toml` uses to get the span information breaks down. 
+ +```rust +#[derive(serde::Deserialize)] +#[serde(untagged)] +enum Ohno { + Integer(u32), + SpannedString(toml::Spanned), +} + +#[derive(serde::Deserialize)] +struct Root { + integer: Ohno, + string: Ohno +} + +fn main() { + let toml = r#" +integer = 42 +string = "we want this to be spanned" +"#; + + let parsed: Root = toml::from_str(toml).expect("failed to deserialize toml"); +} +``` + +```text +thread 'main' panicked at src/main.rs:20:45: +failed to deserialize toml: Error { inner: Error { inner: TomlError { message: "data did not match any variant of untagged enum Ohno", original: Some("\ninteger = 42\nstring = \"we want this to be spanned\"\n"), keys: ["string"], span: Some(23..51) } } } +``` + +To understand why this fails we can look at what `#[derive(serde::Deserialize)]` expand to for `Ohno` in HIR. + +```rust +#[allow(unused_extern_crates, clippy :: useless_attribute)] +extern crate serde as _serde; +#[automatically_derived] +impl <'de> _serde::Deserialize<'de> for Ohno { + fn deserialize<__D>(__deserializer: __D) + -> _serde::__private::Result where + __D: _serde::Deserializer<'de> { + let __content = + match #[lang = "branch"](<_serde::__private::de::Content as + _serde::Deserialize>::deserialize(__deserializer)) { + #[lang = "Break"] { 0: residual } => + #[allow(unreachable_code)] + return #[lang = "from_residual"](residual), + #[lang = "Continue"] { 0: val } => + #[allow(unreachable_code)] + val, + }; + let __deserializer = + _serde::__private::de::ContentRefDeserializer<, , + __D::Error>::new(&__content); + if let _serde::__private::Ok(__ok) = + _serde::__private::Result::map(::deserialize(__deserializer), + Ohno::Integer) { return _serde::__private::Ok(__ok); } + if let _serde::__private::Ok(__ok) = + _serde::__private::Result::map( as + _serde::Deserialize>::deserialize(__deserializer), + Ohno::SpannedString) { return _serde::__private::Ok(__ok); } + _serde::__private::Err(_serde::de::Error::custom("data did not match any variant of untagged enum 
Ohno")) + } + } }; +``` + +What serde does in the untagged case is first deserialize into `_serde::__private::de::Content`, an internal API container that is easiest to think of as something like `serde_json::Value`. This is because serde speculatively parses each enum variant until one succeeds by passing a `ContentRefDeserializer` that just borrows the deserialized `Content` from earlier to satisfy the serde deserialize API consuming the `Deserializer`. The problem comes because of how [`toml::Spanned`](https://docs.rs/serde_spanned/0.6.5/src/serde_spanned/spanned.rs.html#161-212) works, namely that it uses a hack to work around the limitations of the serde API in order to "deserialize" the item as well as its span information, by the `Spanned` object specifically requesting a set of keys from the `toml::Deserializer` impl so that it can [encode](https://github.com/toml-rs/toml/blob/c4b62fda23343037ebe5ea93db9393cb25fcf233/crates/toml_edit/src/de/spanned.rs#L27-L70) the span information as if it was a struct to satisfy serde. But serde doesn't know that when it deserializes the `Content` object, it just knows that the Deserializer reports it has a string, int or what have you, and deserializes that, "losing" the span information. This problem also affects things like `#[serde(flatten)]` for slightly different reasons, but they all basically come down to the serde API not truly supporting span information, nor [any plans](https://github.com/serde-rs/serde/issues/1811) to. + +### How `toml-span` is different + +This crate works by just...not using `serde`. The core of the crate is based off of [basic-toml](https://github.com/dtolnay/basic-toml) which is itself a fork of `toml v0.5` before it added a ton of features and complexity that...well, are not needed by [cargo-deny] or many other crates that only need deserialization.
+ +Removing `serde` support means that deserialization must be manually written, which can be tedious in some cases, but while porting [cargo-deny] I actually came to appreciate it more and more due to a couple of things. + +1. Maximal control. `toml-span` does an initial deserialization pass into `toml_span::value::Value` which keeps span information for both keys and values, and provides helpers (namely `TableHelper`), but other than satisfying the `toml_span::Deserialize` trait doesn't restrict you in how you want to deserialize your values, and you don't even have to use that if you don't want to. +2. While it's slower to manually write deserialization code rather than just putting on a few serde attributes, the truth is that that initial convenience carries a compile time cost in terms of serde_derive and all of its dependencies, as well as all of the code that is generated, for...ever. This is fine when you are prototyping, but becomes quite wasteful once you have (mostly/somewhat) stabilized your data format. +3. (optional) Span-based errors. `toml-span` provides the `reporting` feature that can be enabled to allow `toml_span::Error` to be converted into a [Diagnostic](https://docs.rs/codespan-reporting/latest/codespan_reporting/diagnostic/struct.Diagnostic.html) which can provide nice error output if you use the `codespan-reporting` crate. + +## Usage + +### Simple + +The simplest use case for `toml-span` is as just a slimmer version of `toml` that also has a pointer API similar to [serde_json](https://docs.rs/serde_json/latest/serde_json/enum.Value.html#method.pointer) allowing easy piecemeal deserialization of a toml document.
+ +#### `toml` version + +```rust +fn is_crates_io_sparse(config: &toml::Value) -> Option { + config + .get("registries") + .and_then(|v| v.get("crates-io")) + .and_then(|v| v.get("protocol")) + .and_then(|v| v.as_str()) + .and_then(|v| match v { + "sparse" => Some(true), + "git" => Some(false), + _ => None, + }) +} +``` + +#### `toml-span` version + +```rust +fn is_crates_io_sparse(config: &toml_span::Value) -> Option { + match config.pointer("/registries/crates-io/protocol").and_then(|p| p.as_str())? { + "sparse" => Some(true), + "git" => Some(false), + _ => None + } +} +``` + +### Common + +Of course the most common case is deserializing toml into Rust containers. + +#### `toml` version -1. Create a new repository under EmbarkStudios using this template. -1. **Title:** Change the first line of this README to the name of your project, and replace the sunflower with an emoji that represents your project. 🚨 Your emoji selection is critical. -1. **Badges:** In the badges section above, change the repo name in each URL. If you are creating something other than a Rust crate, remove the crates.io and docs badges (and feel free to add more appropriate ones for your language). -1. **CI:** In `./github/workflows/` rename `rust-ci.yml` (or the appropriate config for your language) to `ci.yml`. And go over it and adapt it to work for your project - - If you aren't using or customized the CI workflow, also see the TODO in `.mergify.yml` - - If you want to use the automatic rustdoc publishing to github pages for git main, see `rustdoc-pages.yml` -1. **Issue & PR Templates**: Review the files in `.github/ISSUE_TEMPLATE` and `.github/pull_request_template`. Adapt them -to suit your needs, removing or re-wording any sections that don't make sense for your use case. -1. **CHANGELOG.md:** Change the `$REPO_NAME` in the links at the bottom to the name of the repository, and replace the example template lines with the actual notes for the repository/crate. -1. 
**release.toml:** in `./release.toml` change the `$REPO_NAME` to the name of the repository -1. **Cleanup:** Remove this section of the README and any unused files (such as configs for other languages) from the repo. +```rust +#[derive(Deserialize, Clone)] +#[cfg_attr(test, derive(Debug, PartialEq, Eq))] +#[serde(rename_all = "kebab-case", deny_unknown_fields)] +pub struct CrateBan { + pub name: Spanned, + pub version: Option, + /// One or more crates that will allow this crate to be used if it is a + /// direct dependency + pub wrappers: Option>>>, + /// Setting this to true will only emit an error if multiple + // versions of the crate are found + pub deny_multiple_versions: Option>, +} +``` + +#### `toml-span` version + +The following code is much more verbose (before proc macros run at least), but showcases something that moving [cargo-deny] to `toml-span` allowed, namely, `PackageSpec`. + +Before `toml-span`, all cases where a user specifies a crate spec (i.e., name + optional version requirement) were handled via two separate fields, `name` and `version`. This was quite verbose, as in many cases not only is `version` not specified, but the spec could also be just a string if the user doesn't need/want to provide other fields. Normally one would use the [string or struct](https://serde.rs/string-or-struct.html) idiom but this was impossible due to how I wanted to reorganize the data to have the package spec as either a string or struct, _as well as_ optional data that is flattened to the same level as the package spec. But since `toml-span` changes how deserialization is done, this change was quite trivial after the initial work of getting the crate stood up was done.
+ +```rust +pub type CrateBan = PackageSpecOrExtended; + +#[cfg_attr(test, derive(Debug, PartialEq, Eq))] +pub struct CrateBanExtended { + /// One or more crates that will allow this crate to be used if it is a + /// direct dependency + pub wrappers: Option>>>, + /// Setting this to true will only emit an error if multiple versions of the + /// crate are found + pub deny_multiple_versions: Option>, + /// The reason for banning the crate + pub reason: Option, + /// The crate to use instead of the banned crate, could be just the crate name + /// or a URL + pub use_instead: Option>, +} + +impl<'de> Deserialize<'de> for CrateBanExtended { + fn deserialize(value: &mut Value<'de>) -> Result { + // The table helper provides convenience wrappers around a Value::Table, which + // is just a BTreeMap + let mut th = TableHelper::new(value)?; + + // Since we specify the keys manually there is no need for serde(rename/rename_all) + let wrappers = th.optional("wrappers"); + let deny_multiple_versions = th.optional("deny-multiple-versions"); + let reason = th.optional_s("reason"); + let use_instead = th.optional("use-instead"); + // Specifying None means that any keys that still exist in the table are + // unknown, producing an error the same as with serde(deny_unknown_fields) + th.finalize(None)?; + + Ok(Self { + wrappers, + deny_multiple_versions, + reason: reason.map(Reason::from), + use_instead, + }) + } +} + +#[derive(Clone, PartialEq, Eq)] +pub struct PackageSpec { + pub name: Spanned, + pub version_req: Option, +} + +impl<'de> Deserialize<'de> for PackageSpec { + fn deserialize(value: &mut Value<'de>) -> Result { + use std::borrow::Cow; + + struct Ctx<'de> { + inner: Cow<'de, str>, + split: Option<(usize, bool)>, + span: Span, + } + + impl<'de> Ctx<'de> { + fn from_str(bs: Cow<'de, str>, span: Span) -> Self { + let split = bs + .find('@') + .map(|i| (i, true)) + .or_else(|| bs.find(':').map(|i| (i, false))); + Self { + inner: bs, + split, + span, + } + } + } + + let ctx = 
match value.take() { + ValueInner::String(s) => Ctx::from_str(s, value.span), + ValueInner::Table(tab) => { + let mut th = TableHelper::from((tab, value.span)); + + if let Some(mut val) = th.table.remove(&"crate".into()) { + let s = val.take_string(Some("a crate spec"))?; + th.finalize(Some(value))?; + + Ctx::from_str(s, val.span) + } else { + // Encourage user to use the 'crate' spec instead + let name = th.required("name").map_err(|e| { + if matches!(e.kind, toml_span::ErrorKind::MissingField(_)) { + (toml_span::ErrorKind::MissingField("crate"), e.span).into() + } else { + e + } + })?; + let version = th.optional::>>("version"); + + // We return all the keys we haven't deserialized back to the value, + // so that further deserializers can use them as this spec is + // always embedded in a larger structure + th.finalize(Some(value))?; + + let version_req = if let Some(vr) = version { + Some(vr.value.parse().map_err(|e: semver::Error| { + toml_span::Error::from(( + toml_span::ErrorKind::Custom(e.to_string()), + vr.span, + )) + })?) 
+ } else { + None + }; + + return Ok(Self { name, version_req }); + } + } + other => return Err(expected("a string or table", other, value.span).into()), + }; + + let (name, version_req) = if let Some((i, make_exact)) = ctx.split { + let mut v: VersionReq = ctx.inner[i + 1..].parse().map_err(|e: semver::Error| { + toml_span::Error::from(( + toml_span::ErrorKind::Custom(e.to_string()), + Span::new(ctx.span.start + i + 1, ctx.span.end), + )) + })?; + if make_exact { + if let Some(comp) = v.comparators.get_mut(0) { + comp.op = semver::Op::Exact; + } + } + + ( + Spanned::with_span( + ctx.inner[..i].into(), + Span::new(ctx.span.start, ctx.span.start + i), + ), + Some(v), + ) + } else { + (Spanned::with_span(ctx.inner.into(), ctx.span), None) + }; + + Ok(Self { name, version_req }) + } +} + +pub struct PackageSpecOrExtended { + pub spec: PackageSpec, + pub inner: Option, +} + +impl PackageSpecOrExtended { + pub fn try_convert(self) -> Result, E> + where + V: TryFrom, + { + let inner = if let Some(i) = self.inner { + Some(V::try_from(i)?) + } else { + None + }; + + Ok(PackageSpecOrExtended { + spec: self.spec, + inner, + }) + } + + pub fn convert(self) -> PackageSpecOrExtended + where + V: From, + { + PackageSpecOrExtended { + spec: self.spec, + inner: self.inner.map(V::from), + } + } +} + +impl<'de, T> toml_span::Deserialize<'de> for PackageSpecOrExtended +where + T: toml_span::Deserialize<'de>, +{ + fn deserialize(value: &mut Value<'de>) -> Result { + let spec = PackageSpec::deserialize(value)?; + + // If more keys exist in the table (or string) then try to deserialize + // the rest as the "extended" portion + let inner = if value.has_keys() { + Some(T::deserialize(value)?) + } else { + None + }; + + Ok(Self { spec, inner }) + } +} +``` ## Contributing @@ -58,3 +399,5 @@ This contribution is dual licensed under EITHER OF at your option. For clarity, "your" refers to Embark or any other licensee/user of the contribution. 
+ +[cargo-deny]: https://github.com/EmbarkStudios/cargo-deny diff --git a/toml-span/Cargo.toml b/toml-span/Cargo.toml index 40fa174..829a1a9 100644 --- a/toml-span/Cargo.toml +++ b/toml-span/Cargo.toml @@ -11,6 +11,5 @@ reporting = ["dep:codespan-reporting"] [dependencies] codespan-reporting = { version = "0.11", optional = true } -identconv = "0.2" serde = { version = "1.0", optional = true } smallvec = "1.13" diff --git a/toml-span/src/lib.rs b/toml-span/src/lib.rs index 785e377..a229b66 100644 --- a/toml-span/src/lib.rs +++ b/toml-span/src/lib.rs @@ -10,9 +10,10 @@ pub mod value; pub use de::parse; pub use error::{DeserError, Error, ErrorKind}; pub use span::Span; +pub use value::Value; pub trait Deserialize<'de>: Sized { - fn deserialize(value: &mut value::Value<'de>) -> Result; + fn deserialize(value: &mut Value<'de>) -> Result; } pub trait DeserializeOwned: for<'de> Deserialize<'de> {} From a5c2a45ca45c81982b8e2202a5586a6b53e80a29 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 20 Feb 2024 15:08:23 +0100 Subject: [PATCH 12/16] Update CI --- .github/workflows/rust-ci.yml | 180 ++++------------------------ .github/workflows/rustdoc-pages.yml | 64 ---------- .mergify.yml | 24 ---- CHANGELOG.md | 29 +---- release.toml | 3 +- 5 files changed, 25 insertions(+), 275 deletions(-) delete mode 100644 .github/workflows/rustdoc-pages.yml delete mode 100644 .mergify.yml diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 3278b80..40fcc72 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -1,24 +1,19 @@ -# TODO: Replace this line with the commented ones to actually run the action in your repo(s) -on: public -# on: -# push: -# branches: -# - main -# tags: -# - "*" -# pull_request: +on: + push: + branches: + - main + tags: + - "*" + pull_request: name: CI jobs: lint: name: Lint - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 - with: - 
toolchain: stable - override: true + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable # make sure all code has been formatted with rustfmt - name: check rustfmt @@ -35,164 +30,29 @@ jobs: test: name: Test - strategy: - matrix: - os: [ubuntu-latest, windows-latest, macOS-latest] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable - run: cargo fetch - name: cargo test build - # Note the use of release here means longer compile time, but much - # faster test execution time. If you don't have any heavy tests it - # might be faster to take off release and just compile in debug - run: cargo build --tests --release + run: cargo build --tests - name: cargo test - run: cargo test --release + run: cargo test - # TODO: Remove this check if you don't use cargo-deny in the repo deny-check: name: cargo-deny - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: EmbarkStudios/cargo-deny-action@v1 - # TODO: Remove this check if you don't publish the crate(s) from this repo publish-check: name: Publish Check - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable - run: cargo fetch - name: cargo publish check run: cargo publish --dry-run - - # TODO: Remove this job if you don't release binaries - # Replace occurances of $BIN_NAME with the name of your binary - release: - name: Release - needs: [test, deny-check] - if: startsWith(github.ref, 'refs/tags/') - strategy: - matrix: - os: [ubuntu-latest, macOS-latest, windows-latest] - include: - - os: ubuntu-latest - rust: stable - target: x86_64-unknown-linux-musl - bin: $BIN_NAME - # We 
don't enable the progress feature when targeting - # musl since there are some dependencies on shared libs - features: "" - - os: windows-latest - rust: stable - target: x86_64-pc-windows-msvc - bin: $BIN_NAME.exe - features: progress - - os: macOS-latest - rust: stable - target: x86_64-apple-darwin - bin: $BIN_NAME - features: progress - runs-on: ${{ matrix.os }} - steps: - - name: Install stable toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: ${{ matrix.rust }} - override: true - target: ${{ matrix.target }} - - name: Install musl tools - if: matrix.os == 'ubuntu-latest' - run: sudo apt-get install -y musl-tools - - name: Checkout - uses: actions/checkout@v3 - - run: cargo fetch --target ${{ matrix.target }} - - name: Release build - shell: bash - run: | - if [ "${{ matrix.features }}" != "" ]; then - cargo build --release --target ${{ matrix.target }} --features ${{ matrix.features }} - else - cargo build --release --target ${{ matrix.target }} - fi - - name: Package - shell: bash - run: | - name=$BIN_NAME - tag=$(git describe --tags --abbrev=0) - release_name="$name-$tag-${{ matrix.target }}" - release_tar="${release_name}.tar.gz" - mkdir "$release_name" - - if [ "${{ matrix.target }}" != "x86_64-pc-windows-msvc" ]; then - strip "target/${{ matrix.target }}/release/${{ matrix.bin }}" - fi - - cp "target/${{ matrix.target }}/release/${{ matrix.bin }}" "$release_name/" - cp README.md LICENSE-APACHE LICENSE-MIT "$release_name/" - tar czvf "$release_tar" "$release_name" - - rm -r "$release_name" - - # Windows environments in github actions don't have the gnu coreutils installed, - # which includes the shasum exe, so we just use powershell instead - if [ "${{ matrix.os }}" == "windows-latest" ]; then - echo "(Get-FileHash \"${release_tar}\" -Algorithm SHA256).Hash | Out-File -Encoding ASCII -NoNewline \"${release_tar}.sha256\"" | pwsh -c - - else - echo -n "$(shasum -ba 256 "${release_tar}" | cut -d " " -f 1)" > "${release_tar}.sha256" - fi - - name: 
Publish - uses: softprops/action-gh-release@v1 - with: - draft: true - files: "$BIN_NAME*" - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - # TODO: Remove this job if you don't publish container images on each release - # TODO: Create a repository on DockerHub with the same name as the GitHub repo - # TODO: Add the new repo to the buildbot group with read & write permissions - # TODO: Add the embarkbot dockerhub password to the repo secrets as DOCKERHUB_PASSWORD - publish-container-images: - name: Publish container images - runs-on: ubuntu-latest - if: startsWith(github.ref, 'refs/tags/') - needs: [test, deny-check] - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - name: Login to Dockerhub - uses: docker/login-action@v1 - with: - username: embarkbot - password: ${{ secrets.DOCKERHUB_PASSWORD }} - - name: Docker meta - id: docker_meta - uses: crazy-max/ghaction-docker-meta@v1 - with: - images: embarkstudios/${{ github.event.repository.name }} - tag-semver: | - {{version}} - {{major}}.{{minor}} - - name: Build and push - uses: docker/build-push-action@v2 - with: - context: . - file: ./Dockerfile - push: true - tags: ${{ steps.docker_meta.outputs.tags }} - labels: ${{ steps.docker_meta.outputs.labels }} diff --git a/.github/workflows/rustdoc-pages.yml b/.github/workflows/rustdoc-pages.yml deleted file mode 100644 index 2450699..0000000 --- a/.github/workflows/rustdoc-pages.yml +++ /dev/null @@ -1,64 +0,0 @@ -# Publishes docs built off latest git main branch to a GitHub Pages site. 
-# The docs root will then be served at https://$YOUR_USERNAME.github.io/$REPO_NAME/$CRATE_NAME/index.html -# For example, https://embarkstudios.github.io/presser/presser/index.html -# -# You must also go to the Pages settings for your repo and set it to serve from Actions for this to work -name: Publish Docs - -# TODO: Replace this line with the commented ones to actually run the action in your repo(s) -on: public -# on: -# workflow_dispatch: -# push: -# branches: [ "main" ] - -env: - CARGO_TERM_COLOR: always - -jobs: - build: - name: Build Docs - - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Setup Rust Env - uses: actions-rs/toolchain@v1 - with: - toolchain: nightly - override: true - - name: Build Docs - run: RUSTDOCFLAGS="--cfg docs_build" cargo doc - - name: Setup Pages - id: pages - uses: actions/configure-pages@v2 - - name: Upload artifact - uses: actions/upload-pages-artifact@v1 - with: - # Upload entire doc folder - path: './target/doc' - - deploy: - name: Deploy to Pages - - needs: build - - # Grant GITHUB_TOKEN the permissions required to make a Pages deployment - permissions: - pages: write # to deploy to Pages - id-token: write # to verify the deployment originates from an appropriate source - - # Deploy to the github-pages environment - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - - # Specify runner + deployment step - runs-on: ubuntu-latest - - steps: - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v1 \ No newline at end of file diff --git a/.mergify.yml b/.mergify.yml deleted file mode 100644 index 51f3119..0000000 --- a/.mergify.yml +++ /dev/null @@ -1,24 +0,0 @@ -pull_request_rules: - - name: automatic merge when CI passes and 1 reviews - conditions: - - "#approved-reviews-by>=1" - - "#review-requested=0" - - "#changes-requested-reviews-by=0" - - "#review-threads-unresolved=0" - - base=main - - label!=block-automerge - # TODO: If 
you're not a Rust project and aren't using the bundled rust-ci workflows, - # remove these or change them to the relevant CI job names: - - check-success=Lint - - check-success=Build & Test (ubuntu-latest) - - check-success=Build & Test (windows-latest) - - check-success=Build & Test (macOS-latest) - - check-success=Publish Check - actions: - merge: - method: squash - - name: delete head branch after merge - conditions: - - merged - actions: - delete_head_branch: {} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d96b42..957b668 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,31 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - ReleaseDate - -## [0.1.1] - 2019-09-03 -### Added -- New features go here in a bullet list - -### Changed -- Changes to existing functionality go here in a bullet list - -### Deprecated -- Mark features soon-to-be removed in a bullet list - -### Removed -- Features that have been removed in a bullet list - -### Fixed -- Bug fixes in a bullet list - -### Security -- Changes/fixes related to security vulnerabilities in a bullet list - -## [0.1.0] - 2019-09-02 +## [0.1.0] - 2024-02-20 ### Added -- Initial add of the thing +- Initial implementation -[Unreleased]: https://github.com/EmbarkStudios/$REPO_NAME/compare/0.1.1...HEAD -[0.1.1]: https://github.com/EmbarkStudios/$REPO_NAME/compare/0.1.0...0.1.1 -[0.1.0]: https://github.com/EmbarkStudios/$REPO_NAME/releases/tag/0.1.0 +[Unreleased]: https://github.com/EmbarkStudios/toml-span/compare/0.1.0...HEAD +[0.1.0]: https://github.com/EmbarkStudios/toml-span/releases/tag/0.1.0 diff --git a/release.toml b/release.toml index b915562..23fcd2b 100644 --- a/release.toml +++ b/release.toml @@ -1,5 +1,4 @@ pre-release-commit-message = "Release {{version}}" -dev-version = false tag-message = "Release {{version}}" tag-name = "{{version}}" pre-release-replacements = [ @@ -7,5 +6,5 @@ pre-release-replacements = [ { file = 
"CHANGELOG.md", search = "\\.\\.\\.HEAD", replace = "...{{tag_name}}" }, { file = "CHANGELOG.md", search = "ReleaseDate", replace = "{{date}}" }, { file = "CHANGELOG.md", search = "", replace = "\n## [Unreleased] - ReleaseDate" }, - { file = "CHANGELOG.md", search = "", replace = "\n[Unreleased]: https://github.com/EmbarkStudios/$REPO_NAME/compare/{{tag_name}}...HEAD" }, + { file = "CHANGELOG.md", search = "", replace = "\n[Unreleased]: https://github.com/EmbarkStudios/toml-span/compare/{{tag_name}}...HEAD" }, ] From ed0626d2a8d600b95e5a4abe04c98fc4222c31ae Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 20 Feb 2024 15:10:35 +0100 Subject: [PATCH 13/16] Oops --- .github/workflows/rust-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 40fcc72..0b97bed 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -55,4 +55,4 @@ jobs: - uses: dtolnay/rust-toolchain@stable - run: cargo fetch - name: cargo publish check - run: cargo publish --dry-run + run: cargo publish --dry-run -p toml-span From 830f2a713c7ee53eefe15783ba4adeb6e3b9d003 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 20 Feb 2024 16:39:21 +0100 Subject: [PATCH 14/16] Fix all clippy lints, improve some errors --- .cargo/config.toml | 82 +++++++++++++++++++ .github/workflows/rust-ci.yml | 4 + integ-tests/src/lib.rs | 2 +- integ-tests/tests/de.rs | 37 ++++++++- integ-tests/tests/parser.rs | 15 ++++ .../tests/snapshots/de__custom_error.snap | 17 ++++ .../snapshots/parser__bad_keys__dotted.snap | 13 +++ .../snapshots/parser__bad_keys__newline3.snap | 13 +++ integ-tests/tests/snapshots/parser__eof.snap | 11 +++ .../parser__table_array_implicit.snap | 12 +++ integ-tests/tests/tokens.rs | 24 +++--- toml-span/src/de.rs | 40 ++------- toml-span/src/de_helpers.rs | 22 ++--- toml-span/src/error.rs | 73 ++++++++--------- toml-span/src/span.rs | 2 +- toml-span/src/tokens.rs | 34 ++++---- 
toml-span/src/value/impl_serde.rs | 2 +- 17 files changed, 287 insertions(+), 116 deletions(-) create mode 100644 .cargo/config.toml create mode 100644 integ-tests/tests/snapshots/de__custom_error.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__dotted.snap create mode 100644 integ-tests/tests/snapshots/parser__bad_keys__newline3.snap create mode 100644 integ-tests/tests/snapshots/parser__eof.snap create mode 100644 integ-tests/tests/snapshots/parser__table_array_implicit.snap diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..69564e3 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,82 @@ +[target.'cfg(all())'] +rustflags = [ + # BEGIN - Embark standard lints v6 for Rust 1.55+ + # do not change or add/remove here, but one can add exceptions after this section + # for more info see: + "-Dunsafe_code", + "-Wclippy::all", + "-Wclippy::await_holding_lock", + "-Wclippy::char_lit_as_u8", + "-Wclippy::checked_conversions", + "-Wclippy::dbg_macro", + "-Wclippy::debug_assert_with_mut_call", + "-Wclippy::doc_markdown", + "-Wclippy::empty_enum", + "-Wclippy::enum_glob_use", + "-Wclippy::exit", + "-Wclippy::expl_impl_clone_on_copy", + "-Wclippy::explicit_deref_methods", + "-Wclippy::explicit_into_iter_loop", + "-Wclippy::fallible_impl_from", + "-Wclippy::filter_map_next", + "-Wclippy::flat_map_option", + "-Wclippy::float_cmp_const", + "-Wclippy::fn_params_excessive_bools", + "-Wclippy::from_iter_instead_of_collect", + "-Wclippy::if_let_mutex", + "-Wclippy::implicit_clone", + "-Wclippy::imprecise_flops", + "-Wclippy::inefficient_to_string", + "-Wclippy::invalid_upcast_comparisons", + "-Wclippy::large_digit_groups", + "-Wclippy::large_stack_arrays", + "-Wclippy::large_types_passed_by_value", + "-Wclippy::let_unit_value", + "-Wclippy::linkedlist", + "-Wclippy::lossy_float_literal", + "-Wclippy::macro_use_imports", + "-Wclippy::manual_ok_or", + "-Wclippy::map_err_ignore", + "-Wclippy::map_flatten", + 
"-Wclippy::map_unwrap_or", + "-Wclippy::match_on_vec_items", + "-Wclippy::match_same_arms", + "-Wclippy::match_wild_err_arm", + "-Wclippy::match_wildcard_for_single_variants", + "-Wclippy::mem_forget", + "-Wclippy::mismatched_target_os", + "-Wclippy::missing_enforced_import_renames", + "-Wclippy::mut_mut", + "-Wclippy::mutex_integer", + "-Wclippy::needless_borrow", + "-Wclippy::needless_continue", + "-Wclippy::needless_for_each", + "-Wclippy::option_option", + "-Wclippy::path_buf_push_overwrite", + "-Wclippy::ptr_as_ptr", + "-Wclippy::rc_mutex", + "-Wclippy::ref_option_ref", + "-Wclippy::rest_pat_in_fully_bound_structs", + "-Wclippy::same_functions_in_if_condition", + "-Wclippy::semicolon_if_nothing_returned", + "-Wclippy::single_match_else", + "-Wclippy::string_add_assign", + "-Wclippy::string_add", + "-Wclippy::string_lit_as_bytes", + "-Wclippy::string_to_string", + "-Wclippy::todo", + "-Wclippy::trait_duplication_in_bounds", + "-Wclippy::unimplemented", + "-Wclippy::unnested_or_patterns", + "-Wclippy::unused_self", + "-Wclippy::useless_transmute", + "-Wclippy::verbose_file_reads", + "-Wclippy::zero_sized_map_values", + "-Wfuture_incompatible", + "-Wnonstandard_style", + "-Wrust_2018_idioms", + # END - Embark standard lints v6 for Rust 1.55+ +] + +[target.'cfg(target_env = "musl")'] +rustflags = ["-C", "target-feature=+crt-static"] diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 0b97bed..383b2cd 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -6,6 +6,10 @@ on: - "*" pull_request: +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + name: CI jobs: lint: diff --git a/integ-tests/src/lib.rs b/integ-tests/src/lib.rs index f9b2a4b..502bffa 100644 --- a/integ-tests/src/lib.rs +++ b/integ-tests/src/lib.rs @@ -128,7 +128,7 @@ macro_rules! 
invalid_de { pub type File<'s> = codespan_reporting::files::SimpleFile<&'static str, &'s str>; pub fn emit_diags( - f: &File, + f: &File<'_>, error: impl IntoIterator>, ) -> String { let mut output = codespan_reporting::term::termcolor::NoColor::new(Vec::new()); diff --git a/integ-tests/tests/de.rs b/integ-tests/tests/de.rs index 1922074..8f0cbc0 100644 --- a/integ-tests/tests/de.rs +++ b/integ-tests/tests/de.rs @@ -171,7 +171,7 @@ impl<'de> Deserialize<'de> for Flattened { valid_de!(flattened, Flattened); -/// Just validates the Value::pointer/_mut methods work as expected +/// Validates the `Value::pointer/_mut` methods work as expected #[test] fn pointers() { let mut ba = toml_span::parse(include_str!("../data/basic_arrays.toml")).unwrap(); @@ -190,5 +190,38 @@ fn pointers() { "fourth:0.1" ); - assert!(dbg!(ba.pointer("/packages/3/crate")).is_none()); + assert!(ba.pointer("/packages/3/crate").is_none()); } + +#[derive(Debug)] +struct Ohno { + year: u8, +} + +impl<'de> Deserialize<'de> for Ohno { + fn deserialize(value: &mut Value<'de>) -> Result { + let mut th = TableHelper::new(value)?; + let year = th.required("year").ok(); + + if let Some(snbh) = th.optional_s::>("this-is-deprecated") { + th.errors.push( + ( + toml_span::ErrorKind::Custom("this-is-deprecated is deprecated".into()), + snbh.span, + ) + .into(), + ); + } + + th.finalize(Some(value))?; + Ok(Self { + year: year.unwrap(), + }) + } +} + +invalid_de!( + custom_error, + Ohno, + "year = 40_000\nthis-is-deprecated = 'this should not be here'" +); diff --git a/integ-tests/tests/parser.rs b/integ-tests/tests/parser.rs index 2ab14f9..007d442 100644 --- a/integ-tests/tests/parser.rs +++ b/integ-tests/tests/parser.rs @@ -36,6 +36,18 @@ bar = \"\"\"\\\r\n \r\n \r\n a\"\"\"" invalid!(newline_string, "a = \"\n\""); invalid!(newline_literal, "a = '\n'"); valid!(key_names); +// Ensures that a table with an array cannot then be followed by an array of the +// same name +invalid!( + table_array_implicit, + r#" 
+[[albums.songs]] +name = "Glory Days" + +[[albums]] +name = "Born in the USA" +"# +); mod stray_cr { use super::invalid; @@ -81,11 +93,13 @@ mod bad_keys { invalid!(none, "=3"); invalid!(empty_pipe, "\"\"|=3"); invalid!(newline2, "\"\n\"|=3"); + invalid!(newline3, "\"something\nsomething else\"=3"); invalid!(cr, "\"\r\"|=3"); invalid!(mutli_line, "''''''=3"); invalid!(multi_line2, r#"""""""=3"#); invalid!(multi_line3, "'''key'''=3"); invalid!(multi_line4, r#""""key"""=3"#); + invalid!(dotted, "a = 1\na.b = 2"); } valid!(table_names); @@ -115,6 +129,7 @@ invalid!(integer_range_negative, "a = -9223372036854775809"); invalid!(bare_number, "4"); valid!(inline_tables); +invalid!(eof, "key ="); mod bad_inline_tables { use super::invalid; diff --git a/integ-tests/tests/snapshots/de__custom_error.snap b/integ-tests/tests/snapshots/de__custom_error.snap new file mode 100644 index 0000000..e326d9c --- /dev/null +++ b/integ-tests/tests/snapshots/de__custom_error.snap @@ -0,0 +1,17 @@ +--- +source: integ-tests/tests/de.rs +expression: error +--- +error[out-of-range]: number is out of range of 'u8' + ┌─ custom_error:1:8 + │ +1 │ year = 40_000 + │ ^^^^^^ + +error[custom]: this-is-deprecated is deprecated + ┌─ custom_error:2:23 + │ +2 │ this-is-deprecated = 'this should not be here' + │ ^^^^^^^^^^^^^^^^^^^^^^^ + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__dotted.snap b/integ-tests/tests/snapshots/parser__bad_keys__dotted.snap new file mode 100644 index 0000000..f5e807c --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__dotted.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[dotted-key-invalid-type]: dotted key attempted to extend non-table type + ┌─ dotted:2:1 + │ +1 │ a = 1 + │ - non-table +2 │ a.b = 2 + │ ^^^^^^^ attempted to extend table here + + diff --git a/integ-tests/tests/snapshots/parser__bad_keys__newline3.snap b/integ-tests/tests/snapshots/parser__bad_keys__newline3.snap new file mode 100644 
index 0000000..3bfd775 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__bad_keys__newline3.snap @@ -0,0 +1,13 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[invalid-char-in-string]: + ┌─ newline3:1:11 + │ +1 │ "something + │ ╭──────────^ +2 │ │ something else"=3 + │ ╰^ invalid character '\n' in string + + diff --git a/integ-tests/tests/snapshots/parser__eof.snap b/integ-tests/tests/snapshots/parser__eof.snap new file mode 100644 index 0000000..ebf088d --- /dev/null +++ b/integ-tests/tests/snapshots/parser__eof.snap @@ -0,0 +1,11 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[unexpected-eof]: unexpected end of file + ┌─ eof:1:6 + │ +1 │ key = + │ ^ + + diff --git a/integ-tests/tests/snapshots/parser__table_array_implicit.snap b/integ-tests/tests/snapshots/parser__table_array_implicit.snap new file mode 100644 index 0000000..f0c2b45 --- /dev/null +++ b/integ-tests/tests/snapshots/parser__table_array_implicit.snap @@ -0,0 +1,12 @@ +--- +source: integ-tests/tests/parser.rs +expression: error +--- +error[redefine-as-array]: table redefined as array + ┌─ table_array_implicit:5:1 + │ +5 │ ╭ [[albums]] +6 │ │ name = "Born in the USA" + │ ╰^ + + diff --git a/integ-tests/tests/tokens.rs b/integ-tests/tests/tokens.rs index be83678..0cede6e 100644 --- a/integ-tests/tests/tokens.rs +++ b/integ-tests/tests/tokens.rs @@ -4,16 +4,16 @@ use toml_span::tokens::{Error, Token, Tokenizer}; fn err(input: &str, err: Error) { let mut t = Tokenizer::new(input); - let token = t.next().unwrap_err(); + let token = t.step().unwrap_err(); assert_eq!(token, err); - assert!(t.next().unwrap().is_none()); + assert!(t.step().unwrap().is_none()); } #[test] fn strings() { fn t(input: &str, eval: &str, emultiline: bool) { let mut t = Tokenizer::new(input); - let (_, token) = t.next().unwrap().unwrap(); + let (_, token) = t.step().unwrap().unwrap(); if let Token::String { src, @@ -28,7 +28,7 @@ fn strings() { } else { panic!("not a 
string"); } - assert!(t.next().unwrap().is_none()); + assert!(t.step().unwrap().is_none()); } // Literal strings @@ -87,9 +87,9 @@ fn strings() { fn keylike() { fn t(input: &str) { let mut t = Tokenizer::new(input); - let (_, token) = t.next().unwrap().unwrap(); + let (_, token) = t.step().unwrap().unwrap(); assert_eq!(token, Token::Keylike(input)); - assert!(t.next().unwrap().is_none()); + assert!(t.step().unwrap().is_none()); } t("foo"); t("0bar"); @@ -103,10 +103,10 @@ fn keylike() { #[test] fn all() { - fn t(input: &str, expected: &[((usize, usize), Token, &str)]) { + fn t(input: &str, expected: &[((usize, usize), Token<'_>, &str)]) { let mut tokens = Tokenizer::new(input); - let mut actual: Vec<((usize, usize), Token, &str)> = Vec::new(); - while let Some((span, token)) = tokens.next().unwrap() { + let mut actual: Vec<((usize, usize), Token<'_>, &str)> = Vec::new(); + while let Some((span, token)) = tokens.step().unwrap() { actual.push((span.into(), token, &input[span.start..span.end])); } for (a, b) in actual.iter().zip(expected) { @@ -168,7 +168,7 @@ fn bare_cr_bad() { #[test] fn bad_comment() { let mut t = Tokenizer::new("#\u{0}"); - t.next().unwrap().unwrap(); - assert_eq!(t.next(), Err(Error::Unexpected(1, '\u{0}'))); - assert!(t.next().unwrap().is_none()); + t.step().unwrap().unwrap(); + assert_eq!(t.step(), Err(Error::Unexpected(1, '\u{0}'))); + assert!(t.step().unwrap().is_none()); } diff --git a/toml-span/src/de.rs b/toml-span/src/de.rs index 6fea82f..0911fd1 100644 --- a/toml-span/src/de.rs +++ b/toml-span/src/de.rs @@ -65,19 +65,6 @@ impl<'de, 'b> Ctx<'de, 'b> { } } -macro_rules! 
printc { - ($c:expr, $($arg:tt)*) => {{ - // let ctx = $c; - // for _ in 0..ctx.depth { - // eprint!(" "); - // } - - // eprint!("{}:{} {}:{} ", file!(), line!(), ctx.cur_parent, ctx.cur); - - // eprintln!($($arg)*); - }}; -} - fn deserialize_table<'de, 'b>( mut ctx: Ctx<'de, 'b>, tables: &'b mut [Table<'de>], @@ -86,7 +73,6 @@ fn deserialize_table<'de, 'b>( while ctx.cur_parent < ctx.max && ctx.cur < ctx.max { if let Some(values) = ctx.values.take() { for (key, val) in values { - printc!(&ctx, "{} => {val:?}", key.name); table_insert(table, key, val, ctx.de)?; } } @@ -115,7 +101,6 @@ fn deserialize_table<'de, 'b>( }; ctx.cur = pos; - printc!(&ctx, "next table"); // Test to see if we're duplicating our parent's table, and if so // then this is an error in the toml format @@ -158,7 +143,6 @@ fn deserialize_table<'de, 'b>( // decoding. if ctx.depth != ttable.header.len() { let key = ttable.header[ctx.depth].clone(); - printc!(&ctx, "need next table '{}'", key.name); if let Some((k, _)) = table.get_key_value(&key) { return Err(ctx.error( key.span.start, @@ -172,7 +156,6 @@ fn deserialize_table<'de, 'b>( let array = ttable.array && ctx.depth == ttable.header.len() - 1; ctx.cur += 1; - printc!(&ctx, "before"); let cctx = Ctx { depth: ctx.depth + if array { 0 } else { 1 }, @@ -207,11 +190,9 @@ fn deserialize_table<'de, 'b>( return Err(ctx.error(ttable.at, Some(ttable.end), ErrorKind::RedefineAsArray)); } - printc!(&ctx, "taking values"); ctx.values = ttable.values.take(); } - printc!(&ctx, "done"); Ok(ctx.cur_parent) } @@ -300,8 +281,6 @@ fn deserialize_array<'de, 'b>( }) .unwrap_or(ctx.max); - printc!(&ctx, "array enter"); - let actx = Ctx { values: Some( tables[ctx.cur_parent] @@ -323,7 +302,6 @@ fn deserialize_array<'de, 'b>( arr.push(Value::new(ValueInner::Table(table))); ctx.cur_parent = next; - printc!(&ctx, "array advance"); } Ok(ctx.cur_parent) @@ -433,11 +411,8 @@ impl<'a> Deserializer<'a> { values: Some(Vec::new()), array, }; - loop { - match 
header.next().map_err(|e| self.token_error(e))? { - Some(part) => cur_table.header.push(part), - None => break, - } + while let Some(part) = header.next().map_err(|e| self.token_error(e))? { + cur_table.header.push(part); } } Line::KeyValue(key, value) => { @@ -859,8 +834,12 @@ impl<'a> Deserializer<'a> { )) => { return self.add_dotted_key(key_parts, value, v); } - Some(&mut (_, Val { start, end, .. })) => { - return Err(self.error(start, Some(end), ErrorKind::DottedKeyInvalidType)); + Some(&mut (ref first, _)) => { + return Err(self.error( + key.span.start, + Some(value.end), + ErrorKind::DottedKeyInvalidType { first: first.span }, + )); } None => {} } @@ -922,7 +901,7 @@ impl<'a> Deserializer<'a> { } fn next(&mut self) -> Result)>, Error> { - self.tokens.next().map_err(|e| self.token_error(e)) + self.tokens.step().map_err(|e| self.token_error(e)) } fn peek(&mut self) -> Result)>, Error> { @@ -952,7 +931,6 @@ impl<'a> Deserializer<'a> { TokenError::UnterminatedString(at) => { self.error(at, None, ErrorKind::UnterminatedString) } - TokenError::NewlineInTableKey(at) => self.error(at, None, ErrorKind::NewlineInTableKey), TokenError::Wanted { at, expected, diff --git a/toml-span/src/de_helpers.rs b/toml-span/src/de_helpers.rs index d767427..0b2f2e0 100644 --- a/toml-span/src/de_helpers.rs +++ b/toml-span/src/de_helpers.rs @@ -27,7 +27,7 @@ where match s.parse() { Ok(v) => Ok(v), Err(err) => Err(Error { - kind: ErrorKind::Custom(format!("failed to parse string: {err}")), + kind: ErrorKind::Custom(format!("failed to parse string: {err}").into()), span: value.span, line_info: None, }), @@ -200,14 +200,16 @@ impl<'de> TableHelper<'de> { .map(|key| (key.name.into(), key.span)) .collect(); - self.errors.push(Error { - span: self.span, - kind: ErrorKind::UnexpectedKeys { - keys, - expected: self.expected.into_iter().map(String::from).collect(), - }, - line_info: None, - }) + self.errors.push( + ( + ErrorKind::UnexpectedKeys { + keys, + expected: 
self.expected.into_iter().map(String::from).collect(), + }, + self.span, + ) + .into(), + ); } if self.errors.is_empty() { @@ -252,7 +254,7 @@ macro_rules! integer { ValueInner::Integer(i) => { let i = i.try_into().map_err(|_| { DeserError::from(Error { - kind: ErrorKind::InvalidNumber, + kind: ErrorKind::OutOfRange(stringify!($num)), span: value.span, line_info: None, }) diff --git a/toml-span/src/error.rs b/toml-span/src/error.rs index aab3839..decd7f4 100644 --- a/toml-span/src/error.rs +++ b/toml-span/src/error.rs @@ -49,12 +49,13 @@ pub enum ErrorKind { /// EOF mark. UnterminatedString, - /// A newline was found in a table key. - NewlineInTableKey, - /// A number failed to parse. InvalidNumber, + /// The number in the toml file cannot be losslessly converted to the specified + /// number type + OutOfRange(&'static str), + /// Wanted one sort of token, but found another. Wanted { /// Expected token type. @@ -82,26 +83,10 @@ pub enum ErrorKind { /// A custom error which could be generated when deserializing a particular /// type. - Custom(String), - - /// A tuple with a certain number of elements was expected but something - /// else was found. - ExpectedTuple(usize), - - /// Expected table keys to be in increasing tuple index order, but something - /// else was found. - ExpectedTupleIndex { - /// Expected index. - expected: usize, - /// Key that was specified. - found: String, - }, - - /// An empty table was expected but entries were found. - ExpectedEmptyTable, + Custom(std::borrow::Cow<'static, str>), /// Dotted key attempted to extend something that is not a table. - DottedKeyInvalidType, + DottedKeyInvalidType { first: Span }, /// An unexpected key was encountered. /// @@ -133,12 +118,9 @@ impl Display for ErrorKind { match self { Self::UnexpectedEof => f.write_str("unexpected-eof"), Self::Custom(..) => f.write_str("custom"), - Self::DottedKeyInvalidType => f.write_str("dotted-key-invalid-type"), + Self::DottedKeyInvalidType { .. 
} => f.write_str("dotted-key-invalid-type"), Self::DuplicateKey { .. } => f.write_str("duplicate-key"), Self::DuplicateTable { .. } => f.write_str("duplicate-table"), - Self::ExpectedEmptyTable => f.write_str("expected-empty-table"), - Self::ExpectedTuple(..) => f.write_str("expected-tuple"), - Self::ExpectedTupleIndex { .. } => f.write_str("expected-tuple-index"), Self::UnexpectedKeys { .. } => f.write_str("unexpected-keys"), Self::UnquotedString => f.write_str("unquoted-string"), Self::MultilineStringKey => f.write_str("multiline-string-key"), @@ -147,11 +129,10 @@ impl Display for ErrorKind { Self::InvalidEscape(..) => f.write_str("invalid-escape"), Self::InvalidEscapeValue(..) => f.write_str("invalid-escape-value"), Self::InvalidHexEscape(..) => f.write_str("invalid-hex-escape"), - //Self::NewlineInString => f.write_str("newline-in-string"), Self::Unexpected(..) => f.write_str("unexpected"), Self::UnterminatedString => f.write_str("unterminated-string"), - Self::NewlineInTableKey => f.write_str("newline-in-table-key"), Self::InvalidNumber => f.write_str("invalid-number"), + Self::OutOfRange(_) => f.write_str("out-of-range"), Self::Wanted { .. } => f.write_str("wanted"), Self::MissingField(..) => f.write_str("missing-field"), Self::Deprecated { .. } => f.write_str("deprecated"), @@ -182,10 +163,10 @@ impl Display for Error { match &self.kind { ErrorKind::UnexpectedEof => f.write_str("unexpected eof encountered")?, ErrorKind::InvalidCharInString(c) => { - write!(f, "invalid character in string: `{}`", Escape(*c))? + write!(f, "invalid character in string: `{}`", Escape(*c))?; } ErrorKind::InvalidEscape(c) => { - write!(f, "invalid escape character in string: `{}`", Escape(*c))? 
+ write!(f, "invalid escape character in string: `{}`", Escape(*c))?; } ErrorKind::InvalidHexEscape(c) => write!( f, @@ -195,11 +176,11 @@ impl Display for Error { ErrorKind::InvalidEscapeValue(c) => write!(f, "invalid escape value: `{c}`")?, ErrorKind::Unexpected(c) => write!(f, "unexpected character found: `{}`", Escape(*c))?, ErrorKind::UnterminatedString => f.write_str("unterminated string")?, - ErrorKind::NewlineInTableKey => f.write_str("found newline in table key")?, ErrorKind::Wanted { expected, found } => { write!(f, "expected {expected}, found {found}")?; } ErrorKind::InvalidNumber => f.write_str("invalid number")?, + ErrorKind::OutOfRange(kind) => write!(f, "out of range of '{kind}'")?, ErrorKind::DuplicateTable { name, .. } => { write!(f, "redefinition of table `{name}`")?; } @@ -208,15 +189,10 @@ impl Display for Error { } ErrorKind::RedefineAsArray => f.write_str("table redefined as array")?, ErrorKind::MultilineStringKey => { - f.write_str("multiline strings are not allowed for key")? + f.write_str("multiline strings are not allowed for key")?; } ErrorKind::Custom(message) => f.write_str(message)?, - ErrorKind::ExpectedTuple(l) => write!(f, "expected table with length {l}")?, - ErrorKind::ExpectedTupleIndex { expected, found } => { - write!(f, "expected table key `{expected}`, but was `{found}`")? - } - ErrorKind::ExpectedEmptyTable => f.write_str("expected empty table")?, - ErrorKind::DottedKeyInvalidType => { + ErrorKind::DottedKeyInvalidType { .. } => { f.write_str("dotted key attempted to extend non-table type")?; } ErrorKind::UnexpectedKeys { keys, expected } => write!( @@ -224,11 +200,11 @@ impl Display for Error { "unexpected keys in table: `{keys:?}`\nexpected: {expected:?}" )?, ErrorKind::UnquotedString => { - f.write_str("invalid TOML value, did you mean to use a quoted string?")? 
+ f.write_str("invalid TOML value, did you mean to use a quoted string?")?; } ErrorKind::MissingField(field) => write!(f, "missing field '{field}' in table")?, ErrorKind::Deprecated { old, new } => { - write!(f, "field '{old}' is deprecated, '{new}' has replaced it")? + write!(f, "field '{old}' is deprecated, '{new}' has replaced it")?; } ErrorKind::UnexpectedValue { expected } => write!(f, "expected '{expected:?}'")?, } @@ -286,6 +262,9 @@ impl Error { ErrorKind::InvalidNumber => diag.with_labels(vec![ Label::primary(fid, self.span).with_message("unable to parse number") ]), + ErrorKind::OutOfRange(kind) => diag + .with_message(format!("number is out of range of '{kind}'")) + .with_labels(vec![Label::primary(fid, self.span)]), ErrorKind::Wanted { expected, .. } => diag .with_labels(vec![ Label::primary(fid, self.span).with_message(format!("expected {expected}")) @@ -333,7 +312,21 @@ impl Error { .with_labels(vec![ Label::primary(fid, self.span).with_message("unexpected value") ]), - kind => unimplemented!("{kind}"), + ErrorKind::UnexpectedEof => diag + .with_message("unexpected end of file") + .with_labels(vec![Label::primary(fid, self.span)]), + ErrorKind::DottedKeyInvalidType { first } => { + diag.with_message(self.to_string()).with_labels(vec![ + Label::primary(fid, self.span).with_message("attempted to extend table here"), + Label::secondary(fid, *first).with_message("non-table"), + ]) + } + ErrorKind::RedefineAsArray => diag + .with_message(self.to_string()) + .with_labels(vec![Label::primary(fid, self.span)]), + ErrorKind::Custom(msg) => diag + .with_message(msg.to_string()) + .with_labels(vec![Label::primary(fid, self.span)]), }; diag diff --git a/toml-span/src/span.rs b/toml-span/src/span.rs index 4994900..39eb5aa 100644 --- a/toml-span/src/span.rs +++ b/toml-span/src/span.rs @@ -110,7 +110,7 @@ where fn clone(&self) -> Self { Self { value: self.value.clone(), - span: self.span.clone(), + span: self.span, } } } diff --git a/toml-span/src/tokens.rs 
b/toml-span/src/tokens.rs index 69f2bb3..48dc4e9 100644 --- a/toml-span/src/tokens.rs +++ b/toml-span/src/tokens.rs @@ -34,7 +34,6 @@ pub enum Error { NewlineInString(usize), Unexpected(usize, char), UnterminatedString(usize), - NewlineInTableKey(usize), MultilineStringKey(usize, usize), Wanted { at: usize, @@ -73,7 +72,7 @@ impl<'a> Tokenizer<'a> { t } - pub fn next(&mut self) -> Result)>, Error> { + pub fn step(&mut self) -> Result)>, Error> { let (start, token) = match self.one() { Some((start, '\n')) => (start, Token::Newline), Some((start, ' ' | '\t')) => (start, self.whitespace_token(start)), @@ -99,7 +98,7 @@ impl<'a> Tokenizer<'a> { } pub fn peek(&mut self) -> Result)>, Error> { - self.clone().next() + self.clone().step() } pub fn eat(&mut self, expected: Token<'a>) -> Result { @@ -113,7 +112,7 @@ impl<'a> Tokenizer<'a> { Some(_) | None => return Ok(None), }; - drop(self.next()); + drop(self.step()); Ok(Some(span)) } @@ -126,7 +125,7 @@ impl<'a> Tokenizer<'a> { /// Expect the given token returning its span. pub fn expect_spanned(&mut self, expected: Token<'a>) -> Result { let current = self.current(); - match self.next()? { + match self.step()? { Some((span, found)) => { if expected == found { Ok(span) @@ -148,7 +147,7 @@ impl<'a> Tokenizer<'a> { pub fn table_key(&mut self) -> Result, Error> { let current = self.current(); - match self.next()? { + match self.step()? { Some((span, Token::Keylike(k))) => Ok(Key { span, name: k.into(), @@ -168,7 +167,8 @@ impl<'a> Tokenizer<'a> { } match src.find('\n') { None => Ok(Key { span, name: val }), - Some(i) => Err(Error::NewlineInTableKey(offset + i)), + // This is not reachable + Some(i) => Err(Error::InvalidCharInString(i, '\n')), } } Some((_, other)) => Err(Error::Wanted { @@ -200,7 +200,7 @@ impl<'a> Tokenizer<'a> { pub fn eat_newline_or_eof(&mut self) -> Result<(), Error> { let current = self.current(); - match self.next()? { + match self.step()? 
{ None | Some((_, Token::Newline)) => Ok(()), Some((_, other)) => Err(Error::Wanted { at: current, @@ -245,7 +245,7 @@ impl<'a> Tokenizer<'a> { fn comment_token(&mut self, start: usize) -> Token<'a> { while let Some((_, ch)) = self.chars.clone().next() { - if ch != '\t' && (ch < '\u{20}' || ch > '\u{10ffff}') { + if ch != '\t' && !('\u{20}'..='\u{10ffff}').contains(&ch) { break; } self.one(); @@ -258,6 +258,7 @@ impl<'a> Tokenizer<'a> { /// string and its span they won't know the actual begin/end which can /// be needed for doing substring indices (eg reporting error messages /// when parsing a string) + #[allow(clippy::type_complexity)] fn read_string( &mut self, delim: char, @@ -359,7 +360,7 @@ impl<'a> Tokenizer<'a> { fn literal_string(&mut self, start: usize) -> Result<(Span, Token<'a>), Error> { self.read_string('\'', start, &mut |_me, val, _multi, i, ch| { - if ch == '\u{09}' || ('\u{20}' <= ch && ch <= '\u{10ffff}' && ch != '\u{7f}') { + if ch == '\u{09}' || (ch != '\u{7f}' && ('\u{20}'..='\u{10ffff}').contains(&ch)) { val.push(ch); Ok(()) } else { @@ -418,7 +419,7 @@ impl<'a> Tokenizer<'a> { } Ok(()) } - ch if ch == '\u{09}' || ('\u{20}' <= ch && ch <= '\u{10ffff}' && ch != '\u{7f}') => { + ch if ch == '\u{09}' || (ch != '\u{7f}' && ('\u{20}'..='\u{10ffff}').contains(&ch)) => { val.push(ch); Ok(()) } @@ -428,9 +429,9 @@ impl<'a> Tokenizer<'a> { fn hex(&mut self, start: usize, i: usize) -> Result { let mut buf = [0; N]; - for i in 0..N { + for b in buf.iter_mut() { match self.one() { - Some((_, ch)) if ch as u32 <= 0x7F && ch.is_ascii_hexdigit() => buf[i] = ch as u8, + Some((_, ch)) if ch as u32 <= 0x7F && ch.is_ascii_hexdigit() => *b = ch as u8, Some((i, ch)) => return Err(Error::InvalidHexEscape(i, ch)), None => return Err(Error::UnterminatedString(start)), } @@ -522,12 +523,9 @@ impl MaybeString { } } +#[inline] fn is_keylike(ch: char) -> bool { - ('A' <= ch && ch <= 'Z') - || ('a' <= ch && ch <= 'z') - || ('0' <= ch && ch <= '9') - || ch == '-' - || ch 
== '_' + ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' } impl<'a> Token<'a> { diff --git a/toml-span/src/value/impl_serde.rs b/toml-span/src/value/impl_serde.rs index a682b45..31a4a60 100644 --- a/toml-span/src/value/impl_serde.rs +++ b/toml-span/src/value/impl_serde.rs @@ -10,7 +10,7 @@ impl<'de> serde::Serialize for Value<'de> { S: serde::Serializer, { match self.as_ref() { - ValueInner::String(s) => ser.serialize_str(&s), + ValueInner::String(s) => ser.serialize_str(s), ValueInner::Integer(i) => ser.serialize_i64(*i), ValueInner::Float(f) => ser.serialize_f64(*f), ValueInner::Boolean(b) => ser.serialize_bool(*b), From 104232025531377500d3fbe1add07608c8e821fe Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 20 Feb 2024 16:49:04 +0100 Subject: [PATCH 15/16] Add deny --- Cargo.toml | 6 ++++++ deny.toml | 14 ++++++++++++++ integ-tests/Cargo.toml | 3 ++- toml-span/Cargo.toml | 8 ++++++-- 4 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 deny.toml diff --git a/Cargo.toml b/Cargo.toml index dc2a541..2f4e5ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,12 @@ resolver = "2" members = ["integ-tests", "toml-span"] +[workspace.package] +authors = ["Embark "] +edition = "2021" +license = "MIT OR Apache-2.0" +homepage = "https://github.com/EmbarkStudios/toml-span" + [workspace.dependencies] toml-span = { path = "toml-span" } proc-macro2 = "1.0" diff --git a/deny.toml b/deny.toml new file mode 100644 index 0000000..c14662f --- /dev/null +++ b/deny.toml @@ -0,0 +1,14 @@ +targets = [ + { triple = "x86_64-unknown-linux" }, + { triple = "x86_64-unknown-musl" }, + { triple = "x86_64-apple-darwin" }, + { triple = "aarch64-apple-darwin" }, + { triple = "x86_64-pc-windows-msvc" }, +] +all-features = true + +[bans] +multiple-versions = "deny" + +[licenses] +allow = ["MIT", "Apache-2.0"] diff --git a/integ-tests/Cargo.toml b/integ-tests/Cargo.toml index 015b7e7..659481d 100644 --- a/integ-tests/Cargo.toml +++ b/integ-tests/Cargo.toml @@ -1,7 +1,8 @@ 
[package] name = "integ-tests" version = "0.1.0" -edition = "2021" +edition.workspace = true +license.workspace = true publish = false [dependencies] diff --git a/toml-span/Cargo.toml b/toml-span/Cargo.toml index 829a1a9..b50b004 100644 --- a/toml-span/Cargo.toml +++ b/toml-span/Cargo.toml @@ -2,8 +2,12 @@ name = "toml-span" version = "0.1.0" description = "Toml parser and deserializer that preserves span information" -license = "Apache-2.0 OR MIT" -edition = "2021" +license.workspace = true +edition.workspace = true +homepage.workspace = true +documentation = "https://docs.rs/toml-span" +readme = "README.md" +categories = ["parser-implementations", "config"] [features] serde = ["dep:serde"] From 3997e4bfec504e5ed30ae04940daf35e42d68822 Mon Sep 17 00:00:00 2001 From: Jake Shadle Date: Tue, 20 Feb 2024 16:59:01 +0100 Subject: [PATCH 16/16] Move README to toml-span, add symlink to root --- README.md | 404 +------------------------------------------- toml-span/README.md | 403 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 404 insertions(+), 403 deletions(-) mode change 100644 => 120000 README.md create mode 100644 toml-span/README.md diff --git a/README.md b/README.md deleted file mode 100644 index 7ea0aab..0000000 --- a/README.md +++ /dev/null @@ -1,403 +0,0 @@ - - - - - - -
- -# `↔️ toml-span` - -**Span-preserving toml deserializer** - -[![Embark](https://img.shields.io/badge/embark-open%20source-blueviolet.svg)](https://embark.dev) -[![Embark](https://img.shields.io/badge/discord-ark-%237289da.svg?logo=discord)](https://discord.gg/dAuKfZS) -[![Crates.io](https://img.shields.io/crates/v/rust-gpu.svg)](https://crates.io/crates/toml-span) -[![Docs](https://docs.rs/toml-span/badge.svg)](https://docs.rs/toml-span) -[![dependency status](https://deps.rs/repo/github/EmbarkStudios/toml-span/status.svg)](https://deps.rs/repo/github/EmbarkStudios/toml-span) -[![Build status](https://github.com/EmbarkStudios/toml-span/workflows/CI/badge.svg)](https://github.com/EmbarkStudios/toml-span/actions) -
- -## Differences from `toml` - -First off I just want to be up front and clear about the differences/limitations of this crate versus `toml` - -1. No `serde` support for deserialization, there is a `serde` feature, but that only enables serialization of the `Value` and `Spanned` types. -1. No toml serialization. This crate is only intended to be a span preserving deserializer, there is no intention to provide serialization to toml, especially the advanced format preserving kind provided by `toml-edit`. -1. No datetime deserialization. It would be trivial to add support for this (behind an optional feature), I just have no use for it at the moment. PRs welcome. - -## Why does this crate exist? - -### The problem - -This crate was specifically made to suit the needs of [cargo-deny], namely, that it can always retrieve the span of any toml item that it wants to. While the [toml](https://docs.rs/toml/latest/toml/) crate can also produce span information via [toml::Spanned](https://docs.rs/toml/latest/toml/struct.Spanned.html) there is one rather significant limitation, namely, that it must pass through [serde](https://docs.rs/serde/latest/serde/). While in simple cases the `Spanned` type works quite well, eg. - -```rust -#[derive(serde::Deserialize)] -struct Simple { - /// This works just fine - simple_string: toml::Spanned, -} -``` - -As soon as you have a [more complicated scenario](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=aeb611bbe387538d2ebb6780055b3167), the mechanism that `toml` uses to get the span information breaks down. 
- -```rust -#[derive(serde::Deserialize)] -#[serde(untagged)] -enum Ohno { - Integer(u32), - SpannedString(toml::Spanned), -} - -#[derive(serde::Deserialize)] -struct Root { - integer: Ohno, - string: Ohno -} - -fn main() { - let toml = r#" -integer = 42 -string = "we want this to be spanned" -"#; - - let parsed: Root = toml::from_str(toml).expect("failed to deserialize toml"); -} -``` - -```text -thread 'main' panicked at src/main.rs:20:45: -failed to deserialize toml: Error { inner: Error { inner: TomlError { message: "data did not match any variant of untagged enum Ohno", original: Some("\ninteger = 42\nstring = \"we want this to be spanned\"\n"), keys: ["string"], span: Some(23..51) } } } -``` - -To understand why this fails we can look at what `#[derive(serde::Deserialize)]` expand to for `Ohno` in HIR. - -```rust -#[allow(unused_extern_crates, clippy :: useless_attribute)] -extern crate serde as _serde; -#[automatically_derived] -impl <'de> _serde::Deserialize<'de> for Ohno { - fn deserialize<__D>(__deserializer: __D) - -> _serde::__private::Result where - __D: _serde::Deserializer<'de> { - let __content = - match #[lang = "branch"](<_serde::__private::de::Content as - _serde::Deserialize>::deserialize(__deserializer)) { - #[lang = "Break"] { 0: residual } => - #[allow(unreachable_code)] - return #[lang = "from_residual"](residual), - #[lang = "Continue"] { 0: val } => - #[allow(unreachable_code)] - val, - }; - let __deserializer = - _serde::__private::de::ContentRefDeserializer<, , - __D::Error>::new(&__content); - if let _serde::__private::Ok(__ok) = - _serde::__private::Result::map(::deserialize(__deserializer), - Ohno::Integer) { return _serde::__private::Ok(__ok); } - if let _serde::__private::Ok(__ok) = - _serde::__private::Result::map( as - _serde::Deserialize>::deserialize(__deserializer), - Ohno::SpannedString) { return _serde::__private::Ok(__ok); } - _serde::__private::Err(_serde::de::Error::custom("data did not match any variant of untagged enum 
Ohno")) - } - } }; -``` - -What serde does in the untagged case is first deserialize into `_serde::__private::de::Content`, an internal API container that is easiest to think of as something like `serde_json::Value`. This is because serde speculatively parses each enum variant until one succeeds by passing a `ContentRefDeserializer` that just borrows the deserialized `Content` from earlier to satisfy the serde deserialize API consuming the `Deserializer`. The problem comes because of how [`toml::Spanned`](https://docs.rs/serde_spanned/0.6.5/src/serde_spanned/spanned.rs.html#161-212) works, namely that it uses a hack to workaround the limitations of the serde API in order to "deserialize" the item as well as its span information, by the `Spanned` object specifically requesting a set of keys from the `toml::Deserializer` impl so that it can [encode](https://github.com/toml-rs/toml/blob/c4b62fda23343037ebe5ea93db9393cb25fcf233/crates/toml_edit/src/de/spanned.rs#L27-L70) the span information as if it was a struct to satisfy serde. But serde doesn't know that when it deserializes the `Content` object, it just knows that the Deserializer reports it has a string, int or what have you, and deserializes that, "losing" the span information. This problem also affects things like `#[serde(flatten)]` for slightly different reasons, but they all basically come down to the serde API not truly supporting span information, nor [any plans](https://github.com/serde-rs/serde/issues/1811) to. - -### How `toml-span` is different - -This crate works by just...not using `serde`. The core of the crate is based off of [basic-toml](https://github.com/dtolnay/basic-toml) which itself a fork of `toml v0.5` before it added a ton of features an complexity that...well, is not needed by [cargo-deny] or many other crates that only need deserialization. 
- -Removing `serde` support means that while deserialization must be manually written, which can be tedious in some cases, while doing the porting of [cargo-deny] I actually came to appreciate it more and more due to a couple of things. - -1. Maximal control. `toml-span` does an initial deserialization pass into `toml_span::value::Value` which keeps span information for both keys and values, and provides helpers (namely `TableHelper`), but other than satisfying the `toml_span::Deserialize` trait doesn't restrict you in how you want to deserialize your values, and you don't even have to use that if you don't want to. -2. While it's slower to manually write deserialization code rather than just putting on a few serde attributes, the truth is that that initial convenience carries a compile time cost in terms of serde_derive and all of its dependencies, as well as all of the code that is generated, for...ever. This is fine when you are prototyping, but becomes quite wasteful once you have (mostly/somewhat) stabilized your data format. -3. (optional) Span-based errors. `toml-span` provides the `reporting` feature that can be enabled to have `toml_span::Error` be able to be converted into a [Diagnostic](https://docs.rs/codespan-reporting/latest/codespan_reporting/diagnostic/struct.Diagnostic.html) which can provide nice error output if you use the `codespan-reporting` crate. - -## Usage - -### Simple - -The most simple use case for `toml-span` is just as slimmer version of `toml` that also has a pointer API similar to [serde_json](https://docs.rs/serde_json/latest/serde_json/enum.Value.html#method.pointer) allowing easy piecemeal deserialization of a toml document. 
- -#### `toml` version - -```rust -fn is_crates_io_sparse(config: &toml::Value) -> Option { - config - .get("registries") - .and_then(|v| v.get("crates-io")) - .and_then(|v| v.get("protocol")) - .and_then(|v| v.as_str()) - .and_then(|v| match v { - "sparse" => Some(true), - "git" => Some(false), - _ => None, - }) -} -``` - -#### `toml-span` version - -```rust -fn is_crates_io_sparse(config: &toml_span::Value) -> Option { - match config.pointer("/registries/crates-io/protocol").and_then(|p| p.as_str())? { - "sparse" => Some(true), - "git" => Some(false), - _ => None - } -} -``` - -### Common - -Of course the most common case is deserializing toml into Rust containers. - -#### `toml` version - -```rust -#[derive(Deserialize, Clone)] -#[cfg_attr(test, derive(Debug, PartialEq, Eq))] -#[serde(rename_all = "kebab-case", deny_unknown_fields)] -pub struct CrateBan { - pub name: Spanned, - pub version: Option, - /// One or more crates that will allow this crate to be used if it is a - /// direct dependency - pub wrappers: Option>>>, - /// Setting this to true will only emit an error if multiple - // versions of the crate are found - pub deny_multiple_versions: Option>, -} -``` - -#### `toml-span` version - -The following code is much more verbose (before proc macros run at least), but show cases something that moving [cargo-deny] to `toml-span` allowed, namely, `PackageSpec`. - -Before `toml-span`, all cases where a user specifies a crate spec, (ie, name + optional version requirement) was done via two separate fields, `name` and `version`. This was quite verbose, as in many cases not only is `version` not specified, but also could be just a string if the user doesn't need/want to provide other fields. 
Normally one would use the [string or struct](https://serde.rs/string-or-struct.html) idiom but this was impossible due to how I wanted to reorganize the data to have the package spec as either a string or struct, _as well as_ optional data that is flattened to the same level as the package spec. But since `toml-span` changes how deserialization is done, this change was quite trivial after the initial work of getting the crate stood up was done. - -```rust -pub type CrateBan = PackageSpecOrExtended; - -#[cfg_attr(test, derive(Debug, PartialEq, Eq))] -pub struct CrateBanExtended { - /// One or more crates that will allow this crate to be used if it is a - /// direct dependency - pub wrappers: Option>>>, - /// Setting this to true will only emit an error if multiple versions of the - /// crate are found - pub deny_multiple_versions: Option>, - /// The reason for banning the crate - pub reason: Option, - /// The crate to use instead of the banned crate, could be just the crate name - /// or a URL - pub use_instead: Option>, -} - -impl<'de> Deserialize<'de> for CrateBanExtended { - fn deserialize(value: &mut Value<'de>) -> Result { - // The table helper provides convenience wrappers around a Value::Table, which - // is just a BTreeMap - let mut th = TableHelper::new(value)?; - - // Since we specify the keys manually there is no need for serde(rename/rename_all) - let wrappers = th.optional("wrappers"); - let deny_multiple_versions = th.optional("deny-multiple-versions"); - let reason = th.optional_s("reason"); - let use_instead = th.optional("use-instead"); - // Specifying None means that any keys that still exist in the table are - // unknown, producing an error the same as with serde(deny_unknown_fields) - th.finalize(None)?; - - Ok(Self { - wrappers, - deny_multiple_versions, - reason: reason.map(Reason::from), - use_instead, - }) - } -} - -#[derive(Clone, PartialEq, Eq)] -pub struct PackageSpec { - pub name: Spanned, - pub version_req: Option, -} - -impl<'de> 
Deserialize<'de> for PackageSpec { - fn deserialize(value: &mut Value<'de>) -> Result { - use std::borrow::Cow; - - struct Ctx<'de> { - inner: Cow<'de, str>, - split: Option<(usize, bool)>, - span: Span, - } - - impl<'de> Ctx<'de> { - fn from_str(bs: Cow<'de, str>, span: Span) -> Self { - let split = bs - .find('@') - .map(|i| (i, true)) - .or_else(|| bs.find(':').map(|i| (i, false))); - Self { - inner: bs, - split, - span, - } - } - } - - let ctx = match value.take() { - ValueInner::String(s) => Ctx::from_str(s, value.span), - ValueInner::Table(tab) => { - let mut th = TableHelper::from((tab, value.span)); - - if let Some(mut val) = th.table.remove(&"crate".into()) { - let s = val.take_string(Some("a crate spec"))?; - th.finalize(Some(value))?; - - Ctx::from_str(s, val.span) - } else { - // Encourage user to use the 'crate' spec instead - let name = th.required("name").map_err(|e| { - if matches!(e.kind, toml_span::ErrorKind::MissingField(_)) { - (toml_span::ErrorKind::MissingField("crate"), e.span).into() - } else { - e - } - })?; - let version = th.optional::>>("version"); - - // We return all the keys we haven't deserialized back to the value, - // so that further deserializers can use them as this spec is - // always embedded in a larger structure - th.finalize(Some(value))?; - - let version_req = if let Some(vr) = version { - Some(vr.value.parse().map_err(|e: semver::Error| { - toml_span::Error::from(( - toml_span::ErrorKind::Custom(e.to_string()), - vr.span, - )) - })?) 
- } else { - None - }; - - return Ok(Self { name, version_req }); - } - } - other => return Err(expected("a string or table", other, value.span).into()), - }; - - let (name, version_req) = if let Some((i, make_exact)) = ctx.split { - let mut v: VersionReq = ctx.inner[i + 1..].parse().map_err(|e: semver::Error| { - toml_span::Error::from(( - toml_span::ErrorKind::Custom(e.to_string()), - Span::new(ctx.span.start + i + 1, ctx.span.end), - )) - })?; - if make_exact { - if let Some(comp) = v.comparators.get_mut(0) { - comp.op = semver::Op::Exact; - } - } - - ( - Spanned::with_span( - ctx.inner[..i].into(), - Span::new(ctx.span.start, ctx.span.start + i), - ), - Some(v), - ) - } else { - (Spanned::with_span(ctx.inner.into(), ctx.span), None) - }; - - Ok(Self { name, version_req }) - } -} - -pub struct PackageSpecOrExtended { - pub spec: PackageSpec, - pub inner: Option, -} - -impl PackageSpecOrExtended { - pub fn try_convert(self) -> Result, E> - where - V: TryFrom, - { - let inner = if let Some(i) = self.inner { - Some(V::try_from(i)?) - } else { - None - }; - - Ok(PackageSpecOrExtended { - spec: self.spec, - inner, - }) - } - - pub fn convert(self) -> PackageSpecOrExtended - where - V: From, - { - PackageSpecOrExtended { - spec: self.spec, - inner: self.inner.map(V::from), - } - } -} - -impl<'de, T> toml_span::Deserialize<'de> for PackageSpecOrExtended -where - T: toml_span::Deserialize<'de>, -{ - fn deserialize(value: &mut Value<'de>) -> Result { - let spec = PackageSpec::deserialize(value)?; - - // If more keys exist in the table (or string) then try to deserialize - // the rest as the "extended" portion - let inner = if value.has_keys() { - Some(T::deserialize(value)?) - } else { - None - }; - - Ok(Self { spec, inner }) - } -} -``` - -## Contributing - -[![Contributor Covenant](https://img.shields.io/badge/contributor%20covenant-v1.4-ff69b4.svg)](CODE_OF_CONDUCT.md) - -We welcome community contributions to this project. 
- -Please read our [Contributor Guide](CONTRIBUTING.md) for more information on how to get started. -Please also read our [Contributor Terms](CONTRIBUTING.md#contributor-terms) before you make any contributions. - -Any contribution intentionally submitted for inclusion in an Embark Studios project, shall comply with the Rust standard licensing model (MIT OR Apache 2.0) and therefore be dual licensed as described below, without any additional terms or conditions: - -### License - -This contribution is dual licensed under EITHER OF - -- Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or ) -- MIT license ([LICENSE-MIT](LICENSE-MIT) or ) - -at your option. - -For clarity, "your" refers to Embark or any other licensee/user of the contribution. - -[cargo-deny]: https://github.com/EmbarkStudios/cargo-deny diff --git a/README.md b/README.md new file mode 120000 index 0000000..07a3af7 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +toml-span/README.md \ No newline at end of file diff --git a/toml-span/README.md b/toml-span/README.md new file mode 100644 index 0000000..7ea0aab --- /dev/null +++ b/toml-span/README.md @@ -0,0 +1,403 @@ + + + + + + +
+ +# `↔️ toml-span` + +**Span-preserving toml deserializer** + +[![Embark](https://img.shields.io/badge/embark-open%20source-blueviolet.svg)](https://embark.dev) +[![Embark](https://img.shields.io/badge/discord-ark-%237289da.svg?logo=discord)](https://discord.gg/dAuKfZS) +[![Crates.io](https://img.shields.io/crates/v/rust-gpu.svg)](https://crates.io/crates/toml-span) +[![Docs](https://docs.rs/toml-span/badge.svg)](https://docs.rs/toml-span) +[![dependency status](https://deps.rs/repo/github/EmbarkStudios/toml-span/status.svg)](https://deps.rs/repo/github/EmbarkStudios/toml-span) +[![Build status](https://github.com/EmbarkStudios/toml-span/workflows/CI/badge.svg)](https://github.com/EmbarkStudios/toml-span/actions) +
+ +## Differences from `toml` + +First off I just want to be up front and clear about the differences/limitations of this crate versus `toml` + +1. No `serde` support for deserialization, there is a `serde` feature, but that only enables serialization of the `Value` and `Spanned` types. +1. No toml serialization. This crate is only intended to be a span preserving deserializer, there is no intention to provide serialization to toml, especially the advanced format preserving kind provided by `toml-edit`. +1. No datetime deserialization. It would be trivial to add support for this (behind an optional feature), I just have no use for it at the moment. PRs welcome. + +## Why does this crate exist? + +### The problem + +This crate was specifically made to suit the needs of [cargo-deny], namely, that it can always retrieve the span of any toml item that it wants to. While the [toml](https://docs.rs/toml/latest/toml/) crate can also produce span information via [toml::Spanned](https://docs.rs/toml/latest/toml/struct.Spanned.html) there is one rather significant limitation, namely, that it must pass through [serde](https://docs.rs/serde/latest/serde/). While in simple cases the `Spanned` type works quite well, eg. + +```rust +#[derive(serde::Deserialize)] +struct Simple { + /// This works just fine + simple_string: toml::Spanned, +} +``` + +As soon as you have a [more complicated scenario](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=aeb611bbe387538d2ebb6780055b3167), the mechanism that `toml` uses to get the span information breaks down. 
+ +```rust +#[derive(serde::Deserialize)] +#[serde(untagged)] +enum Ohno { + Integer(u32), + SpannedString(toml::Spanned), +} + +#[derive(serde::Deserialize)] +struct Root { + integer: Ohno, + string: Ohno +} + +fn main() { + let toml = r#" +integer = 42 +string = "we want this to be spanned" +"#; + + let parsed: Root = toml::from_str(toml).expect("failed to deserialize toml"); +} +``` + +```text +thread 'main' panicked at src/main.rs:20:45: +failed to deserialize toml: Error { inner: Error { inner: TomlError { message: "data did not match any variant of untagged enum Ohno", original: Some("\ninteger = 42\nstring = \"we want this to be spanned\"\n"), keys: ["string"], span: Some(23..51) } } } +``` + +To understand why this fails we can look at what `#[derive(serde::Deserialize)]` expand to for `Ohno` in HIR. + +```rust +#[allow(unused_extern_crates, clippy :: useless_attribute)] +extern crate serde as _serde; +#[automatically_derived] +impl <'de> _serde::Deserialize<'de> for Ohno { + fn deserialize<__D>(__deserializer: __D) + -> _serde::__private::Result where + __D: _serde::Deserializer<'de> { + let __content = + match #[lang = "branch"](<_serde::__private::de::Content as + _serde::Deserialize>::deserialize(__deserializer)) { + #[lang = "Break"] { 0: residual } => + #[allow(unreachable_code)] + return #[lang = "from_residual"](residual), + #[lang = "Continue"] { 0: val } => + #[allow(unreachable_code)] + val, + }; + let __deserializer = + _serde::__private::de::ContentRefDeserializer<, , + __D::Error>::new(&__content); + if let _serde::__private::Ok(__ok) = + _serde::__private::Result::map(::deserialize(__deserializer), + Ohno::Integer) { return _serde::__private::Ok(__ok); } + if let _serde::__private::Ok(__ok) = + _serde::__private::Result::map( as + _serde::Deserialize>::deserialize(__deserializer), + Ohno::SpannedString) { return _serde::__private::Ok(__ok); } + _serde::__private::Err(_serde::de::Error::custom("data did not match any variant of untagged enum 
Ohno")) + } + } }; +``` + +What serde does in the untagged case is first deserialize into `_serde::__private::de::Content`, an internal API container that is easiest to think of as something like `serde_json::Value`. This is because serde speculatively parses each enum variant until one succeeds by passing a `ContentRefDeserializer` that just borrows the deserialized `Content` from earlier to satisfy the serde deserialize API consuming the `Deserializer`. The problem comes because of how [`toml::Spanned`](https://docs.rs/serde_spanned/0.6.5/src/serde_spanned/spanned.rs.html#161-212) works, namely that it uses a hack to work around the limitations of the serde API in order to "deserialize" the item as well as its span information, by the `Spanned` object specifically requesting a set of keys from the `toml::Deserializer` impl so that it can [encode](https://github.com/toml-rs/toml/blob/c4b62fda23343037ebe5ea93db9393cb25fcf233/crates/toml_edit/src/de/spanned.rs#L27-L70) the span information as if it was a struct to satisfy serde. But serde doesn't know that when it deserializes the `Content` object, it just knows that the Deserializer reports it has a string, int or what have you, and deserializes that, "losing" the span information. This problem also affects things like `#[serde(flatten)]` for slightly different reasons, but they all basically come down to the serde API not truly supporting span information, nor [any plans](https://github.com/serde-rs/serde/issues/1811) to. + +### How `toml-span` is different + +This crate works by just...not using `serde`. The core of the crate is based off of [basic-toml](https://github.com/dtolnay/basic-toml) which is itself a fork of `toml v0.5` before it added a ton of features and complexity that...well, is not needed by [cargo-deny] or many other crates that only need deserialization. 
+ +Removing `serde` support means that deserialization must be manually written, which can be tedious in some cases, but while doing the porting of [cargo-deny] I actually came to appreciate it more and more due to a couple of things. + +1. Maximal control. `toml-span` does an initial deserialization pass into `toml_span::value::Value` which keeps span information for both keys and values, and provides helpers (namely `TableHelper`), but other than satisfying the `toml_span::Deserialize` trait doesn't restrict you in how you want to deserialize your values, and you don't even have to use that if you don't want to. +2. While it's slower to manually write deserialization code rather than just putting on a few serde attributes, the truth is that that initial convenience carries a compile time cost in terms of serde_derive and all of its dependencies, as well as all of the code that is generated, for...ever. This is fine when you are prototyping, but becomes quite wasteful once you have (mostly/somewhat) stabilized your data format. +3. (optional) Span-based errors. `toml-span` provides the `reporting` feature that can be enabled to have `toml_span::Error` be able to be converted into a [Diagnostic](https://docs.rs/codespan-reporting/latest/codespan_reporting/diagnostic/struct.Diagnostic.html) which can provide nice error output if you use the `codespan-reporting` crate. + +## Usage + +### Simple + +The most simple use case for `toml-span` is just as a slimmer version of `toml` that also has a pointer API similar to [serde_json](https://docs.rs/serde_json/latest/serde_json/enum.Value.html#method.pointer) allowing easy piecemeal deserialization of a toml document. 
+ +#### `toml` version + +```rust +fn is_crates_io_sparse(config: &toml::Value) -> Option<bool> { + config + .get("registries") + .and_then(|v| v.get("crates-io")) + .and_then(|v| v.get("protocol")) + .and_then(|v| v.as_str()) + .and_then(|v| match v { + "sparse" => Some(true), + "git" => Some(false), + _ => None, + }) +} +``` + +#### `toml-span` version + +```rust +fn is_crates_io_sparse(config: &toml_span::Value) -> Option<bool> { + match config.pointer("/registries/crates-io/protocol").and_then(|p| p.as_str())? { + "sparse" => Some(true), + "git" => Some(false), + _ => None + } +} +``` + +### Common + +Of course the most common case is deserializing toml into Rust containers. + +#### `toml` version + +```rust +#[derive(Deserialize, Clone)] +#[cfg_attr(test, derive(Debug, PartialEq, Eq))] +#[serde(rename_all = "kebab-case", deny_unknown_fields)] +pub struct CrateBan { + pub name: Spanned<String>, + pub version: Option<VersionReq>, + /// One or more crates that will allow this crate to be used if it is a + /// direct dependency + pub wrappers: Option<Spanned<Vec<Spanned<String>>>>, + /// Setting this to true will only emit an error if multiple + /// versions of the crate are found + pub deny_multiple_versions: Option<Spanned<bool>>, +} +``` + +#### `toml-span` version + +The following code is much more verbose (before proc macros run at least), but showcases something that moving [cargo-deny] to `toml-span` allowed, namely, `PackageSpec`. + +Before `toml-span`, all cases where a user specifies a crate spec (i.e., name + optional version requirement) were handled via two separate fields, `name` and `version`. This was quite verbose, as in many cases not only is `version` not specified, but the spec could also be just a string if the user doesn't need/want to provide other fields. 
Normally one would use the [string or struct](https://serde.rs/string-or-struct.html) idiom but this was impossible due to how I wanted to reorganize the data to have the package spec as either a string or struct, _as well as_ optional data that is flattened to the same level as the package spec. But since `toml-span` changes how deserialization is done, this change was quite trivial after the initial work of getting the crate stood up was done. + +```rust +pub type CrateBan = PackageSpecOrExtended; + +#[cfg_attr(test, derive(Debug, PartialEq, Eq))] +pub struct CrateBanExtended { + /// One or more crates that will allow this crate to be used if it is a + /// direct dependency + pub wrappers: Option>>>, + /// Setting this to true will only emit an error if multiple versions of the + /// crate are found + pub deny_multiple_versions: Option>, + /// The reason for banning the crate + pub reason: Option, + /// The crate to use instead of the banned crate, could be just the crate name + /// or a URL + pub use_instead: Option>, +} + +impl<'de> Deserialize<'de> for CrateBanExtended { + fn deserialize(value: &mut Value<'de>) -> Result { + // The table helper provides convenience wrappers around a Value::Table, which + // is just a BTreeMap + let mut th = TableHelper::new(value)?; + + // Since we specify the keys manually there is no need for serde(rename/rename_all) + let wrappers = th.optional("wrappers"); + let deny_multiple_versions = th.optional("deny-multiple-versions"); + let reason = th.optional_s("reason"); + let use_instead = th.optional("use-instead"); + // Specifying None means that any keys that still exist in the table are + // unknown, producing an error the same as with serde(deny_unknown_fields) + th.finalize(None)?; + + Ok(Self { + wrappers, + deny_multiple_versions, + reason: reason.map(Reason::from), + use_instead, + }) + } +} + +#[derive(Clone, PartialEq, Eq)] +pub struct PackageSpec { + pub name: Spanned, + pub version_req: Option, +} + +impl<'de> 
Deserialize<'de> for PackageSpec { + fn deserialize(value: &mut Value<'de>) -> Result { + use std::borrow::Cow; + + struct Ctx<'de> { + inner: Cow<'de, str>, + split: Option<(usize, bool)>, + span: Span, + } + + impl<'de> Ctx<'de> { + fn from_str(bs: Cow<'de, str>, span: Span) -> Self { + let split = bs + .find('@') + .map(|i| (i, true)) + .or_else(|| bs.find(':').map(|i| (i, false))); + Self { + inner: bs, + split, + span, + } + } + } + + let ctx = match value.take() { + ValueInner::String(s) => Ctx::from_str(s, value.span), + ValueInner::Table(tab) => { + let mut th = TableHelper::from((tab, value.span)); + + if let Some(mut val) = th.table.remove(&"crate".into()) { + let s = val.take_string(Some("a crate spec"))?; + th.finalize(Some(value))?; + + Ctx::from_str(s, val.span) + } else { + // Encourage user to use the 'crate' spec instead + let name = th.required("name").map_err(|e| { + if matches!(e.kind, toml_span::ErrorKind::MissingField(_)) { + (toml_span::ErrorKind::MissingField("crate"), e.span).into() + } else { + e + } + })?; + let version = th.optional::>>("version"); + + // We return all the keys we haven't deserialized back to the value, + // so that further deserializers can use them as this spec is + // always embedded in a larger structure + th.finalize(Some(value))?; + + let version_req = if let Some(vr) = version { + Some(vr.value.parse().map_err(|e: semver::Error| { + toml_span::Error::from(( + toml_span::ErrorKind::Custom(e.to_string()), + vr.span, + )) + })?) 
+ } else { + None + }; + + return Ok(Self { name, version_req }); + } + } + other => return Err(expected("a string or table", other, value.span).into()), + }; + + let (name, version_req) = if let Some((i, make_exact)) = ctx.split { + let mut v: VersionReq = ctx.inner[i + 1..].parse().map_err(|e: semver::Error| { + toml_span::Error::from(( + toml_span::ErrorKind::Custom(e.to_string()), + Span::new(ctx.span.start + i + 1, ctx.span.end), + )) + })?; + if make_exact { + if let Some(comp) = v.comparators.get_mut(0) { + comp.op = semver::Op::Exact; + } + } + + ( + Spanned::with_span( + ctx.inner[..i].into(), + Span::new(ctx.span.start, ctx.span.start + i), + ), + Some(v), + ) + } else { + (Spanned::with_span(ctx.inner.into(), ctx.span), None) + }; + + Ok(Self { name, version_req }) + } +} + +pub struct PackageSpecOrExtended { + pub spec: PackageSpec, + pub inner: Option, +} + +impl PackageSpecOrExtended { + pub fn try_convert(self) -> Result, E> + where + V: TryFrom, + { + let inner = if let Some(i) = self.inner { + Some(V::try_from(i)?) + } else { + None + }; + + Ok(PackageSpecOrExtended { + spec: self.spec, + inner, + }) + } + + pub fn convert(self) -> PackageSpecOrExtended + where + V: From, + { + PackageSpecOrExtended { + spec: self.spec, + inner: self.inner.map(V::from), + } + } +} + +impl<'de, T> toml_span::Deserialize<'de> for PackageSpecOrExtended +where + T: toml_span::Deserialize<'de>, +{ + fn deserialize(value: &mut Value<'de>) -> Result { + let spec = PackageSpec::deserialize(value)?; + + // If more keys exist in the table (or string) then try to deserialize + // the rest as the "extended" portion + let inner = if value.has_keys() { + Some(T::deserialize(value)?) + } else { + None + }; + + Ok(Self { spec, inner }) + } +} +``` + +## Contributing + +[![Contributor Covenant](https://img.shields.io/badge/contributor%20covenant-v1.4-ff69b4.svg)](CODE_OF_CONDUCT.md) + +We welcome community contributions to this project. 
+ +Please read our [Contributor Guide](CONTRIBUTING.md) for more information on how to get started. +Please also read our [Contributor Terms](CONTRIBUTING.md#contributor-terms) before you make any contributions. + +Any contribution intentionally submitted for inclusion in an Embark Studios project, shall comply with the Rust standard licensing model (MIT OR Apache 2.0) and therefore be dual licensed as described below, without any additional terms or conditions: + +### License + +This contribution is dual licensed under EITHER OF + +- Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>) +- MIT license ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>) + +at your option. + +For clarity, "your" refers to Embark or any other licensee/user of the contribution. + +[cargo-deny]: https://github.com/EmbarkStudios/cargo-deny