// jsoncutil/src/parser.rs

// use anyhow::Result;
use std::collections::VecDeque;
use std::io::BufRead;
use std::io::ErrorKind;
use std::io::Write;
use derivative::Derivative;
use crate::indentor::Indentor;
const INDENT: &[u8] = b" ";
const RECORD_SEPARATOR: &[u8] = b"\n";
const NEWLINE: &[u8] = b"\n";
const C_CR: u8 = b'\r';
const C_LF: u8 = b'\n';
const C_TAB: u8 = b'\t';
const C_SPACE: u8 = b' ';
const C_COMMA: u8 = b',';
const C_COLON: u8 = b':';
const C_QUOTE: u8 = b'"';
const C_BACKSLASH: u8 = b'\\';
const C_LEFT_BRACE: u8 = b'{';
const C_LEFT_BRACKET: u8 = b'[';
const C_RIGHT_BRACE: u8 = b'}';
const C_RIGHT_BRACKET: u8 = b']';
const C_SLASH: u8 = b'/';
const C_STAR: u8 = b'*';
const C_PLUS: u8 = b'+';
const C_DOT: u8 = b'.';
const C_MINUS: u8 = b'-';
const C_E: u8 = b'e';
/// Output mode of the parser
#[derive(Debug, PartialEq, Eq)]
pub enum Mode {
/// Add trailing commas, and do not strip comments
Jsoncc,
/// Strip comments, and add whitespace and newlines
Json,
/// Strip comments, and strip all optional whitespace
CompactJson,
}
impl Mode {
/// Check if the mode wants to keep comments or strip them
fn keep_comments(&self) -> bool {
match self {
Mode::Jsoncc => true,
Mode::Json | Mode::CompactJson => false,
}
}
}
impl Default for Mode {
fn default() -> Self {
Self::Jsoncc
}
}
pub type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The input buffer is empty, but we need a token
#[error("Buffer unexpectedly empty")]
BufferEmpty,
/// Bytes that look like a value (true, false, null, a number, or a string) were found in the wrong position
#[error("Unexpected value type")]
UnexpectedValue,
/// A byte was found in an unexpected position
#[error("Unexpected char {0:?}")]
UnexpectedChar(char),
/// A collection end token was found in an unexpected position
#[error("Unexpected collection ending")]
UnexpectedCollectionEnd,
/// An IO error occurred when reading or writing
#[error("IO Error: {0}")]
Io(#[from] std::io::Error),
}
impl Error {
pub fn is_eof(&self) -> bool {
matches!(self, Self::Io(e) if e.kind() == ErrorKind::UnexpectedEof)
}
}
/// A token found in the input stream
///
/// This does not track `:` or `,` for two reasons:
///
/// 1. All input is jsoncc, in which `,` is optional. A `,` carries no extra information, since the next token must be checked anyway to decide whether the current value is the last one
/// 1. `:` state is derived from the [`CollectionState::Object`] `awaiting_key` field
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Token {
/// We have reached an EOF at a position that is not in a value
Eof,
/// The root of the input
///
/// Note that there can be multiple root tokens. For example, for the input `{}{}`, a root token is emitted at each position marked `^`: `^{}^{}`
Root,
/// The start of an object or array
CollectionStart { ty: CollectionType },
/// The end of an object or array
CollectionEnd { ty: CollectionType },
/// A block or line comment
Comment {
ty: CommentType,
/// Should this comment be on its own line?
///
/// This is derived from the input.
/// If nothing but whitespace precedes the comment on its line, this is set to true
own_line: bool,
},
/// A value that is not a collection
Value { ty: ValueType, first_char: u8 },
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CollectionType {
Object,
Array,
}
#[derive(Debug, Clone, Copy)]
enum CollectionState {
Object { awaiting_key: bool },
Array,
}
impl CollectionState {
fn ty(&self) -> CollectionType {
match self {
Self::Object { awaiting_key: _ } => CollectionType::Object,
Self::Array => CollectionType::Array,
}
}
}
impl CollectionType {
fn as_state(&self) -> CollectionState {
match self {
Self::Object => CollectionState::Object { awaiting_key: true },
Self::Array => CollectionState::Array,
}
}
fn start_str(&self) -> &'static str {
match self {
Self::Object => "{",
Self::Array => "[",
}
}
fn end_str(&self) -> &'static str {
match self {
Self::Object => "}",
Self::Array => "]",
}
}
}
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentType {
Line,
Block,
}
impl CommentType {
fn start_str(&self) -> &'static str {
match self {
Self::Line => "//",
Self::Block => "/*",
}
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ValueType {
// TODO: Do we want to allow the possibility of unquoted object keys?
// Unquoted values would be a bad idea. For example, there would be ambiguity for {x: true} (is it {"x": "true"} or {"x": true})
// You could force `true`/`false`/`null`/numbers to be non-strings, but then you end up with the yaml `yes`/`no` problem
// Also, if someone types a number like `-1.4e4.`, we don't want that converted to a string, we should keep it as an (invalid) number
// UnquotedString,
String,
Number,
Boolean,
Null,
}
#[derive(Derivative)]
#[derivative(Debug)]
pub struct Parser<R, W>
where
R: BufRead,
W: Write,
{
/// Input reader
#[derivative(Debug = "ignore")]
input: R,
/// Output writer
#[derivative(Debug = "ignore")]
write: W,
/// Stack tracking the state of the parser
///
/// When descending into an array or object, push a [`CollectionState`] here
state_stack: VecDeque<CollectionState>,
/// The current token the parser has received
current_token: Token,
// TODO: This can be used to add whitespace (if [`num_empty_lines`] > 1)
/// The number of empty lines read from [`input`] in a row
///
/// Empty lines are lines that only contain whitespace
num_empty_lines: u8,
/// The mode of operation of the parser
mode: Mode,
/// Buffered indent strings so repeated calls do not have to repeatedly call [`std::io::repeat`]
indentor: Indentor,
}
impl<R, W> Parser<R, W>
where
R: BufRead,
W: Write,
{
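/// Create a parser that reads jsoncc from `read` and writes it to `write` according to `mode`
///
/// A minimal usage sketch. It is marked `ignore` because the `jsoncutil::parser` import path is
/// an assumption based on this file's location, not something this file defines:
///
/// ```ignore
/// use std::io::{BufReader, BufWriter};
/// use jsoncutil::parser::{Mode, Parser};
///
/// let input = BufReader::new(r#"{"a":"b",/*c*/}"#.as_bytes());
/// let mut out = Vec::new();
/// Parser::new(Mode::CompactJson, input, BufWriter::new(&mut out))
///     .format_buf()
///     .unwrap();
/// // Comments and the trailing comma are stripped, and a record separator is appended
/// assert_eq!(String::from_utf8(out).unwrap(), "{\"a\":\"b\"}\n");
/// ```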
pub fn new(mode: Mode, read: R, write: W) -> Self {
Self {
input: read,
write,
state_stack: VecDeque::new(),
current_token: Token::Root,
num_empty_lines: 0,
mode,
indentor: Indentor::new(INDENT),
}
}
/// Send the rest of the input to the writer until the end of the comment is reached
fn drain_comment(&mut self, ty: &CommentType) -> Result<()> {
let mut maybe_block_end = false;
loop {
let buf = self.input.fill_buf()?;
if buf.is_empty() {
return Err(Error::BufferEmpty);
}
match ty {
CommentType::Line => match line_comment_end(buf) {
Some(idx) => {
if self.mode.keep_comments() {
self.write.write_all(&buf[0..idx])?;
}
self.input.consume(idx);
break;
}
None => {
if self.mode.keep_comments() {
self.write.write_all(buf)?;
}
let len = buf.len();
self.input.consume(len);
}
},
CommentType::Block => {
if maybe_block_end && buf[0] == b'/' {
// We ended the block comment
if self.mode.keep_comments() {
self.write.write_all(b"/")?;
}
break;
}
maybe_block_end = false;
match block_comment_end(buf) {
BlockCommentEnd::Position(idx) => {
if self.mode.keep_comments() {
self.write.write_all(&buf[0..idx])?;
}
self.input.consume(idx);
break;
}
BlockCommentEnd::MaybeEnd => {
if self.mode.keep_comments() {
self.write.write_all(buf)?;
}
let len = buf.len();
self.input.consume(len);
maybe_block_end = true;
}
BlockCommentEnd::None => {
if self.mode.keep_comments() {
self.write.write_all(buf)?;
}
let len = buf.len();
self.input.consume(len);
}
}
}
}
}
Ok(())
}
/// Send the rest of the input to the writer until the end of the value is reached
fn drain_value(&mut self, ty: &ValueType, first_char: u8) -> Result<()> {
match (ty, first_char) {
(ValueType::String, C_QUOTE) => {
let mut next_char_escaped = false;
self.write([C_QUOTE])?;
// Loop until we are done with the string
loop {
if next_char_escaped {
// The previous buffer ended in `\`
// Send this character out
let next_char = self.next_char()?;
self.write([next_char])?;
}
next_char_escaped = false;
let buf = self.input.fill_buf()?;
if buf.is_empty() {
return Err(Error::BufferEmpty);
}
match string_end(buf) {
StringEnd::Position(idx) => {
self.write.write_all(&buf[0..idx])?;
self.input.consume(idx);
break;
}
StringEnd::MaybeEnd => {
self.write.write_all(buf)?;
let len = buf.len();
self.input.consume(len);
next_char_escaped = true;
}
StringEnd::None => {
self.write.write_all(buf)?;
let len = buf.len();
self.input.consume(len);
}
}
}
let next_char = self.next_char()?;
self.write([next_char])?;
Ok(())
}
(ValueType::Number, mut c) => {
loop {
self.write([c])?;
c = self.peek_next_char()?;
// Any of the JSON numerical characters (the exponent marker is matched case-insensitively)
if c == C_PLUS
|| c == C_MINUS
|| c == C_DOT
|| c.is_ascii_digit()
|| c.eq_ignore_ascii_case(&C_E)
{
self.next_char()?;
} else {
break;
}
}
Ok(())
}
(ValueType::Boolean, b't') => {
let mut chr = [0_u8; 3];
self.input.read_exact(&mut chr)?;
if chr == *b"rue" {
self.write("true")?;
Ok(())
} else {
Err(Error::UnexpectedValue)
}
}
(ValueType::Boolean, b'f') => {
let mut chr = [0_u8; 4];
self.input.read_exact(&mut chr)?;
if chr == *b"alse" {
self.write("false")?;
Ok(())
} else {
Err(Error::UnexpectedValue)
}
}
(ValueType::Null, b'n') => {
let mut chr = [0_u8; 3];
self.input.read_exact(&mut chr)?;
if chr == *b"ull" {
self.write("null")?;
Ok(())
} else {
Err(Error::UnexpectedValue)
}
}
_ => {
eprintln!("Value type: {ty:?}, with first char {first_char:?}");
Err(Error::UnexpectedValue)
}
}
}
/// Write some bytes to the writer
fn write(&mut self, buf: impl AsRef<[u8]>) -> Result<()> {
// eprintln!("### Writing {:?}", String::from_utf8_lossy(buf.as_ref()));
self.write.write_all(buf.as_ref())?;
Ok(())
}
/// Write the record separator to the writer
fn record_separator(&mut self) -> Result<()> {
self.write(RECORD_SEPARATOR)?;
Ok(())
}
/// Add extra padding after `:` or before the `//`/`/*` in a comment, if the format requests it
fn extra_spacing(&mut self) -> Result<()> {
match self.mode {
Mode::Jsoncc | Mode::Json => self.write(" ")?,
Mode::CompactJson => {}
}
Ok(())
}
/// Add a comma only if we are not at the root level
fn comma(&mut self) -> Result<()> {
// We don't want a comma if this is a root element
if self.state_stack.is_empty() {
return Ok(());
}
self.write(",")?;
Ok(())
}
/// Add a trailing comma only if we are not at the root level and we are in [`Mode::Jsoncc`]
fn trailing_comma(&mut self) -> Result<()> {
match self.mode {
Mode::Jsoncc => self.comma()?,
Mode::Json | Mode::CompactJson => {}
}
Ok(())
}
/// Write a newline and add indentation
fn newline(&mut self) -> Result<()> {
match self.mode {
Mode::Jsoncc | Mode::Json => {
self.write(NEWLINE)?;
self.write
.write_all(self.indentor.get_indent(self.state_stack.len()))?;
}
Mode::CompactJson => {}
}
Ok(())
}
/// Leave a collection
///
/// Call this after seeing a `]` or `}` token; it ensures we were in the matching kind of collection before leaving it
fn exit_collection(&mut self, ty: &CollectionType) -> Result<()> {
if Some(*ty)
!= self
.state_stack
.pop_back()
.as_ref()
.map(CollectionState::ty)
{
return Err(Error::UnexpectedCollectionEnd);
}
Ok(())
}
/// Format the reader into the writer and consume the [`Parser`] by reading tokens and sending formatted output
///
/// Generally, after each step the output ends with the token just written, followed by a `:` when that token was an object key
/// A `,` is only written once the next token is known: it is added when `current_token` is a value and the next token warrants a `,` (another value, a collection start, or, in jsoncc mode, a collection end)
///
/// For example:
/// ```text
/// ["a", "b"]
/// ^
/// ```
///
/// At this position, `self.current_token` is a `Value` (representing `"a"`) and `next_token` represents `"b"`, so a `,` is written
/// In Jsoncc/Json mode, a newline and indent follow, and then the `"b"` value is flushed
pub fn format_buf(mut self) -> Result<()> {
loop {
// eprintln!("========================================================");
// eprintln!("{:?}", self);
let mut next_token = self.get_next_token()?;
// eprintln!("{:#?}\n{:#?}", self.current_token, next_token);
// eprintln!();
match (self.current_token, &next_token) {
(Token::Root, Token::CollectionStart { ty }) => {
self.state_stack.push_back(ty.as_state());
self.write(ty.start_str())?;
}
(Token::Root, Token::CollectionEnd { ty }) => {
self.exit_collection(ty)?;
self.write(ty.end_str())?;
}
(Token::Root, Token::Comment { ty, own_line: _ }) => {
self.write(ty.start_str())?;
self.drain_comment(ty)?;
}
(Token::Root, Token::Value { ty, first_char }) => {
self.drain_value(ty, *first_char)?;
next_token = Token::Root;
}
(Token::CollectionStart { ty: _ }, Token::CollectionStart { ty }) => {
self.newline()?;
self.write(ty.start_str())?;
self.state_stack.push_back(ty.as_state());
}
(Token::CollectionStart { ty: _ }, Token::CollectionEnd { ty }) => {
// `{}` or `[]`
self.exit_collection(ty)?;
self.write(ty.end_str())?;
}
(Token::CollectionStart { ty: _ }, Token::Comment { ty, own_line: _ }) => {
// Force own_line to be true
self.newline()?;
self.write(ty.start_str())?;
self.drain_comment(ty)?;
}
(Token::CollectionStart { ty: _ }, Token::Value { ty, first_char }) => {
self.newline()?;
self.drain_value(ty, *first_char)?;
if self.is_awaiting_key()? {
self.write(":")?;
}
self.toggle_awaiting_key()?;
}
(Token::CollectionEnd { ty: _ }, Token::CollectionStart { ty }) => {
self.comma()?;
self.newline()?;
self.write(ty.start_str())?;
self.state_stack.push_back(ty.as_state());
}
(Token::CollectionEnd { ty: _ }, Token::CollectionEnd { ty }) => {
self.trailing_comma()?;
self.exit_collection(ty)?;
self.newline()?;
self.write(ty.end_str())?;
}
(Token::CollectionEnd { ty: _ }, Token::Comment { ty, own_line: _ }) => {
// Force own_line to be true
self.trailing_comma()?;
self.newline()?;
self.write(ty.start_str())?;
self.drain_comment(ty)?;
}
(Token::CollectionEnd { ty: _ }, Token::Value { ty, first_char }) => {
self.comma()?;
self.newline()?;
self.drain_value(ty, *first_char)?;
if self.is_awaiting_key()? {
self.write(":")?;
}
self.toggle_awaiting_key()?;
}
(Token::Comment { ty: _, own_line: _ }, Token::CollectionStart { ty }) => {
self.newline()?;
self.write(ty.start_str())?;
self.state_stack.push_back(ty.as_state());
}
(Token::Comment { ty: _, own_line: _ }, Token::CollectionEnd { ty }) => {
self.exit_collection(ty)?;
self.newline()?;
self.write(ty.end_str())?;
// self.trailing_comma()?;
}
(Token::Comment { ty: _, own_line: _ }, Token::Comment { ty, own_line: _ }) => {
// Force own_line to be true
self.newline()?;
self.write(ty.start_str())?;
self.drain_comment(ty)?;
}
(Token::Comment { ty: _, own_line: _ }, Token::Value { ty, first_char }) => {
self.newline()?;
self.drain_value(ty, *first_char)?;
if self.is_awaiting_key()? {
self.write(":")?;
}
self.toggle_awaiting_key()?;
}
(
Token::Value {
ty: _,
first_char: _,
},
Token::CollectionStart { ty },
) => {
if self.is_awaiting_key()?
|| matches!(self.state_stack.back(), Some(CollectionState::Array))
{
// A new array element or object key needs a separator
self.comma()?;
self.newline()?;
} else {
// The previous value was an object key, so put a space after the `:`
self.extra_spacing()?;
}
self.write(ty.start_str())?;
self.toggle_awaiting_key()?;
self.state_stack.push_back(ty.as_state());
}
(
Token::Value {
ty: _,
first_char: _,
},
Token::CollectionEnd { ty },
) => {
self.trailing_comma()?;
self.exit_collection(ty)?;
self.newline()?;
self.write(ty.end_str())?;
}
(
Token::Value {
ty: _,
first_char: _,
},
Token::Comment { ty, own_line },
) => {
if self.is_awaiting_key()?
|| matches!(self.state_stack.back(), Some(CollectionState::Array))
{
self.comma()?;
}
if *own_line {
self.newline()?;
} else {
self.extra_spacing()?;
}
self.write(ty.start_str())?;
self.drain_comment(ty)?;
}
(
Token::Value {
ty: _,
first_char: _,
},
Token::Value { ty, first_char },
) => {
if self.is_awaiting_key()?
|| matches!(self.state_stack.back(), Some(CollectionState::Array))
{
// A new array element or object key needs a `,` and its own line
self.comma()?;
self.newline()?;
} else {
// The previous value was an object key, so put a space after the `:`
self.extra_spacing()?;
}
self.drain_value(ty, *first_char)?;
if self.is_awaiting_key()? {
self.write(":")?;
}
self.toggle_awaiting_key()?;
}
(Token::Root, Token::Eof) if self.state_stack.is_empty() => {
// We read the whole file successfully!
return Ok(());
}
(a, b) => {
panic!("Invalid state transition: {a:?} => {b:?}")
}
}
if (matches!(next_token, Token::CollectionEnd { .. }) || next_token == Token::Root)
&& self.state_stack.is_empty()
{
self.record_separator()?;
next_token = Token::Root;
}
self.current_token = next_token;
}
}
/// Read input, skipping whitespace, commas, and colons, until the next [`Token`] is found
fn get_next_token(&mut self) -> Result<Token> {
let ret = loop {
let chr = self.next_char();
if matches!(&chr, Err(e) if e.is_eof()) {
// TODO: If our nested depth is 0, this is just a Root token??
break Ok(Token::Eof);
}
let chr = chr?;
// eprintln!("Got next char: {:?}", chr as char);
break Ok(match chr {
C_CR | C_LF => {
self.num_empty_lines = self.num_empty_lines.saturating_add(1);
continue;
}
C_TAB | C_SPACE => continue,
// C_COMMA => Token::Comma,
C_COLON => continue,
// TODO: Allow unquoted strings?
C_QUOTE => Token::Value {
ty: ValueType::String,
first_char: b'"',
},
// C_BACKSLASH => {}
C_LEFT_BRACE => Token::CollectionStart {
ty: CollectionType::Object,
},
C_LEFT_BRACKET => Token::CollectionStart {
ty: CollectionType::Array,
},
C_RIGHT_BRACE => Token::CollectionEnd {
ty: CollectionType::Object,
},
C_RIGHT_BRACKET => Token::CollectionEnd {
ty: CollectionType::Array,
},
C_SLASH => {
// We can't send comment tokens if using json
let maybe_next_token_ty = match self.next_char()? {
C_SLASH => CommentType::Line,
C_STAR => CommentType::Block,
c => {
eprintln!("Unexpected byte after `/`: {:?}", c as char);
eprintln!("{:#?}", self);
break Err(Error::UnexpectedChar(c as char));
}
};
if self.mode.keep_comments() {
Token::Comment {
ty: maybe_next_token_ty,
own_line: self.num_empty_lines > 0,
}
} else {
// We need to drain this comment by reading the buffer
// This function won't write anything in json modes
self.drain_comment(&maybe_next_token_ty)?;
self.num_empty_lines = 0;
continue;
}
}
C_COMMA => continue,
c @ b't' | c @ b'f' => Token::Value {
ty: ValueType::Boolean,
first_char: c,
},
c @ b'n' => Token::Value {
ty: ValueType::Null,
first_char: c,
},
// A `+`, `-`, or digit starts a number
c if c == C_PLUS || c == C_MINUS || c.is_ascii_digit() => Token::Value {
ty: ValueType::Number,
first_char: c,
},
c => {
eprintln!("Unexpected char?? {self:#?}");
break Err(Error::UnexpectedChar(c as char));
}
});
};
self.num_empty_lines = 0;
ret
}
/// Check the next char without consuming it
fn peek_next_char(&mut self) -> Result<u8> {
self.input
.fill_buf()?
.first()
.ok_or(Error::BufferEmpty)
.copied()
}
/// Consume the next character from the reader
fn next_char(&mut self) -> Result<u8> {
let mut chr = [0_u8];
self.input.read_exact(&mut chr)?;
Ok(chr[0])
}
/// Returns `true` if we are in an object and the next value is actually an object key
fn is_awaiting_key(&self) -> Result<bool> {
Ok(
match self.state_stack.back().ok_or(Error::UnexpectedValue)? {
CollectionState::Object { awaiting_key } => *awaiting_key,
CollectionState::Array => false,
},
)
}
/// Toggles the `awaiting_key` value. Called after reading a value
///
/// Has no effect if the current collection is an array, so this is safe to call after reading any value or CollectionEnd token
fn toggle_awaiting_key(&mut self) -> Result<()> {
match self.state_stack.back_mut().ok_or(Error::UnexpectedValue)? {
CollectionState::Object { awaiting_key } => *awaiting_key = !*awaiting_key,
CollectionState::Array => {}
}
Ok(())
}
}
/// Gets the position in a buf at which a block comment ends
/// ```text
/// /* abc */ def
/// ^
/// ```
fn block_comment_end(buf: &[u8]) -> BlockCommentEnd {
for star_idx in memchr::memchr_iter(C_STAR, buf) {
match buf.get(star_idx + 1) {
Some(&C_SLASH) => {
// We found `*/` at position `star_idx`
return BlockCommentEnd::Position(star_idx + 2);
}
Some(_) => {}
None => {
// We found `*` at the end of the buffer
return BlockCommentEnd::MaybeEnd;
}
}
}
BlockCommentEnd::None
}
/// Gets the position in a buf at which the string ends
/// ```text
/// xyzabc": 123,
/// ^
/// ```
/// Note that the `xyzabc` is part of a string, but the start of the string must have come from a previous buffer
fn string_end(buf: &[u8]) -> StringEnd {
let mut n = 0;
loop {
match memchr::memchr2(C_QUOTE, C_BACKSLASH, &buf[n..])
.and_then(|idx| Some((idx, buf.get(idx + n)?)))
{
Some((idx, &C_QUOTE)) => {
n += idx;
return StringEnd::Position(n);
}
Some((idx, &C_BACKSLASH)) => {
n += idx;
// We found a `\`
if buf.len() == n + 1 {
// The `\` is the last byte of `buf`, so the escaped byte is in the next buffer
return StringEnd::MaybeEnd;
} else {
// The closing `"` cannot be the `\` or the byte it escapes, so skip both
n += 2;
}
}
Some((idx, chr)) => {
eprintln!("Buf: {:?}", String::from_utf8(buf.to_vec()));
panic!(
"memchr2 returned unexpected result ({} @ {})",
*chr as char,
idx + n
);
}
None => {
// There are no `"` in the string, so we know the rest of the buf is just part of the string
return StringEnd::None;
}
}
}
}
/// Gets the position in a buf at which a line comment ends
fn line_comment_end(buf: &[u8]) -> Option<usize> {
memchr::memchr2(C_CR, C_LF, buf)
}
/// The result of searching a buf for a block comment ending `*/`
enum BlockCommentEnd {
/// The block comment ended at this position
Position(usize),
/// The buffer did not have any `*/`, but it ended in a `*`
MaybeEnd,
/// The block comment does not end in this buf
None,
}
/// The result of searching a buf for an unescaped string-ending `"`
#[derive(PartialEq, Eq, Debug)]
enum StringEnd {
/// The string ended at this position
Position(usize),
/// The buffer did not have any unescaped `"`, but it ended in a `\`
MaybeEnd,
/// The string does not end in this buf
None,
}
#[cfg(test)]
mod tests {
use std::io::{BufReader, BufWriter};
use super::*;
fn format_to_string(input: &[u8], mode: Mode) -> String {
let mut output = vec![];
Parser::new(
mode,
BufReader::new(input),
&mut BufWriter::new(&mut output),
)
.format_buf()
.unwrap();
String::from_utf8(output).unwrap()
}
#[test]
fn test_string_end() {
assert_eq!(string_end(br#"ABC"#), StringEnd::None);
assert_eq!(string_end(br#"ABC\"#), StringEnd::MaybeEnd);
assert_eq!(string_end(br#"ABC""#), StringEnd::Position(3));
}
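// The same style of checks for the other two scanners. Expected values follow from the
// implementations of `block_comment_end` and `line_comment_end` above; `BlockCommentEnd`
// does not derive `PartialEq`, so `matches!` is used instead of `assert_eq!`.
#[test]
fn test_block_comment_end() {
    assert!(matches!(
        block_comment_end(b"abc */ def"),
        BlockCommentEnd::Position(6)
    ));
    assert!(matches!(block_comment_end(b"abc *"), BlockCommentEnd::MaybeEnd));
    assert!(matches!(block_comment_end(b"abc"), BlockCommentEnd::None));
}
#[test]
fn test_line_comment_end() {
    assert_eq!(line_comment_end(b"abc\ndef"), Some(3));
    assert_eq!(line_comment_end(b"no newline here"), None);
}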
#[test]
fn test_formatting() {
let x = r#"[]
{}
[]
{
"a": "b"
}
{"a":"b"}
{
"a": "b",
}
{"a":"b",}
[]
"#;
eprintln!("{}", format_to_string(x.as_bytes(), Mode::Jsoncc));
assert_eq!(
format_to_string(x.as_bytes(), Mode::Jsoncc),
r#"[]
{}
[]
{
"a": "b",
}
{
"a": "b",
}
{
"a": "b",
}
{
"a": "b",
}
[]
"#
);
assert_eq!(
format_to_string(x.as_bytes(), Mode::Json),
r#"[]
{}
[]
{
"a": "b"
}
{
"a": "b"
}
{
"a": "b"
}
{
"a": "b"
}
[]
"#
);
assert_eq!(
format_to_string(x.as_bytes(), Mode::CompactJson),
r#"[]
{}
[]
{"a":"b"}
{"a":"b"}
{"a":"b"}
{"a":"b"}
[]
"#
);
}
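// A hedged sketch of root-level value handling: each root value passes through unchanged and is
// followed by the record separator. The trailing newline on the number input matters, because
// the number scanner peeks one byte past the value and would hit end-of-input otherwise.
#[test]
fn test_root_values() {
    assert_eq!(format_to_string(b"true", Mode::Jsoncc), "true\n");
    assert_eq!(format_to_string(b"null", Mode::Json), "null\n");
    assert_eq!(format_to_string(br#""abc""#, Mode::CompactJson), "\"abc\"\n");
    assert_eq!(format_to_string(br#""b\"c""#, Mode::Json), "\"b\\\"c\"\n");
    assert_eq!(format_to_string(b"12.5e3\n", Mode::Json), "12.5e3\n");
}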
#[test]
fn test_formatting_comments() {
let x = r#"[]
{
/*1*/
}
[
/*2*/
]
{
//
"a": "b",
}
{
//
"a": "b",
}
{
/*1*/
"a": "b", /*2*/
/*3*/
"c":"d",
/*4*/ "e":"f"/*5*/, /*6*/
}
{/*w*/
/*x*/
"a"/*y*/:/*z*/"b",/*a*/
}
[]"#;
eprintln!("{}", format_to_string(x.as_bytes(), Mode::Json));
assert_eq!(
format_to_string(x.as_bytes(), Mode::Jsoncc),
r#"[]
{
/*1*/
}
[
/*2*/
]
{
//
"a": "b",
}
{
//
"a": "b",
}
{
/*1*/
"a": "b", /*2*/
/*3*/
"c": "d",
/*4*/
"e": "f", /*5*/
/*6*/
}
{
/*w*/
/*x*/
"a": /*y*/
/*z*/
"b", /*a*/
}
[]
"#
);
assert_eq!(
format_to_string(x.as_bytes(), Mode::CompactJson),
r#"[]
{}
[]
{"a":"b"}
{"a":"b"}
{"a":"b","c":"d","e":"f"}
{"a":"b"}
[]
"#
);
assert_eq!(
format_to_string(x.as_bytes(), Mode::Json),
r#"[]
{}
[]
{
"a": "b"
}
{
"a": "b"
}
{
"a": "b",
"c": "d",
"e": "f"
}
{
"a": "b"
}
[]
"#
);
}
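// Hedged sketches of behaviour described in doc comments earlier in this file: input commas are
// treated as optional separators (see the `Token` docs), and the `["a", "b"]` example from the
// `format_buf` docs gets its `,` emitted between array elements. CompactJson mode is used so the
// expected strings do not depend on the INDENT constant.
#[test]
fn test_optional_input_commas() {
    assert_eq!(
        format_to_string(br#"{"a":"b" "c":"d"}"#, Mode::CompactJson),
        format_to_string(br#"{"a":"b","c":"d",}"#, Mode::CompactJson),
    );
}
#[test]
fn test_array_commas() {
    assert_eq!(
        format_to_string(br#"["a", "b"]"#, Mode::CompactJson),
        "[\"a\",\"b\"]\n"
    );
}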
// static G: AtomicUsize = AtomicUsize::new(0);
// fn fork(i: &str) -> Vec<String> {
// let a = i.replacen(
// "_",
// &format!("/*{}*/", G.fetch_add(1, Ordering::Relaxed)),
// 1,
// );
// let b = i.replacen("_", "", 1);
// if a.contains("_") {
// let mut ret = fork(&a);
// ret.append(&mut fork(&b));
// ret
// } else {
// vec![a, b]
// }
// }
}