From 9f03f43abfd7f56aadadb8aced31fb782390f657 Mon Sep 17 00:00:00 2001 From: Austen Adler Date: Tue, 18 Apr 2023 14:55:02 -0400 Subject: [PATCH] Start working on wide addresses --- Cargo.lock | 1 + Cargo.toml | 1 + src/lib.rs | 1 + src/wide.rs | 148 +++++++++++++++++++++++++++++++++++++++++++++++ words/src/lib.rs | 2 +- 5 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 src/wide.rs diff --git a/Cargo.lock b/Cargo.lock index cd80b03..aeac209 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2081,6 +2081,7 @@ version = "0.1.0" dependencies = [ "csv", "geoutils", + "nom", "s2", "serde", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 29734f0..ba25035 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ default = [] s2 = {version="0.0.12", default-features=false} thiserror = "1.0.38" words = {path="./words"} +nom = "*" [dev-dependencies] csv = "1.1" diff --git a/src/lib.rs b/src/lib.rs index ad242c5..0ce58f7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ #![allow(clippy::module_name_repetitions)] pub mod v0; +pub mod wide; use conversions::lat_lon_to_cellid; pub use s2::s1::angle::Angle; use std::{ diff --git a/src/wide.rs b/src/wide.rs new file mode 100644 index 0000000..4992a5b --- /dev/null +++ b/src/wide.rs @@ -0,0 +1,148 @@ +use std::{fmt::Display, str::FromStr}; + +use nom::{ + branch::alt, + bytes::complete::tag_no_case, + character::complete::{self, alpha1, space0}, + combinator::{eof, map, map_opt}, + complete::tag, + multi::many1, + sequence::{pair, tuple}, + IResult, +}; +use words::Word; + +use crate::{Address, Error, Number}; + +// TODO: Remove the english pronounciations of this separator from the wordlist +pub const SEPARATOR: &str = "AND"; +pub const SEPARATOR_CHAR: &str = "&"; + +pub struct Addresses<'a> { + addresses: Vec>, +} + +impl<'a> Addresses<'a> { + // fn to_compact_tokens(&self) -> impl Iterator> { + + // } +} + +impl<'a> From>> for Addresses<'a> { + fn from(addresses: Vec>) -> Self { + Self { addresses } + } +} + +impl Display for Addresses<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl FromStr for Addresses<'static> { + type Err = Error; + + fn from_str(s: &str) -> Result { + todo!() + } +} + +#[derive(Debug, Eq, PartialEq, PartialOrd, Ord)] +enum Token<'a> { + Number(Number), + Word(&'a Word<'a>), + WideSeparator, +} + +impl Token<'static> { + fn parse_stream(i: &str) -> IResult<&str, Vec> { + tuple(( + space0, + many1(map(pair(Self::parse, space0), |(t, _)| t)), + eof, + ))(i) + .map(|(_i, (_space, tokens, _eof))| ("", tokens)) + } + + fn parse(i: &str) -> IResult<&str, Self> { + alt(( + map_opt(alpha1, |maybe_word| { + words::get_word(maybe_word).map(Self::Word) + }), + map(complete::u32, Self::Number), + map( + alt((tag_no_case(SEPARATOR), tag_no_case(SEPARATOR_CHAR))), + |_| Self::WideSeparator, + ), + ))(i) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_token() { + assert_eq!(Token::parse("&"), Ok(("", Token::WideSeparator))); + assert_eq!(Token::parse("AnD"), Ok(("", Token::WideSeparator))); + assert_eq!(Token::parse("AnD "), Ok((" ", Token::WideSeparator))); + assert_eq!(Token::parse("AnD&& "), Ok(("&& ", Token::WideSeparator))); + + assert_eq!(Token::parse("1234&&"), Ok(("&&", Token::Number(1234)))); + + assert_eq!( + Token::parse("clarify"), + Ok(("", Token::Word(words::get_word("clarify").unwrap()))) + ); + + assert!(Token::parse("").is_err()); + } + + #[test] + fn test_parse_token_stream() { + assert_eq!( + Token::parse_stream(" & "), + Ok(("", vec![Token::WideSeparator])) + ); + assert_eq!( + Token::parse_stream("AnD"), + Ok(("", vec![Token::WideSeparator,])) + ); + assert_eq!( + Token::parse_stream("AnD "), + Ok(("", vec![Token::WideSeparator])) + ); + assert_eq!( + Token::parse_stream("AnD&& "), + Ok(( + "", + vec![ + Token::WideSeparator, + Token::WideSeparator, + Token::WideSeparator, + ] + )) + ); + + assert_eq!( + Token::parse_stream("1234&&"), + Ok(( + "", + vec![ + Token::Number(1234), + Token::WideSeparator, + Token::WideSeparator, + ] + )) + ); + + assert_eq!( + Token::parse_stream("clarify"), + Ok(("", vec![Token::Word(words::get_word("clarify").unwrap())])) + ); + + assert!(Token::parse_stream("clarify _").is_err()); + } +} diff --git a/words/src/lib.rs b/words/src/lib.rs index 3f2dcd3..97e2992 100644 --- a/words/src/lib.rs +++ b/words/src/lib.rs @@ -2,7 +2,7 @@ use std::fmt::Display; include!(concat!(env!("OUT_DIR"), "/codegen.rs")); -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)] /// A word struct pub struct Word<'a> { /// The word itself