Start working on wide addresses

This commit is contained in:
Austen Adler 2023-04-18 14:55:02 -04:00
parent 7688c54fb8
commit 9f03f43abf
5 changed files with 152 additions and 1 deletions

1
Cargo.lock generated
View File

@ -2081,6 +2081,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"csv", "csv",
"geoutils", "geoutils",
"nom",
"s2", "s2",
"serde", "serde",
"thiserror", "thiserror",

View File

@ -21,6 +21,7 @@ default = []
s2 = {version="0.0.12", default-features=false} s2 = {version="0.0.12", default-features=false}
thiserror = "1.0.38" thiserror = "1.0.38"
words = {path="./words"} words = {path="./words"}
nom = "*"
[dev-dependencies] [dev-dependencies]
csv = "1.1" csv = "1.1"

View File

@ -4,6 +4,7 @@
#![allow(clippy::module_name_repetitions)] #![allow(clippy::module_name_repetitions)]
pub mod v0; pub mod v0;
pub mod wide;
use conversions::lat_lon_to_cellid; use conversions::lat_lon_to_cellid;
pub use s2::s1::angle::Angle; pub use s2::s1::angle::Angle;
use std::{ use std::{

148
src/wide.rs Normal file
View File

@ -0,0 +1,148 @@
use std::{fmt::Display, str::FromStr};
use nom::{
branch::alt,
bytes::complete::tag_no_case,
character::complete::{self, alpha1, space0},
combinator::{eof, map, map_opt},
complete::tag,
multi::many1,
sequence::{pair, tuple},
IResult,
};
use words::Word;
use crate::{Address, Error, Number};
// TODO: Remove the English pronunciations of this separator from the wordlist
/// Word form of the separator. The token parser in this module matches it
/// case-insensitively (via `tag_no_case`).
pub const SEPARATOR: &str = "AND";
/// Symbol form of the separator, accepted as an alternative to [`SEPARATOR`].
/// Despite the name this is a `&str`, not a `char`.
pub const SEPARATOR_CHAR: &str = "&";
/// An owned list of [`Address`] values.
///
/// NOTE(review): presumably this models a "wide" address made of several
/// regular addresses joined by a separator — confirm once formatting and
/// parsing are implemented.
pub struct Addresses<'a> {
/// The wrapped addresses.
addresses: Vec<Address<'a>>,
}
// Inherent methods for `Addresses`; currently empty.
impl<'a> Addresses<'a> {
// TODO: planned helper, not implemented yet:
// fn to_compact_tokens(&self) -> impl Iterator<Item=Token<'a>> {
// }
}
impl<'a> From<Vec<Address<'a>>> for Addresses<'a> {
fn from(addresses: Vec<Address<'a>>) -> Self {
Self { addresses }
}
}
/// Placeholder `Display` implementation; formatting is not written yet.
impl Display for Addresses<'_> {
/// # Panics
///
/// Always panics: the body is still a `todo!()` placeholder, so anything
/// that formats an `Addresses` through `Display` (e.g. `to_string()`)
/// will panic as well.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
todo!()
}
}
/// Placeholder `FromStr` implementation; parsing is not written yet.
impl FromStr for Addresses<'static> {
// Failures are to be reported with the crate-level `Error` type.
type Err = Error;
/// # Panics
///
/// Always panics: the body is still a `todo!()` placeholder, so calling
/// `str::parse::<Addresses>()` will panic instead of returning a `Result`.
fn from_str(s: &str) -> Result<Self, Self::Err> {
todo!()
}
}
/// A single lexical unit recognised by the token parser in this module.
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord)]
enum Token<'a> {
/// A numeric component, carrying the crate's [`Number`] type.
Number(Number),
/// A borrowed [`Word`] from the `words` crate.
Word(&'a Word<'a>),
/// The separator, written as either [`SEPARATOR`] or [`SEPARATOR_CHAR`].
WideSeparator,
}
impl Token<'static> {
    /// Tokenizes an entire input string.
    ///
    /// Leading whitespace is skipped, then one or more tokens are read, each
    /// optionally followed by whitespace, and finally the end of input is
    /// required. On success the remaining-input slot of the result is always
    /// the empty string.
    ///
    /// # Errors
    ///
    /// Returns the underlying `nom` error when no token can be read or when
    /// unparseable text remains before the end of input.
    fn parse_stream(i: &str) -> IResult<&str, Vec<Self>> {
        // A token together with whatever whitespace trails it; only the token is kept.
        let spaced_token = map(pair(Self::parse, space0), |(token, _trailing)| token);
        let mut whole_input = tuple((space0, many1(spaced_token), eof));

        let (_rest, (_leading, tokens, _end)) = whole_input(i)?;
        Ok(("", tokens))
    }

    /// Parses exactly one token from the front of `i`, returning the
    /// unconsumed remainder alongside it.
    ///
    /// Alternatives are tried in this order:
    /// 1. a run of alphabetic characters that `words::get_word` recognises,
    /// 2. an unsigned 32-bit decimal number,
    /// 3. the separator, as either `SEPARATOR` or `SEPARATOR_CHAR`
    ///    (matched case-insensitively).
    ///
    /// # Errors
    ///
    /// Returns the underlying `nom` error when none of the alternatives match.
    fn parse(i: &str) -> IResult<&str, Self> {
        let word = map_opt(alpha1, |candidate| {
            words::get_word(candidate).map(Self::Word)
        });
        let number = map(complete::u32, Self::Number);
        let separator = map(
            alt((tag_no_case(SEPARATOR), tag_no_case(SEPARATOR_CHAR))),
            |_| Self::WideSeparator,
        );

        alt((word, number, separator))(i)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Single-token parsing: each case lists the input, the expected
    /// unconsumed remainder, and the expected token.
    #[test]
    fn test_parse_token() {
        let cases: Vec<(&str, &str, Token<'static>)> = vec![
            ("&", "", Token::WideSeparator),
            ("AnD", "", Token::WideSeparator),
            ("AnD ", " ", Token::WideSeparator),
            ("AnD&& ", "&& ", Token::WideSeparator),
            ("1234&&", "&&", Token::Number(1234)),
            (
                "clarify",
                "",
                Token::Word(words::get_word("clarify").unwrap()),
            ),
        ];

        for (input, remainder, token) in cases {
            assert_eq!(Token::parse(input), Ok((remainder, token)));
        }

        // Empty input contains no token at all.
        assert!(Token::parse("").is_err());
    }

    /// Whole-input tokenizing: each case lists the input and the full
    /// expected token sequence; the remainder is always empty on success.
    #[test]
    fn test_parse_token_stream() {
        let cases: Vec<(&str, Vec<Token<'static>>)> = vec![
            (" & ", vec![Token::WideSeparator]),
            ("AnD", vec![Token::WideSeparator]),
            ("AnD ", vec![Token::WideSeparator]),
            (
                "AnD&& ",
                vec![
                    Token::WideSeparator,
                    Token::WideSeparator,
                    Token::WideSeparator,
                ],
            ),
            (
                "1234&&",
                vec![
                    Token::Number(1234),
                    Token::WideSeparator,
                    Token::WideSeparator,
                ],
            ),
            (
                "clarify",
                vec![Token::Word(words::get_word("clarify").unwrap())],
            ),
        ];

        for (input, tokens) in cases {
            assert_eq!(Token::parse_stream(input), Ok(("", tokens)));
        }

        // Trailing text that is not a token must reject the whole stream.
        assert!(Token::parse_stream("clarify _").is_err());
    }
}

View File

@ -2,7 +2,7 @@ use std::fmt::Display;
include!(concat!(env!("OUT_DIR"), "/codegen.rs")); include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
#[derive(Debug, Clone, Eq, PartialEq)] #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
/// A word struct /// A word struct
pub struct Word<'a> { pub struct Word<'a> {
/// The word itself /// The word itself