From db83a68923aa6f839a54ca260bd4cc56aef6b9db Mon Sep 17 00:00:00 2001 From: Austen Adler Date: Mon, 6 Mar 2023 22:57:07 -0500 Subject: [PATCH] Update to use four digits and 4k wordlist size --- docs/ALGORITHM.adoc | 16 ++++++++-------- justfile | 17 ++++++++++++----- src/conversions.rs | 3 +++ src/lib.rs | 23 ++++++++++++----------- src/v0.rs | 32 ++++++++++++-------------------- wordlist/03-exclude.csv | 9 +++++---- words/build.rs | 2 ++ 7 files changed, 54 insertions(+), 48 deletions(-) diff --git a/docs/ALGORITHM.adoc b/docs/ALGORITHM.adoc index b8a3900..de5dd6f 100644 --- a/docs/ALGORITHM.adoc +++ b/docs/ALGORITHM.adoc @@ -19,14 +19,14 @@ If you want to see the steps to get to this definition, go to link:DESIGN.html[D [source,title='xpin and S2 CellID Format'] ---- === xpin Format === -WORD2 (13 bits) : vvvvvvvvvvvvv -WORD1 (13 bits) : | |vvv vvvvvvvvvv -WORD0 (13 bits) : | | |vvvvvv vvvvvvv -0000 (10 bits) : | | | |vvvvvvvvv v -Not represented : | | | | | - : | | | | | -Bit : 63 51 48 38 32 25 16| 0 - : | | | | | | | | | +WORD2 (12 bits) [52, 63] : vvvvvvvvvvvv +WORD1 (12 bits) [40, 51] : | |vvvv vvvvvvvv +WORD0 (12 bits) [28, 39] : | | |vvvvvvvv vvvv +0000 (13 bits) [15, 27] : | | | |vvvvvvvvvvvv v +Not represented : | | | | | + : | | | | | +Bit : 63 52 48 40 32 28 16| 0 + : | | | | | | | | | : 0100101110101000 1011100010010011 1001001100100100 1100000000000000 === S2 CellID Format === | | || | Face number : ^^^ || | diff --git a/justfile b/justfile index 5fd0364..7381d55 100644 --- a/justfile +++ b/justfile @@ -1,6 +1,6 @@ -build: fmt rust-build build-wasm js-build +build: fmt rust-build wasm-build js-build -all: clean fmt build build-docs # rust-test +all: clean fmt build docs-build # rust-test rsync -ha ./web-frontend/build/ build/ du -shc build/* | sort -h @@ -11,16 +11,21 @@ rust-test: rust-build: cargo build --all -build-wasm: +wasm-build: wasm-pack build --target web xpin-wasm -js-build: build-wasm +wordlist-build: + . wordlist/venv/bin/activate && cd wordlist && for i in *.py; do "./${i}"; done + +js-build: wasm-build docs-build + # mkdir -p ./web-frontend/static/docs/ + # rsync -ha ./build/docs/ ./web-frontend/static/docs/ yarn --cwd ./web-frontend/ build clean: rm -vrf build -build-docs: +docs-build: earthly +docs cargo doc --all @@ -43,3 +48,5 @@ init: yarn --cwd ./web-frontend/ cargo fetch cargo install wasm-pack + if [ ! -d "wordlist/venv" ]; then python3 -m virtualenv wordlist/venv -p "$(which python3)"; fi; export OSTYPE=linux-gnu && . wordlist/venv/bin/activate + export OSTYPE=linux-gnu && . wordlist/venv/bin/activate && pip install -r wordlist/requirements.txt && python -c 'import nltk; nltk.download('wordnet')' diff --git a/src/conversions.rs b/src/conversions.rs index e6d214e..e53043e 100644 --- a/src/conversions.rs +++ b/src/conversions.rs @@ -2,6 +2,9 @@ use std::ops::RangeInclusive; use s2::{cellid::CellID, latlng::LatLng}; +pub(crate) const TWELVE_BITS: u64 = 0b1111_1111_1111; +pub(crate) const TEN_BITS: u64 = 0b1111_1111_11; + use crate::Error; pub fn lat_lon_to_cellid(lat: f64, lon: f64) -> Result { diff --git a/src/lib.rs b/src/lib.rs index e67d286..fa1b56a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,9 +23,9 @@ pub type Number = u32; pub const CELLID_LEVEL: u64 = 23; /// The maximum number value for V0 -pub const V0_MAX_NUMBER: u32 = 1024; +pub const V0_MAX_NUMBER: u32 = 9120; /// The minimum number value for V0 -pub const V0_MIN_NUMBER: u32 = 1; +pub const V0_MIN_NUMBER: u32 = 1024; // Any encoding or decoding error #[derive(Error, Debug, Eq, PartialEq)] @@ -186,9 +186,10 @@ impl Address<'_> { } fn from_components(number: Number, other_components: &[&str]) -> Result { - match extract_version(number)? { - Version::V0 => Self::parse_v0(number, other_components), - } + // match extract_version(number)? { + // Version::V0 => Self::parse_v0(number, other_components), + // } + Self::parse_v0(number, other_components) } /// Get the address as a [`CellID`] value @@ -211,12 +212,12 @@ impl Address<'_> { /// /// The version number is set by the two bits 11 and 12 // TODO: impl TryFrom ? -const fn extract_version(number: Number) -> Result { - match ((number >> 10) & 0b11) as u8 { - 0 => Ok(Version::V0), - v => Err(Error::UnimplementedVersion(v)), - } -} +// const fn extract_version(number: Number) -> Result { +// match ((number >> 10) & 0b11) as u8 { +// 0 => Ok(Version::V0), +// v => Err(Error::UnimplementedVersion(v)), +// } +// } impl Display for Address<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { diff --git a/src/v0.rs b/src/v0.rs index bdff407..923928d 100644 --- a/src/v0.rs +++ b/src/v0.rs @@ -2,6 +2,8 @@ use crate::{conversions, Address, Version, CELLID_LEVEL}; use std::ops::{Add, RangeInclusive}; use words::NUMBER_TO_WORDS; +use conversions::TEN_BITS; +use conversions::TWELVE_BITS; use s2::{cell::Cell, cellid::CellID}; pub struct UnpackedCellID { @@ -21,10 +23,10 @@ impl From for UnpackedCellID { impl From for UnpackedCellID { fn from(cellid: u64) -> Self { Self { - number_bits: conversions::extract_binary(cellid, 15..=24) as u16, - word0_bits: conversions::extract_binary(cellid, 25..=37) as u16, - word1_bits: conversions::extract_binary(cellid, 38..=50) as u16, - word2_bits: conversions::extract_binary(cellid, 51..=63) as u16, + number_bits: conversions::extract_binary(cellid, 15..=27) as u16, + word0_bits: conversions::extract_binary(cellid, 28..=39) as u16, + word1_bits: conversions::extract_binary(cellid, 40..=51) as u16, + word2_bits: conversions::extract_binary(cellid, 52..=63) as u16, } } } @@ -36,7 +38,7 @@ impl From for Address<'_> { let word2 = words::NUMBER_TO_WORDS[unpacked_cellid.word2_bits as usize][0]; Self { - number: u32::from(unpacked_cellid.number_bits), + number: u32::from(unpacked_cellid.number_bits) + 1024, words: [word0, word1, word2], version: Version::V0, } @@ -46,7 +48,7 @@ impl From for Address<'_> { impl From<&Address<'_>> for UnpackedCellID { fn from(value: &Address) -> Self { Self { - number_bits: value.number as u16, + number_bits: value.number as u16 - 1024, word0_bits: value.words[0].number, word1_bits: value.words[1].number, word2_bits: value.words[2].number, @@ -57,17 +59,15 @@ impl From<&Address<'_>> for UnpackedCellID { #[allow(clippy::use_self)] impl From for u64 { fn from(value: UnpackedCellID) -> Self { - let ten_bits = 0b1111_1111_11; - let thirteen_bits = 0b1_1111_1111_1111; let mut ret = 0b0; // Add words in reverse order - ret = (ret << 13) | (u64::from(value.word2_bits) & thirteen_bits); - ret = (ret << 13) | (u64::from(value.word1_bits) & thirteen_bits); - ret = (ret << 13) | (u64::from(value.word0_bits) & thirteen_bits); + ret = (ret << 12) | (u64::from(value.word2_bits) & TWELVE_BITS); + ret = (ret << 12) | (u64::from(value.word1_bits) & TWELVE_BITS); + ret = (ret << 12) | (u64::from(value.word0_bits) & TWELVE_BITS); // Add the number - ret = (ret << 10) | (u64::from(value.number_bits) & ten_bits); + ret = (ret << 13) | (u64::from(value.number_bits) & TEN_BITS); // Add the final bit ret = (ret << 1) | 0b1; @@ -84,11 +84,3 @@ impl From for CellID { Self(value.into()) } } - -// impl>> From for UnpackedCellID { -// fn from(addr: A) -> Self { -// let number_bits = addr. - -// CellID(ret) -// } -// } diff --git a/wordlist/03-exclude.csv b/wordlist/03-exclude.csv index 6638d0d..03421d2 100644 --- a/wordlist/03-exclude.csv +++ b/wordlist/03-exclude.csv @@ -79,6 +79,7 @@ ALBERTA ALBION ALBUMIN ALBUQUERQUE +ALCOHOL ALCOHOLISM ALDER ALDERMAN @@ -382,6 +383,8 @@ BLEED BLEEDING BLEST BLEW +BLOWING +BLUE BLUNT BO BOB @@ -880,6 +883,7 @@ DENMARK DENNIS DENT DENVER +DEPRESSION DEPRESSIVE DERBY DERBYSHIRE @@ -1120,6 +1124,7 @@ FAHRENHEIT FAIL FAILING FAILURE +FAINT FAIRBANKS FAIRFAX FAKE @@ -3333,7 +3338,3 @@ ZOOLOGICAL ZOOLOGY ZULU ZURICH -ALCOHOL -DEPRESSION -FAINT -BLOWING diff --git a/words/build.rs b/words/build.rs index 6bf9020..ad8d1b5 100644 --- a/words/build.rs +++ b/words/build.rs @@ -98,10 +98,12 @@ pub static NUMBER_TO_WORDS: &[&[&Word]] = &["# #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Word { /// The word itself + #[serde(rename = "WORD")] pub word: String, /// The binary representation of this number /// /// The words are responsible for 13 bits of data, so this is fine to fit in a u16 + #[serde(rename = "NUMBER")] pub number: u16, }