Update wordlist

This commit is contained in:
Austen Adler 2023-03-06 20:28:48 -05:00
parent 4706aa2c70
commit d834ca331f
5 changed files with 60 additions and 4 deletions

View File

@ -1,5 +1,5 @@
// echo README.adoc | entr sh -c "podman run --rm -it --network none -v "${PWD}:/documents/" asciidoctor/docker-asciidoctor asciidoctor -r asciidoctor-mathematical -a mathematical-format=svg README.adoc; printf 'Done ($(date -Isecond))\n'" // echo README.adoc | entr sh -c "podman run --rm -it --network none -v "${PWD}:/documents/" asciidoctor/docker-asciidoctor asciidoctor -r asciidoctor-mathematical -a mathematical-format=svg README.adoc; printf 'Done ($(date -Isecond))\n'"
:toc: // :toc:
:nofooter: :nofooter:
:!webfonts: :!webfonts:
:source-highlighter: rouge :source-highlighter: rouge
@ -7,3 +7,54 @@
:sectlinks: :sectlinks:
= xpin = xpin
== File Structure
Listed in order of dependency, the directories in this repository and their roles are:
[cols="m,,"]
|===
|Directory |Contents |Description
|docs
|Asciidoctor documentation
|Design decisions and algorithm definition documentation
|wordlist
|Python/CSV
|Wordlist sources and generator code
|test-data
|C++/CSV
Test data mapping random and hand-picked lat/lon pairs to their corresponding CellIDs and xpin addresses.
Also contains source code that links against the C++ S2 library to verify that CellIDs are translated correctly.
|words
|Rust Crate
|Crate to store the mapping of words and their associated numeric values.
|src
|Rust Crate
|
|tests
|
|
|xpin-wasm
|Rust Crate
|WASM bindings for xpin
|web-frontend
|
|
|build
|
|
|web
|Rust Crate
|
|===

View File

@ -555,6 +555,7 @@ CASTILE
CASTRO CASTRO
CATALOGING CATALOGING
CATALOGUING CATALOGUING
CATARACT
CATASTROPHE CATASTROPHE
CATECHISM CATECHISM
CATHARINE CATHARINE
@ -1472,6 +1473,7 @@ HOSTAGE
HOSTILITY HOSTILITY
HOT HOT
HOUGH HOUGH
HOUSEWIFE
HOUSTON HOUSTON
HOWARD HOWARD
HP HP
@ -2141,7 +2143,6 @@ MYSORE
NAD NAD
NAIROBI NAIROBI
NAKED NAKED
NAKED
NAMIBIA NAMIBIA
NAN NAN
NANCY NANCY
@ -2676,6 +2677,7 @@ RUSSIA
RUSSIAN RUSSIAN
RUTH RUTH
RUTHERFORD RUTHERFORD
RUTHLESS
RUTLAND RUTLAND
RWANDA RWANDA
SABBATH SABBATH

1 WORD
555 CASTRO
556 CATALOGING
557 CATALOGUING
558 CATARACT
559 CATASTROPHE
560 CATECHISM
561 CATHARINE
1473 HOSTILITY
1474 HOT
1475 HOUGH
1476 HOUSEWIFE
1477 HOUSTON
1478 HOWARD
1479 HP
2143 NAD
2144 NAIROBI
2145 NAKED
NAKED
2146 NAMIBIA
2147 NAN
2148 NANCY
2677 RUSSIAN
2678 RUTH
2679 RUTHERFORD
2680 RUTHLESS
2681 RUTLAND
2682 RWANDA
2683 SABBATH

View File

@ -114,7 +114,9 @@ pprint(list(enumerate(final_wordlist)))
print(f"Ending index: {ending_word_index}") print(f"Ending index: {ending_word_index}")
final_wordlist = [ final_wordlist = [
(idx + 1, word) # The idx here starts at 0, which is fine
# It indicates that a *word* component can map to 0 (not that the numeric component can)
(idx, word)
for idx, words in enumerate(final_wordlist) for idx, words in enumerate(final_wordlist)
for word in words for word in words
] ]

1
wordlist/99-wordlist.csv Symbolic link
View File

@ -0,0 +1 @@
04-deduplicated-words.py
1 04-deduplicated-words.py

View File

@ -11,7 +11,7 @@ use std::path::Path;
fn main() { fn main() {
let path = Path::new(&env::var("OUT_DIR").unwrap()).join("codegen.rs"); let path = Path::new(&env::var("OUT_DIR").unwrap()).join("codegen.rs");
let mut file = BufWriter::new(File::create(path).unwrap()); let mut file = BufWriter::new(File::create(path).unwrap());
let wordlist_path = "../data/wordlist-tmp.csv"; let wordlist_path = "../wordlist/99-wordlist.csv";
println!("cargo:rerun-if-changed={}", wordlist_path); println!("cargo:rerun-if-changed={}", wordlist_path);