2023-02-11 17:04:16 -05:00
|
|
|
#![feature(slice_group_by)]
|
|
|
|
#![allow(unused_imports)]
|
|
|
|
use csv::ReaderBuilder;
|
|
|
|
use serde::{Deserialize, Deserializer, Serialize};
|
|
|
|
use std::collections::HashMap;
|
|
|
|
use std::env;
|
|
|
|
use std::fs::File;
|
|
|
|
use std::io::{BufRead, BufReader, BufWriter, Write};
|
|
|
|
use std::path::Path;
|
|
|
|
|
|
|
|
fn main() {
|
|
|
|
let path = Path::new(&env::var("OUT_DIR").unwrap()).join("codegen.rs");
|
|
|
|
let mut file = BufWriter::new(File::create(path).unwrap());
|
2023-02-15 22:57:30 -05:00
|
|
|
let wordlist_path = "../data/wordlist-tmp.csv";
|
|
|
|
|
|
|
|
println!("cargo:rerun-if-changed={}", wordlist_path);
|
2023-02-11 17:04:16 -05:00
|
|
|
|
|
|
|
let rdr_builder = ReaderBuilder::new();
|
|
|
|
|
|
|
|
// First get the actual wordlist
|
|
|
|
let words: Vec<Word> = rdr_builder
|
2023-02-15 22:57:30 -05:00
|
|
|
.from_reader(File::open(wordlist_path).unwrap())
|
2023-02-11 17:04:16 -05:00
|
|
|
.deserialize()
|
|
|
|
.collect::<Result<Vec<Word>, _>>()
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
// Write it to an array containing all words
|
2023-02-15 19:16:57 -05:00
|
|
|
write_words(&mut file, &words);
|
|
|
|
|
|
|
|
// Make a mapping of all caps word to a reference to the `Word` entry
|
|
|
|
write_word_map(&mut file, &words);
|
|
|
|
|
|
|
|
// Make a mapping of numbers to `Word`s
|
|
|
|
write_number_to_words(&mut file, &words);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn write_words(mut file: impl Write, words: &[Word]) {
|
2023-02-15 22:57:30 -05:00
|
|
|
writeln!(
|
|
|
|
&mut file,
|
|
|
|
r#"/// Static array of `Word`
|
2023-02-11 17:20:11 -05:00
|
|
|
pub const WORDS: &[Word] = &["#
|
2023-02-15 22:57:30 -05:00
|
|
|
)
|
|
|
|
.unwrap();
|
2023-02-11 17:04:16 -05:00
|
|
|
|
2023-02-15 22:57:30 -05:00
|
|
|
for result in words.iter() {
|
|
|
|
writeln!(&mut file, "\t{result:?},").unwrap();
|
2023-02-15 19:16:57 -05:00
|
|
|
}
|
2023-02-15 22:57:30 -05:00
|
|
|
writeln!(&mut file, "];\n").unwrap();
|
|
|
|
}
|
2023-02-11 17:04:16 -05:00
|
|
|
|
2023-02-15 19:16:57 -05:00
|
|
|
fn write_word_map(mut file: impl Write, words: &[Word]) {
|
2023-02-15 22:57:30 -05:00
|
|
|
let mut word_map = phf_codegen::Map::new();
|
|
|
|
for (idx, word) in words.iter().enumerate() {
|
|
|
|
let idx_str = format!("&WORDS[{idx}]");
|
|
|
|
word_map.entry(word.word.to_uppercase(), &idx_str);
|
|
|
|
}
|
|
|
|
writeln!(
|
|
|
|
&mut file,
|
|
|
|
r#"/// Mapping from all caps `&str` to `&'static Word`
|
2023-02-11 17:04:16 -05:00
|
|
|
pub static WORD_MAP: phf::Map<&'static str, &'static Word> =
|
|
|
|
{};"#,
|
2023-02-15 22:57:30 -05:00
|
|
|
word_map.build()
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
}
|
2023-02-11 17:04:16 -05:00
|
|
|
|
2023-02-15 19:16:57 -05:00
|
|
|
fn write_number_to_words(mut file: impl Write, words: &[Word]) {
|
2023-02-15 22:57:30 -05:00
|
|
|
let word_number_to_idx = words
|
|
|
|
.iter()
|
|
|
|
.enumerate()
|
|
|
|
.map(|(idx, w)| (w.number, idx))
|
|
|
|
.collect::<Vec<(u16, usize)>>();
|
2023-02-15 19:16:57 -05:00
|
|
|
|
2023-02-15 22:57:30 -05:00
|
|
|
writeln!(
|
|
|
|
&mut file,
|
|
|
|
// "pub const NUMBER_TO_WORDS: &[&[usize]] = &["
|
|
|
|
"pub const NUMBER_TO_WORDS: &[&[&'static Word]] = &["
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
for entry in word_number_to_idx
|
|
|
|
.as_slice()
|
|
|
|
.group_by(|(number1, _idx1), (number2, _idx2)| number1 == number2)
|
|
|
|
{
|
|
|
|
write!(&mut file, "\t&[",).unwrap();
|
|
|
|
|
|
|
|
for idx in entry.iter().map(|(_w, idx)| idx) {
|
|
|
|
// write!(&mut file, "{idx},").unwrap();
|
|
|
|
write!(&mut file, "&WORDS[{idx}],").unwrap();
|
2023-02-11 17:04:16 -05:00
|
|
|
}
|
2023-02-15 22:57:30 -05:00
|
|
|
writeln!(&mut file, "],").unwrap();
|
2023-02-11 17:04:16 -05:00
|
|
|
}
|
2023-02-15 22:57:30 -05:00
|
|
|
writeln!(&mut file, "];\n").unwrap();
|
|
|
|
}
|
2023-02-11 17:04:16 -05:00
|
|
|
|
|
|
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
|
|
|
pub struct Word {
|
|
|
|
/// The word itself
|
|
|
|
pub word: String,
|
|
|
|
|
|
|
|
/// The binary representation of this number
|
|
|
|
///
|
|
|
|
/// The words are responsible for 13 bits of data, so this is fine to fit in a u16
|
|
|
|
pub number: u16,
|
|
|
|
}
|