#![feature(slice_group_by)] #![allow(unused_imports)] use csv::ReaderBuilder; use serde::{Deserialize, Deserializer, Serialize}; use std::collections::HashMap; use std::env; use std::fs::File; use std::io::{BufRead, BufReader, BufWriter, Write}; use std::path::Path; fn main() { let path = Path::new(&env::var("OUT_DIR").unwrap()).join("codegen.rs"); let mut file = BufWriter::new(File::create(path).unwrap()); let wordlist_path = "../wordlist/99-wordlist.csv"; println!("cargo:rerun-if-changed={}", wordlist_path); let rdr_builder = ReaderBuilder::new(); // First get the actual wordlist let words: Vec = rdr_builder .from_reader(File::open(wordlist_path).unwrap()) .deserialize() .collect::, _>>() .unwrap(); // Write it to an array containing all words write_words(&mut file, &words); // Make a mapping of all caps word to a reference to the `Word` entry write_word_map(&mut file, &words); // Make a mapping of numbers to `Word`s write_number_to_words(&mut file, &words); } fn write_words(mut file: impl Write, words: &[Word]) { let len = words.len(); writeln!( &mut file, r#"/// Static array of `Word` pub static WORDS: [Word; {len}] = ["# ) .unwrap(); for result in words.iter() { writeln!(&mut file, "\t{result:?},").unwrap(); } writeln!(&mut file, "];\n").unwrap(); } fn write_word_map(mut file: impl Write, words: &[Word]) { let mut word_map = phf_codegen::Map::new(); for (idx, word) in words.iter().enumerate() { let idx_str = format!("&WORDS[{idx}]"); word_map.entry(word.word.to_uppercase(), &idx_str); } writeln!( &mut file, r#"/// Mapping from all caps `&str` to `&'static Word` pub static WORD_MAP: phf::Map<&'static str, &Word> = {};"#, word_map.build() ) .unwrap(); } fn write_number_to_words(mut file: impl Write, words: &[Word]) { let word_number_to_idx = words .iter() .enumerate() .map(|(idx, w)| (w.number, idx)) .collect::>(); writeln!( &mut file, // "pub static NUMBER_TO_WORDS: &[&[usize]] = &[" r#"/// Mapping from each number to its associated word pub static NUMBER_TO_WORDS: &[&[&Word]] = &["# ) .unwrap(); for entry in word_number_to_idx .as_slice() .group_by(|(number1, _idx1), (number2, _idx2)| number1 == number2) { write!(&mut file, "\t&[",).unwrap(); for idx in entry.iter().map(|(_w, idx)| idx) { // write!(&mut file, "{idx},").unwrap(); write!(&mut file, "&WORDS[{idx}],").unwrap(); } writeln!(&mut file, "], /* {} */", entry[0].0).unwrap(); } writeln!(&mut file, "];\n").unwrap(); } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Word { /// The word itself pub word: String, /// The binary representation of this number /// /// The words are responsible for 13 bits of data, so this is fine to fit in a u16 pub number: u16, }