this_algorithm/words/build.rs
2023-02-15 22:57:30 -05:00

106 lines
3.0 KiB
Rust

#![feature(slice_group_by)]
#![allow(unused_imports)]
use csv::ReaderBuilder;
use serde::{Deserialize, Deserializer, Serialize};
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;
fn main() {
let path = Path::new(&env::var("OUT_DIR").unwrap()).join("codegen.rs");
let mut file = BufWriter::new(File::create(path).unwrap());
let wordlist_path = "../data/wordlist-tmp.csv";
println!("cargo:rerun-if-changed={}", wordlist_path);
let rdr_builder = ReaderBuilder::new();
// First get the actual wordlist
let words: Vec<Word> = rdr_builder
.from_reader(File::open(wordlist_path).unwrap())
.deserialize()
.collect::<Result<Vec<Word>, _>>()
.unwrap();
// Write it to an array containing all words
write_words(&mut file, &words);
// Make a mapping of all caps word to a reference to the `Word` entry
write_word_map(&mut file, &words);
// Make a mapping of numbers to `Word`s
write_number_to_words(&mut file, &words);
}
/// Emits the `WORDS` constant: a static slice holding every entry of `words`.
///
/// Each entry is rendered with its `Debug` representation on its own
/// tab-indented line, followed by a comma. The declaration is terminated by
/// `];` and a blank line.
///
/// # Panics
///
/// Panics if writing to `file` fails.
fn write_words(mut file: impl Write, words: &[Word]) {
    // Doc comment and opening of the generated array.
    file.write_all(b"/// Static array of `Word`\npub const WORDS: &[Word] = &[\n")
        .unwrap();

    // One array element per wordlist entry.
    words.iter().for_each(|entry| {
        writeln!(file, "\t{entry:?},").unwrap();
    });

    // Close the array and leave a blank separator line.
    file.write_all(b"];\n\n").unwrap();
}
/// Emits the `WORD_MAP` static: a `phf` map from the upper-cased word to a
/// reference into the generated `WORDS` array.
///
/// The value stored for the entry at position `i` is the expression text
/// `&WORDS[i]`, so the map points back at the array element rather than
/// duplicating it.
///
/// # Panics
///
/// Panics if writing to `file` fails.
fn write_word_map(mut file: impl Write, words: &[Word]) {
    let mut builder = phf_codegen::Map::new();

    // Register every word under its upper-cased spelling.
    words.iter().enumerate().for_each(|(position, entry)| {
        let target = format!("&WORDS[{position}]");
        builder.entry(entry.word.to_uppercase(), &target);
    });

    let rendered = builder.build();
    writeln!(
        file,
        "/// Mapping from all caps `&str` to `&'static Word`\npub static WORD_MAP: phf::Map<&'static str, &'static Word> =\n{};",
        rendered
    )
    .unwrap();
}
/// Emits the `NUMBER_TO_WORDS` constant: one inner slice per distinct
/// `Word::number`, each holding `&WORDS[idx]` references to every word that
/// carries that number.
///
/// Outer slices are emitted in ascending order of `number`; within a slice the
/// words keep the order they have in `words`. For input that is already sorted
/// by `number` the output is the same as grouping adjacent rows.
///
/// NOTE(review): consumers presumably index the outer slice by `number`, which
/// additionally requires the numbers to be dense and start at 0 — confirm
/// against the wordlist; gaps are not padded here.
///
/// # Panics
///
/// Panics if writing to `file` fails.
fn write_number_to_words(mut file: impl Write, words: &[Word]) {
    // Group through an ordered map instead of the nightly-only `slice::group_by`:
    // this builds on stable Rust and also merges rows that share a number even
    // when they are not adjacent in the wordlist.
    let mut indices_by_number = std::collections::BTreeMap::<u16, Vec<usize>>::new();
    for (idx, word) in words.iter().enumerate() {
        indices_by_number.entry(word.number).or_default().push(idx);
    }

    writeln!(
        &mut file,
        "pub const NUMBER_TO_WORDS: &[&[&'static Word]] = &["
    )
    .unwrap();

    for indices in indices_by_number.values() {
        write!(&mut file, "\t&[").unwrap();
        for idx in indices {
            write!(&mut file, "&WORDS[{idx}],").unwrap();
        }
        writeln!(&mut file, "],").unwrap();
    }

    writeln!(&mut file, "];\n").unwrap();
}
/// A single wordlist entry, deserialized by `main` from one row of the CSV wordlist.
///
/// `write_words` writes each entry's `Debug` output verbatim into the generated
/// `WORDS` array, so the field names and their order here determine the
/// generated source text.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Word {
/// The word itself
///
/// `write_word_map` upper-cases this to form the key in the generated `WORD_MAP`.
pub word: String,
/// The numeric value associated with this word
///
/// The words are responsible for 13 bits of data, so this is fine to fit in a u16.
/// `write_number_to_words` groups entries by this value.
pub number: u16,
}