Continue work on format spec

This commit is contained in:
Austen Adler 2023-11-15 00:15:30 -05:00
parent 0d45390b40
commit 84a4f09fe4
5 changed files with 306 additions and 34 deletions

15
Cargo.lock generated
View File

@ -5,6 +5,9 @@ version = 3
[[package]] [[package]]
name = "arncdu" name = "arncdu"
version = "0.1.0" version = "0.1.0"
dependencies = [
"ncdufmt",
]
[[package]] [[package]]
name = "autocfg" name = "autocfg"
@ -25,6 +28,7 @@ dependencies = [
"num-traits", "num-traits",
"serde", "serde",
"serde_json", "serde_json",
"serde_repr",
] ]
[[package]] [[package]]
@ -91,6 +95,17 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "serde_repr"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3081f5ffbb02284dda55132aa26daecedd7372a42417bbbab6f14ab7d6bb9145"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.39" version = "2.0.39"

View File

@ -16,3 +16,4 @@ members = [
# serde = { version = "1.0.192", features = ["derive"] } # serde = { version = "1.0.192", features = ["derive"] }
# serde_json = "1.0.108" # serde_json = "1.0.108"
# tar = "0.4.40" # tar = "0.4.40"
ncdufmt = {path="./ncdufmt/"}

View File

@ -9,3 +9,4 @@ edition = "2021"
num-traits = "0.2.17" num-traits = "0.2.17"
serde = { version = "1.0.192", features = ["derive"] } serde = { version = "1.0.192", features = ["derive"] }
serde_json = "1.0.108" serde_json = "1.0.108"
serde_repr = "0.1.17"

View File

@ -1,9 +1,14 @@
// use crate::async_temp_buf::AsyncTempBuf; // use crate::async_temp_buf::AsyncTempBuf;
// use anyhow::Result; // use anyhow::Result;
use std::time::{Duration, SystemTime};
use std::ops::Not;
use num_traits::identities::Zero;
use num_traits::identities::One; use num_traits::identities::One;
use num_traits::identities::Zero;
use serde::de;
use serde::de::Error;
use serde::de::Visitor;
use serde_repr::Deserialize_repr;
use serde_repr::Serialize_repr;
use std::ops::Not;
use std::time::{Duration, SystemTime};
// use std::{os::unix::prelude::MetadataExt, path::Path}; // use std::{os::unix::prelude::MetadataExt, path::Path};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -11,37 +16,111 @@ use serde::{Deserialize, Serialize};
// This is based on https://dev.yorhel.nl/ncdu/jsonfmt // This is based on https://dev.yorhel.nl/ncdu/jsonfmt
pub type DeviceId = u64; #[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
struct NcduFile {
#[serde(flatten)]
header: Header,
}
#[derive(Serialize, Deserialize, Debug)] #[derive(Serialize, Debug, PartialEq, Eq, Clone)]
pub struct Header { pub struct Header {
pub majorver: u64, pub major_version: MajorVersion,
pub minorver: u64, pub minor_version: MinorVersion,
pub header_metadata: HeaderMetadata, pub header_metadata: HeaderMetadata,
} }
impl Default for Header { impl<'de> Deserialize<'de> for Header {
fn default() -> Self { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
struct HeaderVisitor;
impl<'de> Visitor<'de> for HeaderVisitor {
type Value = Header;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("struct Header")
}
fn visit_seq<V>(self, mut seq: V) -> Result<Self::Value, V::Error>
where
V: serde::de::SeqAccess<'de>,
{
let major_version = seq
.next_element()?
.ok_or_else(|| de::Error::invalid_length(0, &self))?;
let minor_version = seq
.next_element()?
.ok_or_else(|| de::Error::invalid_length(1, &self))?;
let header_metadata = seq
.next_element()?
.ok_or_else(|| de::Error::invalid_length(2, &self))?;
Ok(Header {
major_version,
minor_version,
header_metadata,
})
}
}
deserializer.deserialize_seq(HeaderVisitor)
}
}
impl Header {
pub fn from(progname: String, progver: String) -> Self {
Self { Self {
majorver: 1, major_version: MajorVersion::default(),
minorver: 2, minor_version: MinorVersion::default(),
header_metadata: HeaderMetadata::default(), header_metadata: HeaderMetadata::new(progname, progver),
} }
} }
} }
#[derive(Serialize, Deserialize, Debug)] #[derive(Serialize_repr, Deserialize_repr, Debug, PartialEq, Eq, Clone)]
#[repr(u8)]
#[derive(Default)]
/// Major version number stored as the first element of an export header.
///
/// The enum is `#[repr(u8)]`, so each variant's discriminant is the number that appears
/// in the file.
pub enum MajorVersion {
    /// Major version `1`; the only major version defined here and therefore the default.
    #[default]
    Version1 = 1,
}
#[derive(Serialize_repr, Deserialize_repr, Debug, PartialEq, Eq, Clone)]
#[repr(u16)]
#[derive(Default)]
/// Minor version number stored as the second element of an export header.
///
/// The enum is `#[repr(u16)]`, so each variant's discriminant is the number that appears
/// in the file. The newest known minor version is the default.
pub enum MinorVersion {
    /// For ncdu 1.9-1.12
    Minor0 = 0,
    /// For ncdu 1.13-1.15.2
    Minor1 = 1,
    /// For ncdu 1.16+
    #[default]
    Minor2 = 2,
}
/// Numeric device identifier; the value type of the `dev` field of `InfoBlock`.
pub type DeviceId = u64;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct HeaderMetadata { pub struct HeaderMetadata {
pub progname: String, pub progname: String,
pub progver: String, pub progver: String,
pub timestamp: u64, pub timestamp: u64,
} }
impl Default for HeaderMetadata { impl HeaderMetadata {
fn default() -> Self { fn new(progname: String, progver: String) -> Self {
Self { Self {
progname: String::from("ncdu"), progname,
progver: String::from("1.17"), progver,
timestamp: SystemTime::now() timestamp: SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH) .duration_since(SystemTime::UNIX_EPOCH)
.as_ref() .as_ref()
@ -51,8 +130,60 @@ impl Default for HeaderMetadata {
} }
} }
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
pub struct Directory {
info: InfoBlock,
#[serde(flatten)]
contents: Vec<FileOrDirectory>,
}
/// Deserializes a [`Directory`] from a sequence whose first element is the directory's
/// info block and whose remaining elements are its entries.
///
/// An empty sequence is rejected with an `invalid_length` error because the info block
/// is mandatory.
impl<'de> Deserialize<'de> for Directory {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Cap on how many entries are preallocated from the deserializer's size hint, so
        // a bogus hint cannot force a huge allocation. The vector still grows as needed.
        const MAX_PREALLOCATED_ENTRIES: usize = 4096;

        struct DirectoryVisitor;

        impl<'de> Visitor<'de> for DirectoryVisitor {
            type Value = Directory;

            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                formatter
                    .write_str("an array holding a directory's info block followed by its entries")
            }

            fn visit_seq<V>(self, mut seq: V) -> Result<Self::Value, V::Error>
            where
                V: serde::de::SeqAccess<'de>,
            {
                // The first element is required: it describes the directory itself.
                let info = seq
                    .next_element()?
                    .ok_or_else(|| de::Error::invalid_length(0, &self))?;

                let capacity = seq
                    .size_hint()
                    .unwrap_or(0)
                    .min(MAX_PREALLOCATED_ENTRIES);
                let mut contents = Vec::with_capacity(capacity);

                // Everything after the info block is a child entry.
                while let Some(entry) = seq.next_element()? {
                    contents.push(entry);
                }

                Ok(Directory { info, contents })
            }
        }

        deserializer.deserialize_seq(DirectoryVisitor)
    }
}
/// One entry inside a directory listing: either a nested directory or a plain file.
///
/// The enum is `#[serde(untagged)]`, so the data carries no variant tag; which variant is
/// produced depends on which one the input deserializes as, with `Directory` listed first.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(untagged)]
pub enum FileOrDirectory {
/// A nested directory, with its own info block and entries.
Directory(Directory),
/// A non-directory entry, described only by its info block.
File(InfoBlock),
}
/// String. Set if this file or directory is to be excluded from calculation for some reason. /// String. Set if this file or directory is to be excluded from calculation for some reason.
#[derive(Serialize, Deserialize, Debug)] #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum Excluded { pub enum Excluded {
/// If the path matched an exclude pattern. /// If the path matched an exclude pattern.
@ -65,7 +196,7 @@ pub enum Excluded {
FrmLink, FrmLink,
} }
#[derive(Serialize, Deserialize, Debug, Default)] #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct InfoBlock { pub struct InfoBlock {
/// Basename /// Basename
/// TODO: Max 32768 bytes /// TODO: Max 32768 bytes
@ -74,39 +205,39 @@ pub struct InfoBlock {
/// Device ID. A unique ID within the context of the exported dump. Could be a serialization of lstat().st_dev, but could be randomly generated and used within this file /// Device ID. A unique ID within the context of the exported dump. Could be a serialization of lstat().st_dev, but could be randomly generated and used within this file
/// ///
/// Accepted values are in the range of 0 <= dev < 2^64. /// Accepted values are in the range of 0 <= dev < 2^64.
pub dev: DeviceId, pub dev: Option<DeviceId>,
/// Apparent filesize /// Apparent filesize
/// Reported by lstat().st_size. If absent, 0 is assumed. Accepted values are in the range of 0 <= asize < 2^63 /// Reported by lstat().st_size. If absent, 0 is assumed. Accepted values are in the range of 0 <= asize < 2^63
#[serde(skip_serializing_if = "Zero::is_zero")] #[serde(skip_serializing_if = "Zero::is_zero", default)]
pub asize: u64, pub asize: u64,
/// Size of the file, as consumed on the disk. /// Size of the file, as consumed on the disk.
/// This is obtained through lstat().st_blocks*S_BLKSIZE. If absent, 0 is assumed. Accepted values are in the range of 0 <= dsize < 2^63. /// This is obtained through lstat().st_blocks*S_BLKSIZE. If absent, 0 is assumed. Accepted values are in the range of 0 <= dsize < 2^63.
#[serde(skip_serializing_if = "Zero::is_zero")] #[serde(skip_serializing_if = "Zero::is_zero", default)]
pub dsize: u64, pub dsize: u64,
/// Hardlinks /// Hardlinks
/// true if this is a file with lstat().st_nlink > 1 /// true if this is a file with lstat().st_nlink > 1
#[serde(skip_serializing_if = "Not::not")] #[serde(skip_serializing_if = "Not::not", default)]
pub hlnkc: bool, pub hlnkc: bool,
/// Inode number as reported by lstat().st_ino. Together with the Device ID this uniquely identifies a file in this dump. /// Inode number as reported by lstat().st_ino. Together with the Device ID this uniquely identifies a file in this dump.
/// In the case of hard links, two objects may appear with the same (dev,ino) combination. As of ncdu 1.16, this field is only exported if st_nlink > 1. A value of 0 is assumed if this field is absent, which is fine as long as the hlnkc field is false and nlink is 1, otherwise everything with the same dev and empty ino values will be considered as a single hardlinked file. Accepted values are in the range of 0 <= ino < 2^64. /// In the case of hard links, two objects may appear with the same (dev,ino) combination. As of ncdu 1.16, this field is only exported if st_nlink > 1. A value of 0 is assumed if this field is absent, which is fine as long as the hlnkc field is false and nlink is 1, otherwise everything with the same dev and empty ino values will be considered as a single hardlinked file. Accepted values are in the range of 0 <= ino < 2^64.
#[serde(skip_serializing_if = "Zero::is_zero")] #[serde(skip_serializing_if = "Zero::is_zero", default)]
pub ino: u64, pub ino: u64,
/// Number of hardlinks to this inode. The value of lstat().st_nlink /// Number of hardlinks to this inode. The value of lstat().st_nlink
/// Accepted values are in the range 1 <= nlink < 2^32. If absent, 1 is assumed. /// Accepted values are in the range 1 <= nlink < 2^32. If absent, 1 is assumed.
#[serde(skip_serializing_if = "One::is_one")] #[serde(skip_serializing_if = "One::is_one", default = "One::one")]
pub nlink: u64, pub nlink: u64,
/// Something went wrong while reading this entry /// Something went wrong while reading this entry
/// For files, this indicates that the lstat() call failed. For directories, this means that an error occurred while obtaining the file listing, and some items may be missing /// For files, this indicates that the lstat() call failed. For directories, this means that an error occurred while obtaining the file listing, and some items may be missing
#[serde(skip_serializing_if = "Not::not")] #[serde(skip_serializing_if = "Not::not", default)]
pub read_error: bool, pub read_error: bool,
// TODO: Implement this one // TODO: Implement this one
@ -120,26 +251,127 @@ pub struct InfoBlock {
/// "frmlink" /// "frmlink"
/// If the item is a firmlink and hasnt been followed with --follow-firmlinks (since ncdu 1.15). /// If the item is a firmlink and hasnt been followed with --follow-firmlinks (since ncdu 1.15).
/// Excluded items may still be included in the export, but only by name. size, asize and other information may be absent. If this item was excluded by a pattern, ncdu will not do an lstat() on it, and may thus report this item as a file even if it is a directory. /// Excluded items may still be included in the export, but only by name. size, asize and other information may be absent. If this item was excluded by a pattern, ncdu will not do an lstat() on it, and may thus report this item as a file even if it is a directory.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none", default)]
pub excluded: Option<Excluded>, pub excluded: Option<Excluded>,
/// This is true if neither S_ISREG() nor S_ISDIR() evaluates to true. I.e. this is a symlink, character device, block device, FIFO, socket, or whatever else your system may support. /// This is true if neither S_ISREG() nor S_ISDIR() evaluates to true. I.e. this is a symlink, character device, block device, FIFO, socket, or whatever else your system may support.
#[serde(skip_serializing_if = "Not::not")] #[serde(skip_serializing_if = "Not::not", default)]
pub notreg: bool, pub notreg: bool,
/// Extended info if it was specified to generate /// Extended info if it was specified to generate
#[serde(skip_serializing_if = "Option::is_none", flatten)] #[serde(skip_serializing_if = "Option::is_none", default, flatten)]
pub extended_info_block: Option<ExtendedInfoBlock>, pub extended_info_block: Option<ExtendedInfoBlock>,
} }
#[derive(Serialize, Deserialize, Debug)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct ExtendedInfoBlock { pub struct ExtendedInfoBlock {
/// Number, user ID who owns the file. Accepted values are in the range 0 <= uid < 2^31. /// Number, user ID who owns the file. Accepted values are in the range 0 <= uid < 2^31.
uid: u32, uid: Option<u32>,
/// Number, group ID who owns the file. Accepted values are in the range 0 <= uid < 2^31. /// Number, group ID who owns the file. Accepted values are in the range 0 <= uid < 2^31.
gid: u32, gid: Option<u32>,
/// Number, the raw file mode as returned by lstat(3). For Linux systems, see inode(7) for the interpretation of this field. Accepted range: 0 <= mode < 2^16. /// Number, the raw file mode as returned by lstat(3). For Linux systems, see inode(7) for the interpretation of this field. Accepted range: 0 <= mode < 2^16.
mode: u16, mode: Option<u16>,
/// Number, last modification time as a UNIX timestamp. Accepted range: 0 <= mtime < 2^64. As of ncdu 1.16, this number may also include an (infinite precision) decimal part for fractional seconds, though the decimal part is (currently) discarded during import. /// Number, last modification time as a UNIX timestamp. Accepted range: 0 <= mtime < 2^64. As of ncdu 1.16, this number may also include an (infinite precision) decimal part for fractional seconds, though the decimal part is (currently) discarded during import.
mtime: u64, mtime: Option<u64>,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_header() {
    // A header is a three-element array: major version, minor version, metadata object.
    let header_text = r#"[
        1,
        2,
        {
            "progname": "Test program name",
            "progver": "Version One",
            "timestamp": 0
        }
    ]"#;

    let parsed = serde_json::from_str::<Header>(header_text)
        .expect("a well-formed header array should deserialize");

    // assert_eq! reports both sides on failure, unlike assert!(a == b).
    assert_eq!(parsed.major_version, MajorVersion::Version1);
    assert_eq!(parsed.minor_version, MinorVersion::Minor2);
    assert_eq!(parsed.header_metadata.progname, "Test program name");
    assert_eq!(parsed.header_metadata.progver, "Version One");
    assert_eq!(parsed.header_metadata.timestamp, 0);
}
#[test]
fn test_header_metadata() {
    // The metadata block is a plain JSON object with three keys.
    let header_text = r#"{
        "progname": "Test program name",
        "progver": "Version One",
        "timestamp": 0
    }"#;

    let parsed = serde_json::from_str::<HeaderMetadata>(header_text)
        .expect("a well-formed metadata object should deserialize");

    // Check the parsed values, not just that parsing succeeded.
    assert_eq!(parsed.progname, "Test program name");
    assert_eq!(parsed.progver, "Version One");
    assert_eq!(parsed.timestamp, 0);
}
#[test]
fn test_directory() {
    // An empty array has no info block for the directory itself, so it is rejected.
    assert!(serde_json::from_str::<Directory>(r#"[]"#).is_err());

    // Directories must be arrays
    assert!(serde_json::from_str::<Directory>(r#"{}"#).is_err());

    // A directory holding only its own info block has no entries.
    let empty = serde_json::from_str::<Directory>(
        r#"[
            {
                "name": "/tmp/tmp.2gWrgcHU4X",
                "asize": 80,
                "dev": 39
            }
        ]"#,
    )
    .expect("a directory with only an info block should deserialize");
    assert_eq!(empty.info.asize, 80);
    assert_eq!(empty.info.dev, Some(39));
    assert!(empty.contents.is_empty());

    // Every element after the info block becomes one entry, here two plain files.
    let with_files = serde_json::from_str::<Directory>(
        r#"[
            {
                "name": "/tmp/tmp.2gWrgcHU4X",
                "asize": 80,
                "dev": 39
            },
            {
                "name": "out"
            },
            {
                "name": "a"
            }
        ]"#,
    )
    .expect("a directory with file entries should deserialize");
    assert_eq!(with_files.info.asize, 80);
    assert_eq!(with_files.contents.len(), 2);
    assert!(with_files
        .contents
        .iter()
        .all(|entry| matches!(entry, FileOrDirectory::File(_))));
}
#[test]
fn test_file_or_directory() {
    // File: a bare JSON object must land in the `File` variant.
    let file = serde_json::from_str::<FileOrDirectory>(
        r#"{
            "name": "a"
        }"#,
    )
    .expect("a bare info block should deserialize");
    assert!(matches!(file, FileOrDirectory::File(_)));

    // Directory: a JSON array must land in the `Directory` variant with both entries kept.
    let directory = serde_json::from_str::<FileOrDirectory>(
        r#"[
            {
                "name": "/tmp/tmp.2gWrgcHU4X",
                "asize": 80,
                "dev": 39
            },
            {
                "name": "out"
            },
            {
                "name": "a"
            }
        ]"#,
    )
    .expect("an array entry should deserialize");
    assert!(matches!(
        directory,
        FileOrDirectory::Directory(ref dir) if dir.contents.len() == 2
    ));
}
} }

23
sample-ncdu-output.json Normal file
View File

@ -0,0 +1,23 @@
[
1,
2,
{
"progname": "ncdu",
"progver": "1.19",
"timestamp": 1700023150
},
[
{
"name": "/tmp/tmp.2gWrgcHU4X",
"asize": 80,
"dev": 39
},
{
"name": "out"
},
{
"name": "a"
}
]
]