#[cfg(feature = "zdict_builder")]
use std::io::{self, Read};
pub use zstd_safe::{CDict, DDict};
/// Prepared dictionary for compression.
///
/// This is a thin wrapper around a [`CDict`]. Depending on how it is built,
/// the dictionary either holds its own copy of the data (lifetime `'static`,
/// see `copy`) or refers to a caller-owned buffer that must outlive it
/// (lifetime `'a`, see `new`).
pub struct EncoderDictionary<'a> {
    // Underlying compression dictionary handle from `zstd_safe`.
    cdict: CDict<'a>,
}
impl EncoderDictionary<'static> {
pub fn copy(dictionary: &[u8], level: i32) -> Self {
Self {
cdict: zstd_safe::create_cdict(dictionary, level),
}
}
}
impl<'a> EncoderDictionary<'a> {
    /// Builds a compression dictionary that references `dictionary` instead
    /// of copying it.
    ///
    /// The result is tied to the lifetime `'a` of the provided buffer.
    /// Only available with the `experimental` feature.
    #[cfg(feature = "experimental")]
    #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "experimental")))]
    pub fn new(dictionary: &'a [u8], level: i32) -> Self {
        let cdict = zstd_safe::CDict::create_by_reference(dictionary, level);
        EncoderDictionary { cdict }
    }

    /// Borrows the wrapped [`CDict`].
    pub fn as_cdict(&self) -> &CDict<'a> {
        &self.cdict
    }
}
/// Prepared dictionary for decompression.
///
/// This is a thin wrapper around a [`DDict`]. Depending on how it is built,
/// the dictionary either holds its own copy of the data (lifetime `'static`,
/// see `copy`) or refers to a caller-owned buffer that must outlive it
/// (lifetime `'a`, see `new`).
pub struct DecoderDictionary<'a> {
    // Underlying decompression dictionary handle from `zstd_safe`.
    ddict: DDict<'a>,
}
impl DecoderDictionary<'static> {
pub fn copy(dictionary: &[u8]) -> Self {
Self {
ddict: zstd_safe::DDict::create(dictionary),
}
}
}
impl<'a> DecoderDictionary<'a> {
    /// Builds a decompression dictionary that references `dict` instead of
    /// copying it.
    ///
    /// The result is tied to the lifetime `'a` of the provided buffer.
    /// Only available with the `experimental` feature.
    #[cfg(feature = "experimental")]
    #[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "experimental")))]
    pub fn new(dict: &'a [u8]) -> Self {
        let ddict = zstd_safe::DDict::create_by_reference(dict);
        DecoderDictionary { ddict }
    }

    /// Borrows the wrapped [`DDict`].
    pub fn as_ddict(&self) -> &DDict<'a> {
        &self.ddict
    }
}
/// Trains a dictionary from one contiguous buffer of samples.
///
/// * `sample_data` holds all samples laid out back to back.
/// * `sample_sizes` gives the length of each sample, in order; the lengths
///   must add up to exactly `sample_data.len()`.
/// * `max_size` is the capacity of the buffer the dictionary is written into.
///
/// # Errors
///
/// Returns an [`io::Error`] of kind [`io::ErrorKind::Other`] when the sample
/// sizes do not add up to the length of `sample_data` (including when their
/// sum overflows `usize`), or the error reported by the training routine,
/// converted through `map_error_code`.
#[cfg(feature = "zdict_builder")]
#[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))]
pub fn from_continuous(
    sample_data: &[u8],
    sample_sizes: &[usize],
    max_size: usize,
) -> io::Result<Vec<u8>> {
    use crate::map_error_code;

    // Use a checked sum: a plain `sum()` would panic on overflow in debug
    // builds and silently wrap in release builds, where the wrapped total
    // could match `sample_data.len()` by accident and let inconsistent sizes
    // through to the trainer.
    let total_size = sample_sizes
        .iter()
        .try_fold(0usize, |acc, &size| acc.checked_add(size));

    // Complain if the lengths don't add up to the entire data.
    if total_size != Some(sample_data.len()) {
        return Err(io::Error::new(
            io::ErrorKind::Other,
            "sample sizes don't add up",
        ));
    }

    let mut result = Vec::with_capacity(max_size);
    zstd_safe::train_from_buffer(&mut result, sample_data, sample_sizes)
        .map_err(map_error_code)?;
    Ok(result)
}
/// Trains a dictionary from a slice of in-memory samples.
///
/// Every sample is copied into a single contiguous buffer, which is then
/// handed to [`from_continuous`] together with the individual sample
/// lengths and `max_size`.
///
/// # Errors
///
/// Returns whatever error [`from_continuous`] reports.
#[cfg(feature = "zdict_builder")]
#[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))]
pub fn from_samples<S: AsRef<[u8]>>(
    samples: &[S],
    max_size: usize,
) -> io::Result<Vec<u8>> {
    // Length of each sample, in input order.
    let sizes: Vec<usize> =
        samples.iter().map(|sample| sample.as_ref().len()).collect();

    // Allocate the whole concatenated buffer up front.
    let mut data: Vec<u8> = Vec::with_capacity(sizes.iter().sum());

    // Lay the samples out back to back.
    for sample in samples {
        data.extend_from_slice(sample.as_ref());
    }

    from_continuous(&data, &sizes, max_size)
}
/// Trains a dictionary from an iterator of readable samples.
///
/// Each item is read to its end into one shared buffer; the number of bytes
/// read from each item becomes that sample's size. The buffer and sizes are
/// then passed to [`from_continuous`] along with `max_size`.
///
/// # Errors
///
/// Stops at, and returns, the first error yielded by the iterator or raised
/// while reading a sample; otherwise returns whatever [`from_continuous`]
/// reports.
#[cfg(feature = "zdict_builder")]
#[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))]
pub fn from_sample_iterator<I, R>(
    samples: I,
    max_size: usize,
) -> io::Result<Vec<u8>>
where
    I: IntoIterator<Item = io::Result<R>>,
    R: Read,
{
    let mut data = Vec::new();

    // Collecting into `io::Result` short-circuits on the first failure,
    // whether it comes from the iterator item itself or from the read.
    let sizes = samples
        .into_iter()
        .map(|sample| sample?.read_to_end(&mut data))
        .collect::<io::Result<Vec<usize>>>()?;

    from_continuous(&data, &sizes, max_size)
}
/// Trains a dictionary using the contents of the given files as samples.
///
/// Each path is opened with [`std::fs::File::open`] and the resulting
/// files are forwarded to [`from_sample_iterator`] along with `max_size`.
///
/// # Errors
///
/// Returns an error if a file cannot be opened, or whatever error
/// [`from_sample_iterator`] reports.
#[cfg(feature = "zdict_builder")]
#[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))]
pub fn from_files<I, P>(filenames: I, max_size: usize) -> io::Result<Vec<u8>>
where
    P: AsRef<std::path::Path>,
    I: IntoIterator<Item = P>,
{
    let opened_files = filenames.into_iter().map(std::fs::File::open);
    from_sample_iterator(opened_files, max_size)
}
#[cfg(test)]
#[cfg(feature = "zdict_builder")]
mod tests {
    use std::fs;
    use std::io;
    use std::io::Read;
    use walkdir;

    /// Trains a dictionary on the `.rs` files under `src`, then checks that
    /// each of those files survives a compress/decompress round trip that
    /// uses the trained dictionary.
    #[test]
    fn test_dict_training() {
        // Collect the training set: every path under `src` ending in `.rs`.
        let paths: Vec<_> = walkdir::WalkDir::new("src")
            .into_iter()
            .map(|entry| entry.unwrap().into_path())
            .filter(|path| path.to_str().unwrap().ends_with(".rs"))
            .collect();

        let dict = super::from_files(&paths, 4000).unwrap();

        for path in paths {
            // Load the original file content.
            let mut content = Vec::new();
            fs::File::open(path)
                .unwrap()
                .read_to_end(&mut content)
                .unwrap();

            // Compress with the dictionary. The encoder is scoped so that it
            // is dropped (and thereby finished) before `compressed` is read.
            let mut compressed = Vec::new();
            {
                let mut encoder = crate::stream::Encoder::with_dictionary(
                    &mut compressed,
                    1,
                    &dict,
                )
                .unwrap()
                .auto_finish();
                io::copy(&mut &content[..], &mut encoder).unwrap();
            }

            // Decompress with the same dictionary.
            let mut decoder = crate::stream::Decoder::with_dictionary(
                &compressed[..],
                &dict[..],
            )
            .unwrap();
            let mut result = Vec::new();
            io::copy(&mut decoder, &mut result).unwrap();

            assert_eq!(&content, &result);
        }
    }
}