[go: up one dir, main page]

cfb/
lib.rs

1//! A library for reading/writing [Compound File Binary](
2//! https://en.wikipedia.org/wiki/Compound_File_Binary_Format) (structured
3//! storage) files.  See [MS-CFB](
4//! https://msdn.microsoft.com/en-us/library/dd942138.aspx) for the format
5//! specification.
6//!
7//! A Compound File Binary (CFB) file, also called a *structured storage file*
8//! or simply a *compound file*, is a bit like a simple file system within a
9//! file.  A compound file contains a tree of *storage* objects
10//! (i.e. directories), each of which can contain *stream* objects (i.e. files)
11//! or other storage objects.  The format is designed to allow reasonably
12//! efficient in-place mutation and resizing of these stream and storage
13//! objects, without having to completely rewrite the CFB file on disk.
14//!
15//! # Example usage
16//!
17//! ```no_run
18//! use cfb;
19//! use std::io::{Read, Seek, SeekFrom, Write};
20//!
21//! // Open an existing compound file in read-write mode.
22//! let mut comp = cfb::open_rw("path/to/cfb/file").unwrap();
23//!
24//! // Read in all the data from one of the streams in that compound file.
25//! let data = {
26//!     let mut stream = comp.open_stream("/foo/bar").unwrap();
27//!     let mut buffer = Vec::new();
28//!     stream.read_to_end(&mut buffer).unwrap();
29//!     buffer
30//! };
31//!
32//! // Append that data to the end of another stream in the same file.
33//! {
34//!     let mut stream = comp.open_stream("/baz").unwrap();
35//!     stream.seek(SeekFrom::End(0)).unwrap();
36//!     stream.write_all(&data).unwrap();
37//! }
38//!
39//! // Now create a new compound file, and create a new stream with the data.
40//! let mut comp2 = cfb::create("some/other/path").unwrap();
41//! comp2.create_storage("/spam/").unwrap();
42//! let mut stream = comp2.create_stream("/spam/eggs").unwrap();
43//! stream.write_all(&data).unwrap();
44//! ```
45
46#![warn(missing_docs)]
47
48use std::fmt;
49use std::fs;
50use std::io::{self, Read, Seek, SeekFrom, Write};
51use std::mem::size_of;
52use std::path::{Path, PathBuf};
53use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
54
55use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
56use fnv::FnvHashSet;
57use uuid::Uuid;
58
59use crate::internal::consts;
60use crate::internal::{
61    Allocator, DirEntry, Directory, EntriesOrder, Header, MiniAllocator,
62    ObjType, SectorInit, Sectors, Timestamp, Validation,
63};
64pub use crate::internal::{Entries, Entry, Stream, Version};
65
66#[macro_use]
67mod internal;
68
69//===========================================================================//
70
71/// Opens an existing compound file at the given path in read-only mode.
72pub fn open<P: AsRef<Path>>(path: P) -> io::Result<CompoundFile<fs::File>> {
73    CompoundFile::open(fs::File::open(path)?)
74}
75
76/// Opens an existing compound file at the given path in read-write mode.
77pub fn open_rw<P: AsRef<Path>>(path: P) -> io::Result<CompoundFile<fs::File>> {
78    open_rw_with_path(path.as_ref())
79}
80
81fn open_rw_with_path(path: &Path) -> io::Result<CompoundFile<fs::File>> {
82    let file = fs::OpenOptions::new().read(true).write(true).open(path)?;
83    CompoundFile::open(file)
84}
85
86/// Creates a new compound file with no contents at the given path.
87///
88/// The returned `CompoundFile` object will be both readable and writable.  If
89/// a file already exists at the given path, this will overwrite it.
90pub fn create<P: AsRef<Path>>(path: P) -> io::Result<CompoundFile<fs::File>> {
91    create_with_path(path.as_ref())
92}
93
94fn create_with_path(path: &Path) -> io::Result<CompoundFile<fs::File>> {
95    let file = fs::OpenOptions::new()
96        .read(true)
97        .write(true)
98        .create(true)
99        .truncate(true)
100        .open(path)?;
101    CompoundFile::create(file)
102}
103
104//===========================================================================//
105
106/// A compound file, backed by an underlying reader/writer (such as a
107/// [`File`](https://doc.rust-lang.org/std/fs/struct.File.html) or
108/// [`Cursor`](https://doc.rust-lang.org/std/io/struct.Cursor.html)).
109pub struct CompoundFile<F> {
110    minialloc: Arc<RwLock<MiniAllocator<F>>>,
111}
112
113impl<F> CompoundFile<F> {
114    fn minialloc(&self) -> RwLockReadGuard<MiniAllocator<F>> {
115        self.minialloc.read().unwrap()
116    }
117
118    fn minialloc_mut(&mut self) -> RwLockWriteGuard<MiniAllocator<F>> {
119        self.minialloc.write().unwrap()
120    }
121
122    /// Returns the CFB format version used for this compound file.
123    pub fn version(&self) -> Version {
124        self.minialloc().version()
125    }
126
127    fn stream_id_for_name_chain(&self, names: &[&str]) -> Option<u32> {
128        self.minialloc().stream_id_for_name_chain(names)
129    }
130
131    /// Returns information about the root storage object.  This is equivalent
132    /// to `self.entry("/").unwrap()` (but always succeeds).
133    pub fn root_entry(&self) -> Entry {
134        Entry::new(self.minialloc().root_dir_entry(), PathBuf::from("/"))
135    }
136
137    /// Given a path within the compound file, get information about that
138    /// stream or storage object.
139    pub fn entry<P: AsRef<Path>>(&self, path: P) -> io::Result<Entry> {
140        self.entry_with_path(path.as_ref())
141    }
142
143    fn entry_with_path(&self, path: &Path) -> io::Result<Entry> {
144        let names = internal::path::name_chain_from_path(path)?;
145        let path = internal::path::path_from_name_chain(&names);
146        let stream_id = match self.stream_id_for_name_chain(&names) {
147            Some(stream_id) => stream_id,
148            None => not_found!("No such object: {:?}", path),
149        };
150        Ok(Entry::new(self.minialloc().dir_entry(stream_id), path))
151    }
152
153    /// Returns an iterator over the entries within the root storage object.
154    /// This is equivalent to `self.read_storage("/").unwrap()` (but always
155    /// succeeds).
156    pub fn read_root_storage(&self) -> Entries<F> {
157        let start = self.minialloc().root_dir_entry().child;
158        Entries::new(
159            EntriesOrder::Nonrecursive,
160            &self.minialloc,
161            internal::path::path_from_name_chain(&[]),
162            start,
163        )
164    }
165
166    /// Returns an iterator over the entries within a storage object.
167    pub fn read_storage<P: AsRef<Path>>(
168        &self,
169        path: P,
170    ) -> io::Result<Entries<F>> {
171        self.read_storage_with_path(path.as_ref())
172    }
173
174    fn read_storage_with_path(&self, path: &Path) -> io::Result<Entries<F>> {
175        let names = internal::path::name_chain_from_path(path)?;
176        let path = internal::path::path_from_name_chain(&names);
177        let stream_id = match self.stream_id_for_name_chain(&names) {
178            Some(stream_id) => stream_id,
179            None => not_found!("No such storage: {:?}", path),
180        };
181        let start = {
182            let minialloc = self.minialloc();
183            let dir_entry = minialloc.dir_entry(stream_id);
184            if dir_entry.obj_type == ObjType::Stream {
185                invalid_input!("Not a storage: {:?}", path);
186            }
187            debug_assert!(
188                dir_entry.obj_type == ObjType::Storage
189                    || dir_entry.obj_type == ObjType::Root
190            );
191            dir_entry.child
192        };
193        Ok(Entries::new(
194            EntriesOrder::Nonrecursive,
195            &self.minialloc,
196            path,
197            start,
198        ))
199    }
200
201    /// Returns an iterator over all entries within the compound file, starting
202    /// from and including the root entry.  The iterator walks the storage tree
203    /// in a preorder traversal.  This is equivalent to
204    /// `self.walk_storage("/").unwrap()` (but always succeeds).
205    pub fn walk(&self) -> Entries<F> {
206        Entries::new(
207            EntriesOrder::Preorder,
208            &self.minialloc,
209            internal::path::path_from_name_chain(&[]),
210            consts::ROOT_STREAM_ID,
211        )
212    }
213
214    /// Returns an iterator over all entries under a storage subtree, including
215    /// the given path itself.  The iterator walks the storage tree in a
216    /// preorder traversal.
217    pub fn walk_storage<P: AsRef<Path>>(
218        &self,
219        path: P,
220    ) -> io::Result<Entries<F>> {
221        self.walk_storage_with_path(path.as_ref())
222    }
223
224    fn walk_storage_with_path(&self, path: &Path) -> io::Result<Entries<F>> {
225        let mut names = internal::path::name_chain_from_path(path)?;
226        let stream_id = match self.stream_id_for_name_chain(&names) {
227            Some(stream_id) => stream_id,
228            None => {
229                not_found!(
230                    "No such object: {:?}",
231                    internal::path::path_from_name_chain(&names)
232                );
233            }
234        };
235        names.pop();
236        let parent_path = internal::path::path_from_name_chain(&names);
237        Ok(Entries::new(
238            EntriesOrder::Preorder,
239            &self.minialloc,
240            parent_path,
241            stream_id,
242        ))
243    }
244
245    /// Returns true if there is an existing stream or storage at the given
246    /// path, or false if there is nothing at that path.
247    pub fn exists<P: AsRef<Path>>(&self, path: P) -> bool {
248        match internal::path::name_chain_from_path(path.as_ref()) {
249            Ok(names) => self.stream_id_for_name_chain(&names).is_some(),
250            Err(_) => false,
251        }
252    }
253
254    /// Returns true if there is an existing stream at the given path, or false
255    /// if there is a storage or nothing at that path.
256    pub fn is_stream<P: AsRef<Path>>(&self, path: P) -> bool {
257        match internal::path::name_chain_from_path(path.as_ref()) {
258            Ok(names) => match self.stream_id_for_name_chain(&names) {
259                Some(stream_id) => {
260                    self.minialloc().dir_entry(stream_id).obj_type
261                        == ObjType::Stream
262                }
263                None => false,
264            },
265            Err(_) => false,
266        }
267    }
268
269    /// Returns true if there is an existing storage at the given path, or
270    /// false if there is a stream or nothing at that path.
271    pub fn is_storage<P: AsRef<Path>>(&self, path: P) -> bool {
272        match internal::path::name_chain_from_path(path.as_ref()) {
273            Ok(names) => match self.stream_id_for_name_chain(&names) {
274                Some(stream_id) => {
275                    self.minialloc().dir_entry(stream_id).obj_type
276                        != ObjType::Stream
277                }
278                None => false,
279            },
280            Err(_) => false,
281        }
282    }
283
284    // TODO: pub fn copy_stream
285
286    // TODO: pub fn rename
287
288    /// Consumes the `CompoundFile`, returning the underlying reader/writer.
289    pub fn into_inner(self) -> F {
290        // We only ever retain Weak copies of the CompoundFile's minialloc Rc
291        // (e.g. in Stream structs), so the Rc::try_unwrap() should always
292        // succeed.
293        match Arc::try_unwrap(self.minialloc) {
294            Ok(ref_cell) => ref_cell.into_inner().unwrap().into_inner(),
295            Err(_) => unreachable!(),
296        }
297    }
298}
299
300impl<F: Seek> CompoundFile<F> {
301    /// Opens an existing stream in the compound file for reading and/or
302    /// writing (depending on what the underlying file supports).
303    pub fn open_stream<P: AsRef<Path>>(
304        &mut self,
305        path: P,
306    ) -> io::Result<Stream<F>> {
307        self.open_stream_with_path(path.as_ref())
308    }
309
310    fn open_stream_with_path(&mut self, path: &Path) -> io::Result<Stream<F>> {
311        let names = internal::path::name_chain_from_path(path)?;
312        let path = internal::path::path_from_name_chain(&names);
313        let stream_id = match self.stream_id_for_name_chain(&names) {
314            Some(stream_id) => stream_id,
315            None => not_found!("No such stream: {:?}", path),
316        };
317        if self.minialloc().dir_entry(stream_id).obj_type != ObjType::Stream {
318            invalid_input!("Not a stream: {:?}", path);
319        }
320        Ok(Stream::new(&self.minialloc, stream_id))
321    }
322}
323
324impl<F: Read + Seek> CompoundFile<F> {
325    /// Opens an existing compound file, using the underlying reader.  If the
326    /// underlying reader also supports the `Write` trait, then the
327    /// `CompoundFile` object will be writable as well.
328    pub fn open(inner: F) -> io::Result<CompoundFile<F>> {
329        CompoundFile::open_internal(inner, Validation::Permissive)
330    }
331
332    /// Like `open()`, but is stricter when parsing and will return an error if
333    /// the file violates the CFB spec in any way (which many CFB files in the
334    /// wild do).  This is mainly useful for validating a CFB file or
335    /// implemention (such as this crate itself) to help ensure compatibility
336    /// with other readers.
337    pub fn open_strict(inner: F) -> io::Result<CompoundFile<F>> {
338        CompoundFile::open_internal(inner, Validation::Strict)
339    }
340
341    fn open_internal(
342        mut inner: F,
343        validation: Validation,
344    ) -> io::Result<CompoundFile<F>> {
345        let inner_len = inner.seek(SeekFrom::End(0))?;
346        if inner_len < consts::HEADER_LEN as u64 {
347            invalid_data!(
348                "Invalid CFB file ({} bytes is too small)",
349                inner_len
350            );
351        }
352        inner.seek(SeekFrom::Start(0))?;
353
354        let header = Header::read_from(&mut inner, validation)?;
355        let sector_len = header.version.sector_len();
356        if inner_len
357            > ((consts::MAX_REGULAR_SECTOR + 1) as u64) * (sector_len as u64)
358        {
359            invalid_data!(
360                "Invalid CFB file ({} bytes is too large)",
361                inner_len
362            );
363        }
364
365        if inner_len < header.version.sector_len() as u64 {
366            invalid_data!(
367                "Invalid CFB file (length of {} < sector length of {})",
368                inner_len,
369                header.version.sector_len()
370            );
371        }
372        let mut sectors = Sectors::new(header.version, inner_len, inner);
373        let num_sectors = sectors.num_sectors();
374
375        // Read in DIFAT.
376        let mut difat = Vec::<u32>::new();
377        difat.extend_from_slice(&header.initial_difat_entries);
378        let mut seen_sector_ids = FnvHashSet::default();
379        let mut difat_sector_ids = Vec::new();
380        let mut current_difat_sector = header.first_difat_sector;
381        while current_difat_sector != consts::END_OF_CHAIN
382            && current_difat_sector != consts::FREE_SECTOR
383        {
384            if current_difat_sector > consts::MAX_REGULAR_SECTOR {
385                invalid_data!(
386                    "DIFAT chain includes invalid sector index {}",
387                    current_difat_sector
388                );
389            } else if current_difat_sector >= num_sectors {
390                invalid_data!(
391                    "DIFAT chain includes sector index {}, but sector count \
392                     is only {}",
393                    current_difat_sector,
394                    num_sectors
395                );
396            }
397            if seen_sector_ids.contains(&current_difat_sector) {
398                invalid_data!(
399                    "DIFAT chain includes duplicate sector index {}",
400                    current_difat_sector,
401                );
402            }
403            seen_sector_ids.insert(current_difat_sector);
404            difat_sector_ids.push(current_difat_sector);
405            let mut sector = sectors.seek_to_sector(current_difat_sector)?;
406            for _ in 0..(sector_len / size_of::<u32>() - 1) {
407                let next = sector.read_u32::<LittleEndian>()?;
408                if next != consts::FREE_SECTOR
409                    && next > consts::MAX_REGULAR_SECTOR
410                {
411                    invalid_data!(
412                        "DIFAT refers to invalid sector index {}",
413                        next
414                    );
415                }
416                difat.push(next);
417            }
418            current_difat_sector = sector.read_u32::<LittleEndian>()?;
419            if validation.is_strict()
420                && current_difat_sector == consts::FREE_SECTOR
421            {
422                invalid_data!(
423                    "DIFAT chain must terminate with {}, not {}",
424                    consts::END_OF_CHAIN,
425                    consts::FREE_SECTOR
426                );
427            }
428        }
429        if validation.is_strict()
430            && header.num_difat_sectors as usize != difat_sector_ids.len()
431        {
432            invalid_data!(
433                "Incorrect DIFAT chain length (header says {}, actual is {})",
434                header.num_difat_sectors,
435                difat_sector_ids.len()
436            );
437        }
438        while difat.last() == Some(&consts::FREE_SECTOR) {
439            difat.pop();
440        }
441        if validation.is_strict()
442            && header.num_fat_sectors as usize != difat.len()
443        {
444            invalid_data!(
445                "Incorrect number of FAT sectors (header says {}, DIFAT says \
446                 {})",
447                header.num_fat_sectors,
448                difat.len()
449            );
450        }
451
452        // Read in FAT.
453        let mut fat = Vec::<u32>::new();
454        for &sector_index in difat.iter() {
455            if sector_index >= num_sectors {
456                invalid_data!(
457                    "DIFAT refers to sector {}, but sector count is only {}",
458                    sector_index,
459                    num_sectors
460                );
461            }
462            let mut sector = sectors.seek_to_sector(sector_index)?;
463            for _ in 0..(sector_len / size_of::<u32>()) {
464                fat.push(sector.read_u32::<LittleEndian>()?);
465            }
466        }
467        // If the number of sectors in the file is not a multiple of the number
468        // of FAT entries per sector, then the last FAT sector must be padded
469        // with FREE_SECTOR entries (see MS-CFB section 2.3).  However, some
470        // CFB implementations incorrectly pad the last FAT sector with zeros
471        // (see https://github.com/mdsteele/rust-cfb/issues/8), so we allow
472        // this under Permissive validation.  Since zero is normally a
473        // meaningful FAT entry (referring to sector 0), we only want to strip
474        // zeros from the end of the FAT if they are beyond the number of
475        // sectors in the file.
476        if !validation.is_strict() {
477            while fat.len() > num_sectors as usize && fat.last() == Some(&0) {
478                fat.pop();
479            }
480        }
481        // Strip FREE_SECTOR entries from the end of the FAT.  Unlike the zero
482        // case above, we can remove these even if it makes the number of FAT
483        // entries less than the number of sectors in the file; the allocator
484        // will implicitly treat these extra sectors as free.
485        while fat.last() == Some(&consts::FREE_SECTOR) {
486            fat.pop();
487        }
488
489        let mut allocator =
490            Allocator::new(sectors, difat_sector_ids, difat, fat, validation)?;
491
492        // Read in directory.
493        let mut dir_entries = Vec::<DirEntry>::new();
494        let mut seen_dir_sectors = FnvHashSet::default();
495        let mut current_dir_sector = header.first_dir_sector;
496        let mut dir_sector_count = 1;
497        while current_dir_sector != consts::END_OF_CHAIN {
498            if validation.is_strict()
499                && header.version == Version::V4
500                && dir_sector_count > header.num_dir_sectors
501            {
502                invalid_data!(
503                    "Directory chain includes at least {} sectors which is greater than header num_dir_sectors {}",
504                    dir_sector_count,
505                    header.num_dir_sectors
506                );
507            }
508            if current_dir_sector > consts::MAX_REGULAR_SECTOR {
509                invalid_data!(
510                    "Directory chain includes invalid sector index {}",
511                    current_dir_sector
512                );
513            } else if current_dir_sector >= num_sectors {
514                invalid_data!(
515                    "Directory chain includes sector index {}, but sector \
516                     count is only {}",
517                    current_dir_sector,
518                    num_sectors
519                );
520            }
521            if seen_dir_sectors.contains(&current_dir_sector) {
522                invalid_data!(
523                    "Directory chain includes duplicate sector index {}",
524                    current_dir_sector,
525                );
526            }
527            seen_dir_sectors.insert(current_dir_sector);
528            {
529                let mut sector =
530                    allocator.seek_to_sector(current_dir_sector)?;
531                for _ in 0..header.version.dir_entries_per_sector() {
532                    dir_entries.push(DirEntry::read_from(
533                        &mut sector,
534                        header.version,
535                        validation,
536                    )?);
537                }
538            }
539            current_dir_sector = allocator.next(current_dir_sector)?;
540            dir_sector_count += 1;
541        }
542
543        let mut directory = Directory::new(
544            allocator,
545            dir_entries,
546            header.first_dir_sector,
547            validation,
548        )?;
549
550        // Read in MiniFAT.
551        let minifat = {
552            let mut chain = directory
553                .open_chain(header.first_minifat_sector, SectorInit::Fat)?;
554            if validation.is_strict()
555                && header.num_minifat_sectors as usize != chain.num_sectors()
556            {
557                invalid_data!(
558                    "Incorrect MiniFAT chain length (header says {}, actual \
559                     is {})",
560                    header.num_minifat_sectors,
561                    chain.num_sectors()
562                );
563            }
564            let num_minifat_entries = (chain.len() / 4) as usize;
565            let mut minifat = Vec::<u32>::with_capacity(num_minifat_entries);
566            for _ in 0..num_minifat_entries {
567                minifat.push(chain.read_u32::<LittleEndian>()?);
568            }
569            while minifat.last() == Some(&consts::FREE_SECTOR) {
570                minifat.pop();
571            }
572            minifat
573        };
574
575        let minialloc = MiniAllocator::new(
576            directory,
577            minifat,
578            header.first_minifat_sector,
579            validation,
580        )?;
581
582        Ok(CompoundFile { minialloc: Arc::new(RwLock::new(minialloc)) })
583    }
584}
585
586impl<F: Read + Write + Seek> CompoundFile<F> {
587    /// Creates a new compound file with no contents, using the underlying
588    /// reader/writer.  The reader/writer should be initially empty.
589    pub fn create(inner: F) -> io::Result<CompoundFile<F>> {
590        CompoundFile::create_with_version(Version::V4, inner)
591    }
592
593    /// Creates a new compound file of the given version with no contents,
594    /// using the underlying writer.  The writer should be initially empty.
595    pub fn create_with_version(
596        version: Version,
597        mut inner: F,
598    ) -> io::Result<CompoundFile<F>> {
599        let mut header = Header {
600            version,
601            // 2.2 requires this to be zero in V3
602            num_dir_sectors: if version == Version::V3 { 0 } else { 1 },
603            num_fat_sectors: 1,
604            first_dir_sector: 1,
605            first_minifat_sector: consts::END_OF_CHAIN,
606            num_minifat_sectors: 0,
607            first_difat_sector: consts::END_OF_CHAIN,
608            num_difat_sectors: 0,
609            initial_difat_entries: [consts::FREE_SECTOR;
610                consts::NUM_DIFAT_ENTRIES_IN_HEADER],
611        };
612        header.initial_difat_entries[0] = 0;
613        header.write_to(&mut inner)?;
614
615        // Pad the header with zeroes so it's the length of a sector.
616        let sector_len = version.sector_len();
617        debug_assert!(sector_len >= consts::HEADER_LEN);
618        if sector_len > consts::HEADER_LEN {
619            inner.write_all(&vec![0; sector_len - consts::HEADER_LEN])?;
620        }
621
622        // Write FAT sector:
623        let fat: Vec<u32> = vec![consts::FAT_SECTOR, consts::END_OF_CHAIN];
624        for &entry in fat.iter() {
625            inner.write_u32::<LittleEndian>(entry)?;
626        }
627        for _ in fat.len()..(sector_len / size_of::<u32>()) {
628            inner.write_u32::<LittleEndian>(consts::FREE_SECTOR)?;
629        }
630        let difat: Vec<u32> = vec![0];
631        let difat_sector_ids: Vec<u32> = vec![];
632
633        // Write directory sector:
634        let root_dir_entry = DirEntry::empty_root_entry();
635        root_dir_entry.write_to(&mut inner)?;
636        for _ in 1..version.dir_entries_per_sector() {
637            DirEntry::unallocated().write_to(&mut inner)?;
638        }
639
640        let sectors = Sectors::new(version, 3 * sector_len as u64, inner);
641        let allocator = Allocator::new(
642            sectors,
643            difat_sector_ids,
644            difat,
645            fat,
646            Validation::Strict,
647        )?;
648        let directory = Directory::new(
649            allocator,
650            vec![root_dir_entry],
651            1,
652            Validation::Strict,
653        )?;
654        let minialloc = MiniAllocator::new(
655            directory,
656            vec![],
657            consts::END_OF_CHAIN,
658            Validation::Strict,
659        )?;
660        Ok(CompoundFile { minialloc: Arc::new(RwLock::new(minialloc)) })
661    }
662
663    /// Creates a new, empty storage object (i.e. "directory") at the provided
664    /// path.  The parent storage object must already exist.
665    pub fn create_storage<P: AsRef<Path>>(
666        &mut self,
667        path: P,
668    ) -> io::Result<()> {
669        self.create_storage_with_path(path.as_ref())
670    }
671
672    fn create_storage_with_path(&mut self, path: &Path) -> io::Result<()> {
673        let mut names = internal::path::name_chain_from_path(path)?;
674        if let Some(stream_id) = self.stream_id_for_name_chain(&names) {
675            let path = internal::path::path_from_name_chain(&names);
676            if self.minialloc().dir_entry(stream_id).obj_type
677                != ObjType::Stream
678            {
679                already_exists!(
680                    "Cannot create storage at {:?} because a \
681                                 storage already exists there",
682                    path
683                );
684            } else {
685                already_exists!(
686                    "Cannot create storage at {:?} because a \
687                                 stream already exists there",
688                    path
689                );
690            }
691        }
692        // If names is empty, that means we're trying to create the root.  But
693        // the root always already exists and will have been rejected above.
694        debug_assert!(!names.is_empty());
695        let name = names.pop().unwrap();
696        let parent_id = match self.stream_id_for_name_chain(&names) {
697            Some(stream_id) => stream_id,
698            None => {
699                not_found!("Parent storage doesn't exist");
700            }
701        };
702        self.minialloc_mut().insert_dir_entry(
703            parent_id,
704            name,
705            ObjType::Storage,
706        )?;
707        Ok(())
708    }
709
710    /// Recursively creates a storage and all of its parent storages if they
711    /// are missing.
712    pub fn create_storage_all<P: AsRef<Path>>(
713        &mut self,
714        path: P,
715    ) -> io::Result<()> {
716        self.create_storage_all_with_path(path.as_ref())
717    }
718
719    fn create_storage_all_with_path(&mut self, path: &Path) -> io::Result<()> {
720        let names = internal::path::name_chain_from_path(path)?;
721        for length in 1..(names.len() + 1) {
722            let prefix_path =
723                internal::path::path_from_name_chain(&names[..length]);
724            if self.is_storage(&prefix_path) {
725                continue;
726            }
727            self.create_storage_with_path(&prefix_path)?;
728        }
729        Ok(())
730    }
731
732    /// Removes the storage object at the provided path.  The storage object
733    /// must exist and have no children.
734    pub fn remove_storage<P: AsRef<Path>>(
735        &mut self,
736        path: P,
737    ) -> io::Result<()> {
738        self.remove_storage_with_path(path.as_ref())
739    }
740
741    fn remove_storage_with_path(&mut self, path: &Path) -> io::Result<()> {
742        let mut names = internal::path::name_chain_from_path(path)?;
743        let stream_id = match self.stream_id_for_name_chain(&names) {
744            Some(parent_id) => parent_id,
745            None => not_found!("No such storage: {:?}", path),
746        };
747        {
748            let minialloc = self.minialloc();
749            let dir_entry = minialloc.dir_entry(stream_id);
750            if dir_entry.obj_type == ObjType::Root {
751                invalid_input!("Cannot remove the root storage object");
752            }
753            if dir_entry.obj_type == ObjType::Stream {
754                invalid_input!("Not a storage: {:?}", path);
755            }
756            debug_assert_eq!(dir_entry.obj_type, ObjType::Storage);
757            if dir_entry.child != consts::NO_STREAM {
758                invalid_input!("Storage is not empty: {:?}", path);
759            }
760        }
761        debug_assert!(!names.is_empty());
762        let name = names.pop().unwrap();
763        let parent_id = self.stream_id_for_name_chain(&names).unwrap();
764        self.minialloc_mut().remove_dir_entry(parent_id, name)?;
765        Ok(())
766    }
767
768    /// Recursively removes a storage and all of its children.  If called on
769    /// the root storage, recursively removes all of its children but not the
770    /// root storage itself (which cannot be removed).
771    pub fn remove_storage_all<P: AsRef<Path>>(
772        &mut self,
773        path: P,
774    ) -> io::Result<()> {
775        self.remove_storage_all_with_path(path.as_ref())
776    }
777
778    fn remove_storage_all_with_path(&mut self, path: &Path) -> io::Result<()> {
779        let mut stack = self.walk_storage(path)?.collect::<Vec<Entry>>();
780        while let Some(entry) = stack.pop() {
781            if entry.is_stream() {
782                self.remove_stream_with_path(entry.path())?;
783            } else if !entry.is_root() {
784                self.remove_storage_with_path(entry.path())?;
785            }
786        }
787        Ok(())
788    }
789
790    /// Sets the CLSID for the storage object at the provided path.  (To get
791    /// the current CLSID for a storage object, use
792    /// `self.entry(path)?.clsid()`.)
793    pub fn set_storage_clsid<P: AsRef<Path>>(
794        &mut self,
795        path: P,
796        clsid: Uuid,
797    ) -> io::Result<()> {
798        self.set_storage_clsid_with_path(path.as_ref(), clsid)
799    }
800
801    fn set_storage_clsid_with_path(
802        &mut self,
803        path: &Path,
804        clsid: Uuid,
805    ) -> io::Result<()> {
806        let names = internal::path::name_chain_from_path(path)?;
807        let stream_id = match self.stream_id_for_name_chain(&names) {
808            Some(stream_id) => stream_id,
809            None => not_found!(
810                "No such storage: {:?}",
811                internal::path::path_from_name_chain(&names)
812            ),
813        };
814        let mut minialloc = self.minialloc_mut();
815        if minialloc.dir_entry(stream_id).obj_type == ObjType::Stream {
816            invalid_input!(
817                "Not a storage: {:?}",
818                internal::path::path_from_name_chain(&names)
819            );
820        }
821        minialloc.with_dir_entry_mut(stream_id, |dir_entry| {
822            dir_entry.clsid = clsid;
823        })
824    }
825
826    /// Creates and returns a new, empty stream object at the provided path.
827    /// If a stream already exists at that path, it will be replaced by the new
828    /// stream.  The parent storage object must already exist.
829    pub fn create_stream<P: AsRef<Path>>(
830        &mut self,
831        path: P,
832    ) -> io::Result<Stream<F>> {
833        self.create_stream_with_path(path.as_ref(), true)
834    }
835
836    /// Creates and returns a new, empty stream object at the provided path.
837    /// Returns an error if a stream already exists at that path.  The parent
838    /// storage object must already exist.
839    pub fn create_new_stream<P: AsRef<Path>>(
840        &mut self,
841        path: P,
842    ) -> io::Result<Stream<F>> {
843        self.create_stream_with_path(path.as_ref(), false)
844    }
845
846    fn create_stream_with_path(
847        &mut self,
848        path: &Path,
849        overwrite: bool,
850    ) -> io::Result<Stream<F>> {
851        let mut names = internal::path::name_chain_from_path(path)?;
852        if let Some(stream_id) = self.stream_id_for_name_chain(&names) {
853            if self.minialloc().dir_entry(stream_id).obj_type
854                != ObjType::Stream
855            {
856                already_exists!(
857                    "Cannot create stream at {:?} because a \
858                                 storage already exists there",
859                    internal::path::path_from_name_chain(&names)
860                );
861            } else if !overwrite {
862                already_exists!(
863                    "Cannot create new stream at {:?} because a \
864                                 stream already exists there",
865                    internal::path::path_from_name_chain(&names)
866                );
867            } else {
868                let mut stream = Stream::new(&self.minialloc, stream_id);
869                stream.set_len(0)?;
870                return Ok(stream);
871            }
872        }
873        // If names is empty, that means we're trying to create the root.  But
874        // the root always already exists and will have been rejected above.
875        debug_assert!(!names.is_empty());
876        let name = names.pop().unwrap();
877        let parent_id = match self.stream_id_for_name_chain(&names) {
878            Some(stream_id) => stream_id,
879            None => {
880                not_found!("Parent storage doesn't exist");
881            }
882        };
883        let new_stream_id = self.minialloc_mut().insert_dir_entry(
884            parent_id,
885            name,
886            ObjType::Stream,
887        )?;
888        Ok(Stream::new(&self.minialloc, new_stream_id))
889    }
890
891    /// Removes the stream object at the provided path.
892    pub fn remove_stream<P: AsRef<Path>>(
893        &mut self,
894        path: P,
895    ) -> io::Result<()> {
896        self.remove_stream_with_path(path.as_ref())
897    }
898
899    fn remove_stream_with_path(&mut self, path: &Path) -> io::Result<()> {
900        let mut names = internal::path::name_chain_from_path(path)?;
901        let stream_id = match self.stream_id_for_name_chain(&names) {
902            Some(parent_id) => parent_id,
903            None => not_found!("No such stream: {:?}", path),
904        };
905        let (start_sector_id, is_in_mini_stream) = {
906            let minialloc = self.minialloc();
907            let dir_entry = minialloc.dir_entry(stream_id);
908            if dir_entry.obj_type != ObjType::Stream {
909                invalid_input!("Not a stream: {:?}", path);
910            }
911            debug_assert_eq!(dir_entry.child, consts::NO_STREAM);
912            (
913                dir_entry.start_sector,
914                dir_entry.stream_len < consts::MINI_STREAM_CUTOFF as u64,
915            )
916        };
917        if is_in_mini_stream {
918            self.minialloc_mut().free_mini_chain(start_sector_id)?;
919        } else {
920            self.minialloc_mut().free_chain(start_sector_id)?;
921        }
922        debug_assert!(!names.is_empty());
923        let name = names.pop().unwrap();
924        let parent_id = self.stream_id_for_name_chain(&names).unwrap();
925        self.minialloc_mut().remove_dir_entry(parent_id, name)?;
926        Ok(())
927    }
928
929    /// Sets the user-defined bitflags for the object at the provided path.
930    /// (To get the current state bits for an object, use
931    /// `self.entry(path)?.state_bits()`.)
932    pub fn set_state_bits<P: AsRef<Path>>(
933        &mut self,
934        path: P,
935        bits: u32,
936    ) -> io::Result<()> {
937        self.set_state_bits_with_path(path.as_ref(), bits)
938    }
939
940    fn set_state_bits_with_path(
941        &mut self,
942        path: &Path,
943        bits: u32,
944    ) -> io::Result<()> {
945        let names = internal::path::name_chain_from_path(path)?;
946        let stream_id = match self.stream_id_for_name_chain(&names) {
947            Some(stream_id) => stream_id,
948            None => not_found!(
949                "No such object: {:?}",
950                internal::path::path_from_name_chain(&names)
951            ),
952        };
953        self.minialloc_mut().with_dir_entry_mut(stream_id, |dir_entry| {
954            dir_entry.state_bits = bits;
955        })
956    }
957
958    /// Sets the modified time for the object at the given path to now.  Has no
959    /// effect when called on the root storage.
960    pub fn touch<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
961        self.touch_with_path(path.as_ref())
962    }
963
964    fn touch_with_path(&mut self, path: &Path) -> io::Result<()> {
965        let names = internal::path::name_chain_from_path(path)?;
966        let path = internal::path::path_from_name_chain(&names);
967        let stream_id = match self.stream_id_for_name_chain(&names) {
968            Some(stream_id) => stream_id,
969            None => not_found!("No such object: {:?}", path),
970        };
971        if stream_id != consts::ROOT_STREAM_ID {
972            let mut minialloc = self.minialloc_mut();
973            debug_assert_ne!(
974                minialloc.dir_entry(stream_id).obj_type,
975                ObjType::Root
976            );
977            minialloc.with_dir_entry_mut(stream_id, |dir_entry| {
978                dir_entry.modified_time = Timestamp::now();
979            })?;
980        }
981        Ok(())
982    }
983
984    /// Flushes all changes to the underlying file.
985    pub fn flush(&mut self) -> io::Result<()> {
986        self.minialloc_mut().flush()
987    }
988}
989
990impl<F: fmt::Debug> fmt::Debug for CompoundFile<F> {
991    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
992        f.debug_tuple("CompoundFile").field(self.minialloc().inner()).finish()
993    }
994}
995
996//===========================================================================//
997
998#[cfg(test)]
999mod tests {
1000    use std::io::{self, Cursor, Seek, SeekFrom};
1001    use std::mem::size_of;
1002    use std::path::Path;
1003
1004    use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
1005
1006    use crate::internal::{consts, DirEntry, Header, Version};
1007
1008    use super::CompoundFile;
1009
1010    fn make_cfb_file_with_zero_padded_fat() -> io::Result<Vec<u8>> {
1011        let version = Version::V3;
1012        let mut data = Vec::<u8>::new();
1013        let mut header = Header {
1014            version,
1015            num_dir_sectors: 0,
1016            num_fat_sectors: 1,
1017            first_dir_sector: 1,
1018            first_minifat_sector: consts::END_OF_CHAIN,
1019            num_minifat_sectors: 0,
1020            first_difat_sector: consts::END_OF_CHAIN,
1021            num_difat_sectors: 0,
1022            initial_difat_entries: [consts::FREE_SECTOR;
1023                consts::NUM_DIFAT_ENTRIES_IN_HEADER],
1024        };
1025        header.initial_difat_entries[0] = 0;
1026        header.write_to(&mut data)?;
1027        // Write FAT sector:
1028        let fat: Vec<u32> = vec![consts::FAT_SECTOR, consts::END_OF_CHAIN];
1029        for &entry in fat.iter() {
1030            data.write_u32::<LittleEndian>(entry)?;
1031        }
1032        // Pad the FAT sector with zeros instead of FREE_SECTOR.  Technically
1033        // this violates the MS-CFB spec (section 2.3), but apparently some CFB
1034        // implementations do this.
1035        for _ in fat.len()..(version.sector_len() / size_of::<u32>()) {
1036            data.write_u32::<LittleEndian>(0)?;
1037        }
1038        // Write directory sector:
1039        DirEntry::empty_root_entry().write_to(&mut data)?;
1040        for _ in 1..version.dir_entries_per_sector() {
1041            DirEntry::unallocated().write_to(&mut data)?;
1042        }
1043        Ok(data)
1044    }
1045
1046    #[test]
1047    fn zero_padded_fat_strict() {
1048        let data = make_cfb_file_with_zero_padded_fat().unwrap();
1049        let result = CompoundFile::open_strict(Cursor::new(data));
1050        assert_eq!(
1051            result.err().unwrap().to_string(),
1052            "Malformed FAT (FAT has 128 entries, but file has only 2 sectors)"
1053        );
1054    }
1055
1056    // Regression test for https://github.com/mdsteele/rust-cfb/issues/8.
1057    #[test]
1058    fn zero_padded_fat_permissive() {
1059        let data = make_cfb_file_with_zero_padded_fat().unwrap();
1060        // Despite the zero-padded FAT, we should be able to read this file
1061        // under Permissive validation.
1062        CompoundFile::open(Cursor::new(data)).expect("open");
1063    }
1064
1065    // Regression test for https://github.com/mdsteele/rust-cfb/issues/52.
1066    #[test]
1067    fn update_num_dir_sectors() {
1068        // Create a CFB file with 2 sectors for the directory.
1069        let cursor = Cursor::new(Vec::new());
1070        let mut comp = CompoundFile::create(cursor).unwrap();
1071        // root + 31 entries in the first sector
1072        // 1 stream entry in the second sector
1073        for i in 0..32 {
1074            let path = format!("stream{}", i);
1075            let path = Path::new(&path);
1076            comp.create_stream(path).unwrap();
1077        }
1078        comp.flush().unwrap();
1079
1080        // read num_dir_sectors from the header
1081        let mut cursor = comp.into_inner();
1082        cursor.seek(SeekFrom::Start(40)).unwrap();
1083        let num_dir_sectors = cursor.read_u32::<LittleEndian>().unwrap();
1084        assert_eq!(num_dir_sectors, 2);
1085    }
1086}
1087
1088//===========================================================================//