[go: up one dir, main page]

epub/
doc.rs

1//! Manages the epub doc.
2//!
3//! Provides easy methods to navigate through the epub content, cover,
4//! chapters, etc.
5
6use std::cmp::Ordering;
7use std::collections::HashMap;
8use std::fs::File;
9use std::io::BufReader;
10use std::io::{Read, Seek};
11use std::path::{Component, Path, PathBuf};
12use xmlutils::XMLError;
13
14use crate::archive::EpubArchive;
15
16use crate::xmlutils;
17
/// Errors produced while opening or reading an epub document.
#[derive(Debug, thiserror::Error)]
pub enum DocError {
    /// An error reported by the archive layer.
    #[error("Archive Error: {0}")]
    ArchiveError(#[from] crate::archive::ArchiveError),
    /// An error reported by the XML helpers.
    #[error("XML Error: {0}")]
    XmlError(#[from] crate::xmlutils::XMLError),
    /// An I/O error, e.g. the epub file could not be opened.
    #[error("I/O Error: {0}")]
    IOError(#[from] std::io::Error),
    /// The epub lacks something this module requires (for instance the
    /// `manifest`, `spine` or `metadata` element of the root file).
    #[error("Invalid EPub")]
    InvalidEpub,
}
29
/// The epub specification version declared by the root file.
///
/// The derived ordering follows the declaration order of the variants, so
/// `Version2_0 < Version3_0 < Unknown(_)`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum EpubVersion {
    /// The root file declares `version="2.0"`.
    Version2_0,
    /// The root file declares `version="3.0"`.
    Version3_0,
    /// Any other declared version, kept verbatim.
    Unknown(String),
}
36
/// One entry of the table of contents.
///
/// Comparison (`==`, `<`, sorting) only looks at [`NavPoint::play_order`];
/// the label, content path and children are ignored.
#[derive(Clone, Debug, Eq)]
pub struct NavPoint {
    /// Title shown for this entry
    pub label: String,
    /// Path of the resource this entry points to
    pub content: PathBuf,
    /// Entries nested below this one
    pub children: Vec<NavPoint>,
    /// Position of this entry in the table of contents
    pub play_order: usize,
}

impl PartialEq for NavPoint {
    fn eq(&self, rhs: &Self) -> bool {
        matches!(self.cmp(rhs), Ordering::Equal)
    }
}

impl PartialOrd for NavPoint {
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}

impl Ord for NavPoint {
    fn cmp(&self, rhs: &Self) -> Ordering {
        Ord::cmp(&self.play_order, &rhs.play_order)
    }
}
67
/// One `itemref` entry of the epub spine.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SpineItem {
    /// Value of the `idref` attribute: the id of the referenced resource
    pub idref: String,
    /// Value of the optional `id` attribute
    pub id: Option<String>,
    /// Value of the optional `properties` attribute, kept verbatim
    pub properties: Option<String>,
    /// `true` when the `linear` attribute is absent or equal to `yes`
    pub linear: bool,
}
75
/// Handle on an opened epub document.
///
/// Holds the archive plus everything parsed from the root file (spine,
/// resources, metadata, table of contents) and a cursor over the spine.
#[derive(Clone, Debug)]
pub struct EpubDoc<R: Read + Seek> {
    /// the zip archive
    archive: EpubArchive<R>,

    /// The current chapter, as an index into `spine`
    current: usize,

    /// epub spec version
    pub version: EpubVersion,

    /// epub spine entries, in reading order
    pub spine: Vec<SpineItem>,

    /// resource id -> (path, mime)
    pub resources: HashMap<String, (PathBuf, String)>,

    /// table of contents: the list of `NavPoint` read from the toc.ncx
    pub toc: Vec<NavPoint>,

    /// title of the table of contents
    pub toc_title: String,

    /// The epub metadata stored as key -> values
    ///
    /// # Examples
    ///
    /// ```
    /// # use epub::doc::EpubDoc;
    /// # let doc = EpubDoc::new("test.epub");
    /// # let doc = doc.unwrap();
    /// let title = doc.metadata.get("title");
    /// assert_eq!(title.unwrap(), &vec!["Todo es mío".to_string()]);
    /// ```
    pub metadata: HashMap<String, Vec<String>>,

    /// root file base path
    pub root_base: PathBuf,

    /// root file full path
    pub root_file: PathBuf,

    /// Custom css list to inject in every xhtml file
    pub extra_css: Vec<String>,

    /// unique identifier
    pub unique_identifier: Option<String>,

    /// The id of the cover, if any
    pub cover_id: Option<String>,
}
128
/// An [`EpubDoc`] backed by an empty in-memory archive, for testing purposes.
#[cfg(feature = "mock")]
impl EpubDoc<std::io::Cursor<Vec<u8>>> {
    /// Builds a document with no resources, spine, metadata or table of
    /// contents.
    ///
    /// # Errors
    ///
    /// Returns an error if the archive layer rejects the empty zip.
    pub fn mock() -> Result<Self, DocError> {
        // Smallest valid zip: an end-of-central-directory signature followed
        // by 18 zero bytes, so that the archive can be created.
        let data: Vec<u8> = vec![
            0x50, 0x4b, 0x05, 0x06, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ];

        let archive = EpubArchive::from_reader(std::io::Cursor::new(data))?;
        Ok(Self {
            archive,
            // Same initial version `from_reader` uses before parsing the root file.
            version: EpubVersion::Version2_0,
            spine: vec![],
            toc: vec![],
            toc_title: String::new(),
            resources: HashMap::new(),
            metadata: HashMap::new(),
            root_file: PathBuf::new(),
            root_base: PathBuf::new(),
            current: 0,
            extra_css: vec![],
            unique_identifier: None,
            cover_id: None,
        })
    }
}
155
156impl EpubDoc<BufReader<File>> {
157    /// Opens the epub file in `path`.
158    ///
159    /// Initialize some internal variables to be able to access to the epub
160    /// spine definition and to navigate through the epub.
161    ///
162    /// # Examples
163    ///
164    /// ```
165    /// use epub::doc::EpubDoc;
166    ///
167    /// let doc = EpubDoc::new("test.epub");
168    /// assert!(doc.is_ok());
169    /// ```
170    ///
171    /// # Errors
172    ///
173    /// Returns an error if the epub is broken or if the file doesn't
174    /// exists.
175    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, DocError> {
176        let path = path.as_ref();
177        let file = File::open(path)?;
178        let mut doc = Self::from_reader(BufReader::new(file))?;
179        doc.archive.path = path.to_path_buf();
180        Ok(doc)
181    }
182}
183
184impl<R: Read + Seek> EpubDoc<R> {
185    /// Opens the epub contained in `reader`.
186    ///
187    /// Initialize some internal variables to be able to access to the epub
188    /// spine definition and to navigate through the epub.
189    ///
190    /// # Examples
191    ///
192    /// ```
193    /// use epub::doc::EpubDoc;
194    /// use std::fs::File;
195    /// use std::io::{Cursor, Read};
196    ///
197    /// let mut file = File::open("test.epub").unwrap();
198    /// let mut buffer = Vec::new();
199    /// file.read_to_end(&mut buffer).unwrap();
200    ///
201    /// let cursor = Cursor::new(buffer);
202    ///
203    /// let doc = EpubDoc::from_reader(cursor);
204    /// assert!(doc.is_ok());
205    /// ```
206    ///
207    /// # Errors
208    ///
209    /// Returns an error if the epub is broken.
210    pub fn from_reader(reader: R) -> Result<Self, DocError> {
211        let mut archive = EpubArchive::from_reader(reader)?;
212
213        let container = archive.get_container_file()?;
214        let root_file = get_root_file(&container)?;
215        let base_path = root_file.parent().expect("All files have a parent");
216        let mut doc = Self {
217            archive,
218            version: EpubVersion::Version2_0,
219            spine: vec![],
220            toc: vec![],
221            toc_title: String::new(),
222            resources: HashMap::new(),
223            metadata: HashMap::new(),
224            root_file: root_file.clone(),
225            root_base: base_path.to_path_buf(),
226            current: 0,
227            extra_css: vec![],
228            unique_identifier: None,
229            cover_id: None,
230        };
231        doc.fill_resources()?;
232        Ok(doc)
233    }
234
235    /// Returns the first metadata found with this name.
236    ///
237    /// # Examples
238    ///
239    /// ```
240    /// # use epub::doc::EpubDoc;
241    /// # let doc = EpubDoc::new("test.epub");
242    /// # let doc = doc.unwrap();
243    /// let title = doc.mdata("title");
244    /// assert_eq!(title.unwrap(), "Todo es mío");
245    pub fn mdata(&self, name: &str) -> Option<String> {
246        self.metadata.get(name).and_then(|v| v.get(0).cloned())
247    }
248
249    /// Returns the id of the epub cover.
250    ///
251    /// The cover is searched in the doc metadata, by the tag `<meta name="cover" value"..">`
252    ///
253    /// # Examples
254    ///
255    /// ```rust
256    /// use epub::doc::EpubDoc;
257    ///
258    /// let doc = EpubDoc::new("test.epub");
259    /// assert!(doc.is_ok());
260    /// let mut doc = doc.unwrap();
261    ///
262    /// let cover_id = doc.get_cover_id();
263    /// ```
264    ///
265    /// This returns the cover id, which can be used to get the cover data.
266    /// The id is not guaranteed to be valid.
267    pub fn get_cover_id(&self) -> Option<String> {
268        self.cover_id.clone()
269    }
270
271    /// Returns the cover's content and mime-type
272    ///
273    /// # Examples
274    ///
275    /// ```rust,ignore
276    /// use std::fs;
277    /// use std::io::Write;
278    /// use epub::doc::EpubDoc;
279    ///
280    /// let doc = EpubDoc::new("test.epub");
281    /// assert!(doc.is_ok());
282    /// let mut doc = doc.unwrap();
283    ///
284    /// let cover_data = doc.get_cover().unwrap();
285    ///
286    /// let f = fs::File::create("/tmp/cover.png");
287    /// assert!(f.is_ok());
288    /// let mut f = f.unwrap();
289    /// let resp = f.write_all(&cover_data);
290    /// ```
291    ///
292    /// Returns [`None`] if the cover can't be found.
293    pub fn get_cover(&mut self) -> Option<(Vec<u8>, String)> {
294        let cover_id = self.get_cover_id();
295        cover_id.and_then(|cid| {
296            self.get_resource(&cid)
297        })
298    }
299
300    /// Returns Release Identifier defined at
301    /// https://www.w3.org/publishing/epub32/epub-packages.html#sec-metadata-elem-identifiers-pid
302    pub fn get_release_identifier(&self) -> Option<String> {
303        match (
304            self.unique_identifier.as_ref(),
305            self.mdata("dcterms:modified"),
306        ) {
307            (Some(unique_identifier), Some(modified)) => {
308                Some(format!("{}@{}", unique_identifier, modified))
309            }
310            _ => None,
311        }
312    }
313
314    /// Returns the resource content by full path in the epub archive
315    ///
316    /// Returns [`None`] if the path doesn't exist in the epub
317    pub fn get_resource_by_path<P: AsRef<Path>>(&mut self, path: P) -> Option<Vec<u8>> {
318        self.archive.get_entry(path).ok()
319    }
320
321    /// Returns the resource content and mime-type by the id defined in the spine
322    ///
323    /// Returns [`None`] if the id doesn't exists in the epub
324    pub fn get_resource(&mut self, id: &str) -> Option<(Vec<u8>, String)> {
325        let (path, mime) = self.resources.get(id)?;
326        let path = path.clone();
327        let mime = mime.clone();
328        let content = self.get_resource_by_path(&path)?;
329        Some((content, mime))
330    }
331
332    /// Returns the resource content by full path in the epub archive, as String
333    ///
334    /// Returns [`None`] if the path doesn't exists in the epub
335    pub fn get_resource_str_by_path<P: AsRef<Path>>(&mut self, path: P) -> Option<String> {
336        self.archive.get_entry_as_str(path).ok()
337    }
338
339    /// Returns the resource content and mime-type by the id defined in the spine, as String
340    ///
341    /// Returns [`None`] if the id doesn't exists in the epub
342    pub fn get_resource_str(&mut self, id: &str) -> Option<(String, String)> {
343        let (path, mime) = self.resources.get(id)?;
344        let mime = mime.clone();
345        let path = path.clone();
346        let content = self.get_resource_str_by_path(path)?;
347        Some((content, mime))
348    }
349
350    /// Returns the resource mime-type
351    ///
352    /// # Examples
353    ///
354    /// ```
355    /// # use epub::doc::EpubDoc;
356    /// # let doc = EpubDoc::new("test.epub");
357    /// # let doc = doc.unwrap();
358    /// let mime = doc.get_resource_mime("portada.png");
359    /// assert_eq!("image/png", mime.unwrap());
360    /// ```
361    ///
362    /// Returns [`None`] the resource can't be found.
363    pub fn get_resource_mime(&self, id: &str) -> Option<String> {
364        self.resources.get(id).map(|r| r.1.clone())
365    }
366
367    /// Returns the resource mime searching by source full path
368    ///
369    /// # Examples
370    ///
371    /// ```
372    /// # use epub::doc::EpubDoc;
373    /// # let doc = EpubDoc::new("test.epub");
374    /// # let doc = doc.unwrap();
375    /// let mime = doc.get_resource_mime_by_path("OEBPS/Images/portada.png");
376    /// assert_eq!("image/png", mime.unwrap());
377    /// ```
378    ///
379    /// Returns [`None`] the resource can't be found.
380    pub fn get_resource_mime_by_path<P: AsRef<Path>>(&self, path: P) -> Option<String> {
381        let path = path.as_ref();
382
383        self.resources.iter().find_map(|(_, r)| {
384            if r.0 == path {
385                Some(r.1.to_string())
386            } else {
387                None
388            }
389        })
390    }
391
392    /// Returns the current chapter content and mime-type
393    ///
394    /// The current follows the epub spine order. You can modify the current
395    /// calling to `go_next`, `go_prev` or `set_current` methods.
396    ///
397    /// Can return [`None`] if the epub is broken.
398    pub fn get_current(&mut self) -> Option<(Vec<u8>, String)> {
399        let current_id = self.get_current_id()?;
400        self.get_resource(&current_id)
401    }
402
403    /// See [`Self::get_current`]
404    pub fn get_current_str(&mut self) -> Option<(String, String)> {
405        let current_id = self.get_current_id()?;
406        self.get_resource_str(&current_id)
407    }
408
409    /// Returns the current chapter data, with resource uris renamed so they
410    /// have the epub:// prefix and all are relative to the root file
411    ///
412    /// This method is useful to render the content with a html engine, because inside the epub
413    /// local paths are relatives, so you can provide that content, because the engine will look
414    /// for the relative path in the filesystem and that file isn't there. You should provide files
415    /// with epub:// using [`Self::get_resource_by_path`]
416    ///
417    /// # Examples
418    ///
419    /// ```
420    /// # use epub::doc::EpubDoc;
421    /// # let mut doc = EpubDoc::new("test.epub").unwrap();
422    /// let current = doc.get_current_with_epub_uris().unwrap();
423    /// let text = String::from_utf8(current).unwrap();
424    /// assert!(text.contains("epub://OEBPS/Images/portada.png"));
425
426    /// doc.go_next();
427    /// let current = doc.get_current_with_epub_uris().unwrap();
428    /// let text = String::from_utf8(current).unwrap();
429    /// assert!(text.contains("epub://OEBPS/Styles/stylesheet.css"));
430    /// assert!(text.contains("http://creativecommons.org/licenses/by-sa/3.0/"));
431    /// ```
432    ///
433    /// # Errors
434    ///
435    /// Returns [`DocError::InvalidEpub`] if the epub is broken.
436    pub fn get_current_with_epub_uris(&mut self) -> Result<Vec<u8>, DocError> {
437        let path = self.get_current_path().ok_or(DocError::InvalidEpub)?;
438        let (current, _mime) = self.get_current().ok_or(DocError::InvalidEpub)?;
439
440        let resp = xmlutils::replace_attrs(
441            current.as_slice(),
442            |element, attr, value| match (element, attr) {
443                ("link", "href") | ("image", "href") | ("a", "href") | ("img", "src") => {
444                    build_epub_uri(&path, value)
445                }
446                _ => String::from(value),
447            },
448            &self.extra_css,
449        );
450
451        resp.map_err(From::from)
452    }
453
454    /// Returns the current chapter mimetype
455    ///
456    /// # Examples
457    ///
458    /// ```
459    /// # use epub::doc::EpubDoc;
460    /// # let doc = EpubDoc::new("test.epub");
461    /// # let doc = doc.unwrap();
462    /// let m = doc.get_current_mime();
463    /// assert_eq!("application/xhtml+xml", m.unwrap());
464    /// ```
465    ///
466    /// Can return [`None`] if the epub is broken.
467    pub fn get_current_mime(&self) -> Option<String> {
468        let current_id = self.get_current_id()?;
469        self.get_resource_mime(&current_id)
470    }
471
472    /// Returns the current chapter full path
473    ///
474    /// # Examples
475    ///
476    /// ```
477    /// # use epub::doc::EpubDoc;
478    /// # use std::path::Path;
479    /// # let doc = EpubDoc::new("test.epub");
480    /// # let doc = doc.unwrap();
481    /// let p = doc.get_current_path();
482    /// assert_eq!(Path::new("OEBPS/Text/titlepage.xhtml"), p.unwrap());
483    /// ```
484    ///
485    /// Can return [`None`] if the epub is broken.
486    pub fn get_current_path(&self) -> Option<PathBuf> {
487        let current_id = self.get_current_id()?;
488        self.resources.get(&current_id).map(|r| r.0.clone())
489    }
490
491    /// Returns the current chapter id
492    ///
493    /// # Examples
494    ///
495    /// ```
496    /// # use epub::doc::EpubDoc;
497    /// # let doc = EpubDoc::new("test.epub");
498    /// # let doc = doc.unwrap();
499    /// let id = doc.get_current_id();
500    /// assert_eq!("titlepage.xhtml", id.unwrap());
501    /// ```
502    ///
503    /// Can return [`None`] if the epub is broken.
504    pub fn get_current_id(&self) -> Option<String> {
505        self.spine.get(self.current).cloned().map(|i| i.idref)
506    }
507
508    /// Changes current to the next chapter
509    ///
510    /// # Examples
511    ///
512    /// ```
513    /// # use epub::doc::EpubDoc;
514    /// # let doc = EpubDoc::new("test.epub");
515    /// # let mut doc = doc.unwrap();
516    /// doc.go_next();
517    /// assert_eq!("000.xhtml", doc.get_current_id().unwrap());
518    ///
519    /// let len = doc.spine.len();
520    /// for i in 1..len {
521    ///     doc.go_next();
522    /// }
523    /// assert!(!doc.go_next());
524    /// ```
525    ///
526    /// Returns [`false`] if the current chapter is the last one
527    pub fn go_next(&mut self) -> bool {
528        if self.current + 1 >= self.spine.len() {
529            false
530        } else {
531            self.current += 1;
532            true
533        }
534    }
535
536    /// Changes current to the prev chapter
537    ///
538    /// # Examples
539    ///
540    /// ```
541    /// # use epub::doc::EpubDoc;
542    /// # let doc = EpubDoc::new("test.epub");
543    /// # let mut doc = doc.unwrap();
544    /// assert!(!doc.go_prev());
545    ///
546    /// doc.go_next(); // 000.xhtml
547    /// doc.go_next(); // 001.xhtml
548    /// doc.go_next(); // 002.xhtml
549    /// doc.go_prev(); // 001.xhtml
550    /// assert_eq!("001.xhtml", doc.get_current_id().unwrap());
551    /// ```
552    ///
553    /// Returns [`false`] if the current chapter is the first one
554    pub fn go_prev(&mut self) -> bool {
555        if self.current < 1 {
556            false
557        } else {
558            self.current -= 1;
559            true
560        }
561    }
562
563    /// Returns the number of chapters
564    ///
565    /// # Examples
566    ///
567    /// ```
568    /// # use epub::doc::EpubDoc;
569    /// # let doc = EpubDoc::new("test.epub");
570    /// # let mut doc = doc.unwrap();
571    /// assert_eq!(17, doc.get_num_pages());
572    /// ```
573    pub fn get_num_pages(&self) -> usize {
574        self.spine.len()
575    }
576
577    /// Returns the current chapter number, starting from 0
578    pub fn get_current_page(&self) -> usize {
579        self.current
580    }
581
582    /// Changes the current page
583    ///
584    /// # Examples
585    ///
586    /// ```
587    /// # use epub::doc::EpubDoc;
588    /// # let doc = EpubDoc::new("test.epub");
589    /// # let mut doc = doc.unwrap();
590    /// assert_eq!(0, doc.get_current_page());
591    /// doc.set_current_page(2);
592    /// assert_eq!("001.xhtml", doc.get_current_id().unwrap());
593    /// assert_eq!(2, doc.get_current_page());
594    /// assert!(!doc.set_current_page(50));
595    /// ```
596    ///
597    /// Returns [`false`] if the page is out of bounds
598    pub fn set_current_page(&mut self, n: usize) -> bool {
599        if n >= self.spine.len() {
600            false
601        } else {
602            self.current = n;
603            true
604        }
605    }
606
607    /// This will inject this css in every html page getted with
608    /// [`Self::get_current_with_epub_uris`]
609    ///
610    /// # Examples
611    ///
612    /// ```
613    /// # use epub::doc::EpubDoc;
614    /// # let doc = EpubDoc::new("test.epub");
615    /// # let mut doc = doc.unwrap();
616    /// # let _ = doc.set_current_page(2);
617    /// let extracss = "body { background-color: black; color: white }";
618    /// doc.add_extra_css(extracss);
619    /// let current = doc.get_current_with_epub_uris().unwrap();
620    /// let text = String::from_utf8(current).unwrap();
621    /// assert!(text.contains(extracss));
622    /// ```
623    pub fn add_extra_css(&mut self, css: &str) {
624        self.extra_css.push(String::from(css));
625    }
626
627    /// Function to convert a resource path to a chapter number in the spine
628    /// If the resource isn't in the spine list, None will be returned
629    ///
630    /// This method is useful to convert a toc [`NavPoint`] content to a chapter number
631    /// to be able to navigate easily
632    pub fn resource_uri_to_chapter(&self, uri: &PathBuf) -> Option<usize> {
633        for (k, (path, _mime)) in &self.resources {
634            if path == uri {
635                return self.resource_id_to_chapter(k);
636            }
637        }
638
639        None
640    }
641
642    /// Function to convert a resource id to a chapter number in the spine
643    /// If the resourse isn't in the spine list, None will be returned
644    pub fn resource_id_to_chapter(&self, uri: &str) -> Option<usize> {
645        self.spine.iter().position(|item| item.idref == uri)
646    }
647
648    fn fill_resources(&mut self) -> Result<(), DocError> {
649        let container = self.archive.get_entry(&self.root_file)?;
650        let root = xmlutils::XMLReader::parse(container.as_slice())?;
651        self.version = match root.borrow().get_attr("version") {
652            Some(v) if v == "2.0" => EpubVersion::Version2_0,
653            Some(v) if v == "3.0" => EpubVersion::Version3_0,
654            Some(v) => EpubVersion::Unknown(String::from(v)),
655            _ => EpubVersion::Unknown(String::from("Unknown")),
656        };
657        let unique_identifier_id = &root.borrow().get_attr("unique-identifier");
658
659        // resources from manifest
660        // This should be run before everything else, because other functions relies on
661        // self.resources and should be filled before calling `fill_toc`
662        let manifest = root
663            .borrow()
664            .find("manifest")
665            .ok_or(DocError::InvalidEpub)?;
666        for r in &manifest.borrow().children {
667            let item = r.borrow();
668            if self.cover_id.is_none() {
669                if let (Some(id), Some(property)) = (item.get_attr("id"), item.get_attr("properties")) {
670                    if property == "cover-image" {
671                        self.cover_id = Some(id);
672                    }
673                }
674            }
675            let _ = self.insert_resource(&item);
676        }
677
678        // items from spine
679        let spine = root.borrow().find("spine").ok_or(DocError::InvalidEpub)?;
680        for r in &spine.borrow().children {
681            let item = r.borrow();
682            let _ = self.insert_spine(&item);
683        }
684
685        // toc.ncx
686        if let Some(toc) = spine.borrow().get_attr("toc") {
687            let _ = self.fill_toc(&toc);
688        }
689
690        // metadata
691        let metadata = root
692            .borrow()
693            .find("metadata")
694            .ok_or(DocError::InvalidEpub)?;
695        for r in &metadata.borrow().children {
696            let item = r.borrow();
697            if item.name.local_name == "meta" {
698                if let (Some(k), Some(v)) = (item.get_attr("name"), item.get_attr("content")) {
699                    if k == "cover" {
700                        self.cover_id = Some(v.clone());
701                    }
702                    self.metadata.entry(k).or_default().push(v);
703                } else if let Some(k) = item.get_attr("property") {
704                    let v = item.text.clone().unwrap_or_default();
705                    self.metadata.entry(k).or_default().push(v);
706                }
707            } else {
708                let k = &item.name.local_name;
709                let v = item.text.clone().unwrap_or_default();
710                if k == "identifier"
711                    && self.unique_identifier.is_none()
712                    && unique_identifier_id.is_some()
713                {
714                    if let Some(id) = item.get_attr("id") {
715                        if &id == unique_identifier_id.as_ref().unwrap() {
716                            self.unique_identifier = Some(v.to_string());
717                        }
718                    }
719                }
720                if self.metadata.contains_key(k) {
721                    if let Some(arr) = self.metadata.get_mut(k) {
722                        arr.push(v);
723                    }
724                } else {
725                    self.metadata.insert(k.clone(), vec![v]);
726                }
727            }
728        }
729
730        Ok(())
731    }
732
733    // Forcibly converts separators in a filepath to unix separators to
734    // to ensure that ZipArchive's by_name method will retrieve the proper
735    // file. Failing to convert to unix-style on Windows causes the
736    // ZipArchive not to find the file.
737    fn convert_path_seps<P: AsRef<Path>>(&self, href: P) -> PathBuf {
738        let mut path = self.root_base.join(href);
739        if cfg!(windows) {
740            path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
741        }
742        path
743    }
744
745    fn insert_resource(&mut self, item: &xmlutils::XMLNode) -> Result<(), XMLError> {
746        let id = item
747            .get_attr("id")
748            .ok_or_else(|| XMLError::AttrNotFound("id".into()))?;
749        let href = item
750            .get_attr("href")
751            .ok_or_else(|| XMLError::AttrNotFound("href".into()))?;
752        let mtype = item
753            .get_attr("media-type")
754            .ok_or_else(|| XMLError::AttrNotFound("media-type".into()))?;
755
756        self.resources
757            .insert(id, (self.convert_path_seps(href), mtype));
758        Ok(())
759    }
760
761    fn insert_spine(&mut self, item: &xmlutils::XMLNode) -> Result<(), DocError> {
762        let idref = item
763            .get_attr("idref")
764            .ok_or_else(|| XMLError::AttrNotFound("idref".into()))?;
765        let linear = item.get_attr("linear").unwrap_or("yes".into()) == "yes";
766        let properties = item.get_attr("properties");
767        let id = item.get_attr("id");
768        self.spine.push(SpineItem { idref, id, linear, properties });
769        Ok(())
770    }
771
772    fn fill_toc(&mut self, id: &str) -> Result<(), DocError> {
773        let toc_res = self.resources.get(id).ok_or(DocError::InvalidEpub)?; // this should be turned into it's own error type, but
774
775        let container = self.archive.get_entry(&toc_res.0)?;
776        let root = xmlutils::XMLReader::parse(container.as_slice())?;
777
778        self.toc_title = root.borrow().find("docTitle").and_then(|dt| {
779            dt.borrow()
780                .children
781                .get(0)
782                .and_then(|t| t.borrow().text.clone())
783        }).unwrap_or_default();
784
785        let mapnode = root
786            .borrow()
787            .find("navMap")
788            .ok_or_else(|| XMLError::AttrNotFound("navMap".into()))?;
789
790        self.toc.append(&mut self.get_navpoints(&mapnode.borrow()));
791        self.toc.sort();
792
793        Ok(())
794    }
795
796    /// Recursively extract all navpoints from a node.
797    fn get_navpoints(&self, parent: &xmlutils::XMLNode) -> Vec<NavPoint> {
798        let mut navpoints = Vec::new();
799
800        // TODO: parse metadata (dtb:totalPageCount, dtb:depth, dtb:maxPageNumber)
801
802        for nav in &parent.children {
803            let item = nav.borrow();
804            if item.name.local_name != "navPoint" {
805                continue;
806            }
807            let play_order = item.get_attr("playOrder").and_then(|n| n.parse().ok());
808            let content = item
809                .find("content")
810                .and_then(|c| c.borrow().get_attr("src").map(|p| self.root_base.join(p)));
811
812            let label = item.find("navLabel").and_then(|l| {
813                l.borrow()
814                    .children
815                    .get(0)
816                    .and_then(|t| t.borrow().text.clone())
817            });
818
819            if let (Some(o), Some(c), Some(l)) = (play_order, content, label) {
820                let navpoint = NavPoint {
821                    label: l.clone(),
822                    content: c.clone(),
823                    children: self.get_navpoints(&item),
824                    play_order: o,
825                };
826                navpoints.push(navpoint);
827            }
828        }
829
830        navpoints.sort();
831        navpoints
832    }
833}
834
835fn get_root_file(container: &[u8]) -> Result<PathBuf, DocError> {
836    let root = xmlutils::XMLReader::parse(container)?;
837    let el = root.borrow();
838    let element = el
839        .find("rootfile")
840        .ok_or_else(|| XMLError::AttrNotFound("rootfile".into()))?;
841    let el2 = element.borrow();
842
843    let attr = el2
844        .get_attr("full-path")
845        .ok_or_else(|| XMLError::AttrNotFound("full-path".into()))?;
846
847    Ok(PathBuf::from(attr))
848}
849
/// Builds the `epub://` uri of a link found in the chapter stored at `path`.
///
/// `append` is the raw attribute value (`href`/`src`). Absolute `http://`
/// and `https://` urls are returned untouched. Anything else is resolved
/// against the directory of `path`: `..` components walk up, normal
/// components are appended, and `.`/root components are ignored. The result
/// always uses `/` as separator.
fn build_epub_uri<P: AsRef<Path>>(path: P, append: &str) -> String {
    // Allow external links. The scheme is checked in full: a bare "http"
    // prefix would also match relative files such as "http-notes.xhtml".
    if append.starts_with("http://") || append.starts_with("https://") {
        return String::from(append);
    }

    // Start from the directory containing the current file.
    let mut cpath = path.as_ref().to_path_buf();
    cpath.pop();

    for component in Path::new(append).components() {
        match component {
            Component::ParentDir => {
                cpath.pop();
            }
            Component::Normal(segment) => cpath.push(segment),
            // `.`, root and prefix components do not move the target
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
    }

    // If on Windows, replace all Windows path separators with Unix path separators
    let resolved = if cfg!(windows) {
        cpath.to_string_lossy().replace('\\', "/")
    } else {
        cpath.to_string_lossy().to_string()
    };

    format!("epub://{}", resolved)
}