[go: up one dir, main page]

gif/reader/
mod.rs

1use alloc::borrow::Cow;
2use alloc::vec::Vec;
3use core::convert::{TryFrom, TryInto};
4use core::iter::FusedIterator;
5use core::mem;
6use core::num::NonZeroU64;
7use std::io;
8use std::io::prelude::*;
9
10use crate::common::{Block, Frame};
11use crate::{AnyExtension, Extension, Repeat};
12
13mod converter;
14mod decoder;
15
16pub use self::decoder::{
17    Decoded, DecodingError, DecodingFormatError, FrameDataType, FrameDecoder, OutputBuffer,
18    StreamingDecoder, Version, PLTE_CHANNELS,
19};
20
21pub use self::converter::ColorOutput;
22use self::converter::PixelConverter;
23
/// Upper bound on the memory the decoder may use while decoding a single frame.
#[derive(Clone, Debug)]
pub enum MemoryLimit {
    /// Do not enforce any limit.
    ///
    /// This is a potentially dangerous choice when processing images of unknown origin:
    /// a maliciously crafted file (a "decompression bomb") can be tiny on disk yet demand
    /// an enormous amount of memory to decode.
    ///
    /// A single frame can never exceed 16GiB, which bounds the risk somewhat on modern
    /// machines, but that is still a significant amount of memory.
    Unlimited,
    /// Allow at most this many bytes for a single frame.
    ///
    /// Allocator overhead and small auxiliary buffers used by the decoder are not accounted
    /// for, so the limit may not be enforced precisely; the size of each frame's output
    /// buffer, however, is limited precisely.
    //
    // `NonZeroU64` keeps FFI simple: thanks to the guaranteed niche optimization,
    // `Unlimited` is represented as `0` and the whole enum is just a `u64`.
    Bytes(NonZeroU64),
}
48
49impl MemoryLimit {
50    fn check_size(&self, size: usize) -> Result<(), DecodingError> {
51        match self {
52            Self::Unlimited => Ok(()),
53            Self::Bytes(limit) => {
54                if size as u64 <= limit.get() {
55                    Ok(())
56                } else {
57                    Err(DecodingError::MemoryLimit)
58                }
59            }
60        }
61    }
62
63    fn buffer_size(&self, color: ColorOutput, width: u16, height: u16) -> Option<usize> {
64        let pixels = u64::from(width) * u64::from(height);
65
66        let bytes_per_pixel = match color {
67            ColorOutput::Indexed => 1,
68            ColorOutput::RGBA => 4,
69        };
70
71        // This cannot overflow because the maximum possible value is 16GiB, well within u64 range
72        let total_bytes = pixels * bytes_per_pixel;
73
74        // On 32-bit platforms the size of the output buffer may not be representable
75        let usize_bytes = usize::try_from(total_bytes).ok()?;
76
77        match self {
78            Self::Unlimited => Some(usize_bytes),
79            Self::Bytes(limit) => {
80                if total_bytes > limit.get() {
81                    None
82                } else {
83                    Some(usize_bytes)
84                }
85            }
86        }
87    }
88
89    #[inline]
90    fn try_reserve(&self, vec: &mut Vec<u8>, additional: usize) -> Result<(), DecodingError> {
91        let len = vec
92            .len()
93            .checked_add(additional)
94            .ok_or(DecodingError::MemoryLimit)?;
95        self.check_size(len)?;
96        vec.try_reserve(additional)
97            .map_err(|_| DecodingError::OutOfMemory)?;
98        Ok(())
99    }
100}
101
102/// Options for opening a GIF decoder. [`DecodeOptions::read_info`] will start the decoder.
103#[derive(Clone, Debug)]
104pub struct DecodeOptions {
105    memory_limit: MemoryLimit,
106    color_output: ColorOutput,
107    check_frame_consistency: bool,
108    skip_frame_decoding: bool,
109    check_for_end_code: bool,
110    allow_unknown_blocks: bool,
111}
112
113impl Default for DecodeOptions {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119impl DecodeOptions {
120    /// Creates a new decoder builder
121    #[must_use]
122    #[inline]
123    pub fn new() -> Self {
124        Self {
125            memory_limit: MemoryLimit::Bytes(50_000_000.try_into().unwrap()), // 50 MB
126            color_output: ColorOutput::Indexed,
127            check_frame_consistency: false,
128            skip_frame_decoding: false,
129            check_for_end_code: false,
130            allow_unknown_blocks: false,
131        }
132    }
133
134    /// Configure how color data is decoded.
135    #[inline]
136    pub fn set_color_output(&mut self, color: ColorOutput) {
137        self.color_output = color;
138    }
139
140    /// Configure a memory limit for decoding.
141    pub fn set_memory_limit(&mut self, limit: MemoryLimit) {
142        self.memory_limit = limit;
143    }
144
145    /// Configure if frames must be within the screen descriptor.
146    ///
147    /// The default is `false`.
148    ///
149    /// When turned on, all frame descriptors being read must fit within the screen descriptor or
150    /// otherwise an error is returned and the stream left in an unspecified state.
151    ///
152    /// When turned off, frames may be arbitrarily larger or offset in relation to the screen. Many
153    /// other decoder libraries handle this in highly divergent ways. This moves all checks to the
154    /// caller, for example to emulate a specific style.
155    pub fn check_frame_consistency(&mut self, check: bool) {
156        self.check_frame_consistency = check;
157    }
158
159    /// Configure whether to skip decoding frames.
160    ///
161    /// The default is false.
162    ///
163    /// When turned on, LZW decoding is skipped. [`Decoder::read_next_frame`] will return
164    /// compressed LZW bytes in frame's data.
165    /// [`Decoder::next_frame_info`] will return the metadata of the next frame as usual.
166    /// This is useful to count frames without incurring the overhead of decoding.
167    pub fn skip_frame_decoding(&mut self, skip: bool) {
168        self.skip_frame_decoding = skip;
169    }
170
171    /// Configure if LZW encoded blocks must end with a marker end code.
172    ///
173    /// The default is `false`.
174    ///
175    /// When turned on, all image data blocks—which are LZW encoded—must contain a special bit
176    /// sequence signalling the end of the data. LZW processing terminates when this code is
177    /// encountered. The specification states that it must be the last code output by the encoder
178    /// for an image.
179    ///
180    /// When turned off then image data blocks can simply end. Note that this might silently ignore
181    /// some bits of the last or second to last byte.
182    pub fn check_lzw_end_code(&mut self, check: bool) {
183        self.check_for_end_code = check;
184    }
185
186    /// Configure if unknown blocks are allowed to be decoded.
187    ///
188    /// The default is `false`.
189    ///
190    /// When turned on, the decoder will allow unknown blocks to be in the
191    /// `BlockStart` position.
192    ///
193    /// When turned off, decoded block starts must mark an `Image`, `Extension`,
194    /// or `Trailer` block. Otherwise, the decoded image will return an error.
195    /// If an unknown block error is returned from decoding, enabling this
196    /// setting may allow for a further state of decoding on the next attempt.
197    ///
198    /// This option also allows unknown extension blocks. The decoder assumes the follow the same
199    /// block layout, i.e. a sequence of zero-length terminated sub-blocks immediately follow the
200    /// extension introducer.
201    pub fn allow_unknown_blocks(&mut self, check: bool) {
202        self.allow_unknown_blocks = check;
203    }
204
205    /// Reads the logical screen descriptor including the global color palette
206    ///
207    /// Returns a [`Decoder`]. All decoder configuration has to be done beforehand.
208    pub fn read_info<R: Read>(self, r: R) -> Result<Decoder<R>, DecodingError> {
209        Decoder::with_no_init(r, StreamingDecoder::with_options(&self), self).init()
210    }
211}
212
213struct ReadDecoder<R: Read> {
214    reader: io::BufReader<R>,
215    decoder: StreamingDecoder,
216    at_eof: bool,
217}
218
219impl<R: Read> ReadDecoder<R> {
220    #[inline(never)]
221    fn decode_next(
222        &mut self,
223        write_into: &mut OutputBuffer<'_>,
224    ) -> Result<Option<Decoded>, DecodingError> {
225        while !self.at_eof {
226            let (consumed, result) = {
227                let buf = self.reader.fill_buf()?;
228                if buf.is_empty() {
229                    return Err(DecodingError::UnexpectedEof);
230                }
231
232                self.decoder.update(buf, write_into)?
233            };
234            self.reader.consume(consumed);
235            match result {
236                Decoded::Nothing => (),
237                Decoded::BlockStart(Block::Trailer) => {
238                    self.at_eof = true;
239                }
240                result => return Ok(Some(result)),
241            }
242        }
243        Ok(None)
244    }
245
246    fn into_inner(self) -> io::BufReader<R> {
247        self.reader
248    }
249
250    fn decode_next_bytes(&mut self, out: &mut OutputBuffer<'_>) -> Result<usize, DecodingError> {
251        match self.decode_next(out)? {
252            Some(Decoded::BytesDecoded(len)) => Ok(len.get()),
253            Some(Decoded::DataEnd) => Ok(0),
254            _ => Err(DecodingError::format("unexpected data")),
255        }
256    }
257}
// Headers for supported application extensions.

/// Application extension header selecting the Netscape (loop count) extension.
const EXT_NAME_NETSCAPE: &[u8] = b"NETSCAPE2.0";
/// Application extension header selecting embedded XMP metadata.
const EXT_NAME_XMP: &[u8] = b"XMP DataXMP";
/// Application extension header selecting an embedded ICC profile.
const EXT_NAME_ICC: &[u8] = b"ICCRGBG1012";
262
/// Progress of parsing an application extension, tracked across its sub-blocks.
enum AppExtensionState {
    /// Waiting for the app name.
    None,
    /// Inside a Netscape extension; the next sub-block may carry the repeat count.
    Netscape,
    /// Inside an XMP extension; sub-blocks are collected as XMP metadata.
    Xmp,
    /// Inside an ICC extension; sub-blocks are collected as the ICC profile.
    Icc,
    /// Remaining sub-blocks of the current extension are ignored.
    Skip,
}
272
273#[allow(dead_code)]
274/// GIF decoder. Create [`DecodeOptions`] to get started, and call [`DecodeOptions::read_info`].
275pub struct Decoder<R: Read> {
276    decoder: ReadDecoder<R>,
277    pixel_converter: PixelConverter,
278    memory_limit: MemoryLimit,
279    bg_color: Option<u8>,
280    repeat: Repeat,
281    current_frame: Frame<'static>,
282    current_frame_data_type: FrameDataType,
283    app_extension_state: AppExtensionState,
284    /// XMP metadata bytes.
285    xmp_metadata: Option<Vec<u8>>,
286    /// ICC profile bytes.
287    icc_profile: Option<Vec<u8>>,
288}
289
290impl<R> Decoder<R>
291where
292    R: Read,
293{
294    /// Create a new decoder with default options.
295    #[inline]
296    pub fn new(reader: R) -> Result<Self, DecodingError> {
297        DecodeOptions::new().read_info(reader)
298    }
299
300    /// Return a builder that allows configuring limits etc.
301    #[must_use]
302    #[inline]
303    pub fn build() -> DecodeOptions {
304        DecodeOptions::new()
305    }
306
307    fn with_no_init(reader: R, decoder: StreamingDecoder, options: DecodeOptions) -> Self {
308        Self {
309            decoder: ReadDecoder {
310                reader: io::BufReader::new(reader),
311                decoder,
312                at_eof: false,
313            },
314            bg_color: None,
315            pixel_converter: PixelConverter::new(options.color_output),
316            memory_limit: options.memory_limit.clone(),
317            repeat: Repeat::default(),
318            current_frame: Frame::default(),
319            current_frame_data_type: FrameDataType::Pixels,
320            app_extension_state: AppExtensionState::None,
321            xmp_metadata: None,
322            icc_profile: None,
323        }
324    }
325
326    fn init(mut self) -> Result<Self, DecodingError> {
327        const APP_EXTENSION: AnyExtension = AnyExtension(Extension::Application as u8);
328        loop {
329            match self.decoder.decode_next(&mut OutputBuffer::None)? {
330                Some(Decoded::BackgroundColor(bg_color)) => {
331                    self.bg_color = Some(bg_color);
332                }
333                Some(Decoded::GlobalPalette(palette)) => {
334                    self.pixel_converter.set_global_palette(palette.into());
335                }
336                Some(Decoded::SubBlock {
337                    ext: APP_EXTENSION,
338                    is_last,
339                }) => {
340                    self.read_application_extension(is_last)?;
341                }
342                Some(Decoded::HeaderEnd) => break,
343                Some(_) => {
344                    // There will be extra events when parsing application extension
345                    continue;
346                }
347                None => {
348                    return Err(DecodingError::format(
349                        "file does not contain any image data",
350                    ))
351                }
352            }
353        }
354        // If the background color is invalid, ignore it
355        if let Some(palette) = self.pixel_converter.global_palette() {
356            if self.bg_color.unwrap_or(0) as usize >= (palette.len() / PLTE_CHANNELS) {
357                self.bg_color = None;
358            }
359        }
360        Ok(self)
361    }
362
363    fn read_application_extension(&mut self, is_last: bool) -> Result<(), DecodingError> {
364        let data = self.decoder.decoder.last_ext_sub_block();
365        match self.app_extension_state {
366            AppExtensionState::None => {
367                // GIF spec requires len == 11
368                self.app_extension_state = match data {
369                    EXT_NAME_NETSCAPE => AppExtensionState::Netscape,
370                    EXT_NAME_XMP => {
371                        self.xmp_metadata = Some(Vec::new());
372                        AppExtensionState::Xmp
373                    }
374                    EXT_NAME_ICC => {
375                        self.icc_profile = Some(Vec::new());
376                        AppExtensionState::Icc
377                    }
378                    _ => AppExtensionState::Skip,
379                }
380            }
381            AppExtensionState::Netscape => {
382                if let [1, rest @ ..] = data {
383                    if let Ok(repeat) = rest.try_into().map(u16::from_le_bytes) {
384                        self.repeat = if repeat == 0 {
385                            Repeat::Infinite
386                        } else {
387                            Repeat::Finite(repeat)
388                        };
389                    }
390                }
391                self.app_extension_state = AppExtensionState::Skip;
392            }
393            AppExtensionState::Xmp => {
394                if let Some(xmp_metadata) = &mut self.xmp_metadata {
395                    // XMP is not written as a valid "pascal-string", so we need to stitch together
396                    // the text from our collected sublock-lengths.
397                    self.memory_limit
398                        .try_reserve(xmp_metadata, 1 + data.len())?;
399                    xmp_metadata.push(data.len() as u8);
400                    xmp_metadata.extend_from_slice(data);
401                    if is_last {
402                        // XMP adds a "ramp" of 257 bytes to the end of the metadata to let the "pascal-strings"
403                        // parser converge to the null byte. The ramp looks like "0x01, 0xff, .., 0x01, 0x00".
404                        // For convenience and to allow consumers to not be bothered with this implementation detail,
405                        // we cut the ramp.
406                        const RAMP_SIZE: usize = 257;
407                        if xmp_metadata.len() >= RAMP_SIZE
408                            && xmp_metadata.ends_with(&[0x03, 0x02, 0x01, 0x00])
409                            && xmp_metadata[xmp_metadata.len() - RAMP_SIZE..]
410                                .starts_with(&[0x01, 0x0ff])
411                        {
412                            xmp_metadata.truncate(xmp_metadata.len() - RAMP_SIZE);
413                        }
414                    }
415                }
416            }
417            AppExtensionState::Icc => {
418                if let Some(icc) = &mut self.icc_profile {
419                    self.memory_limit.try_reserve(icc, data.len())?;
420                    icc.extend_from_slice(data);
421                }
422            }
423            AppExtensionState::Skip => {}
424        };
425        if is_last {
426            self.app_extension_state = AppExtensionState::None;
427        }
428        Ok(())
429    }
430
431    /// Returns the next frame info
432    pub fn next_frame_info(&mut self) -> Result<Option<&Frame<'static>>, DecodingError> {
433        loop {
434            match self.decoder.decode_next(&mut OutputBuffer::None)? {
435                Some(Decoded::FrameMetadata(frame_data_type)) => {
436                    self.current_frame = self.decoder.decoder.current_frame_mut().take();
437                    self.current_frame_data_type = frame_data_type;
438                    if self.current_frame.palette.is_none() && self.global_palette().is_none() {
439                        return Err(DecodingError::format(
440                            "no color table available for current frame",
441                        ));
442                    }
443                    break;
444                }
445                Some(_) => (),
446                None => return Ok(None),
447            }
448        }
449        Ok(Some(&self.current_frame))
450    }
451
452    /// Query information about the frame previously advanced with [`Self::next_frame_info`].
453    ///
454    /// Returns `None` past the end of file.
455    pub fn current_frame_info(&self) -> Option<&Frame<'static>> {
456        if self.decoder.at_eof {
457            None
458        } else {
459            Some(&self.current_frame)
460        }
461    }
462
463    /// Reads the next frame from the image.
464    ///
465    /// Do not call `Self::next_frame_info` beforehand.
466    /// Deinterlaces the result.
467    ///
468    /// You can also call `.into_iter()` on the decoder to use it as a regular iterator.
469    pub fn read_next_frame(&mut self) -> Result<Option<&Frame<'static>>, DecodingError> {
470        if self.next_frame_info()?.is_some() {
471            match self.current_frame_data_type {
472                FrameDataType::Pixels => {
473                    self.pixel_converter.read_frame(
474                        &mut self.current_frame,
475                        &mut |out| self.decoder.decode_next_bytes(out),
476                        &self.memory_limit,
477                    )?;
478                }
479                FrameDataType::Lzw { min_code_size } => {
480                    let mut vec = if matches!(self.current_frame.buffer, Cow::Owned(_)) {
481                        let mut vec =
482                            mem::replace(&mut self.current_frame.buffer, Cow::Borrowed(&[]))
483                                .into_owned();
484                        vec.clear();
485                        vec
486                    } else {
487                        Vec::new()
488                    };
489                    // Guesstimate 2bpp
490                    vec.try_reserve(
491                        usize::from(self.current_frame.width)
492                            * usize::from(self.current_frame.height)
493                            / 4,
494                    )
495                    .map_err(|_| DecodingError::OutOfMemory)?;
496                    self.copy_lzw_into_buffer(min_code_size, &mut vec)?;
497                    self.current_frame.buffer = Cow::Owned(vec);
498                }
499            }
500            Ok(Some(&self.current_frame))
501        } else {
502            Ok(None)
503        }
504    }
505
506    /// This is private for iterator's use
507    fn take_current_frame(&mut self) -> Option<Frame<'static>> {
508        if self.current_frame.buffer.is_empty() {
509            return None;
510        }
511        Some(self.current_frame.take())
512    }
513
514    /// Reads the data of the current frame into a pre-allocated buffer.
515    ///
516    /// `Self::next_frame_info` needs to be called beforehand.
517    /// The length of `buf` must be at least `Self::buffer_size`.
518    /// Deinterlaces the result.
519    pub fn read_into_buffer(&mut self, buf: &mut [u8]) -> Result<(), DecodingError> {
520        self.pixel_converter
521            .read_into_buffer(&self.current_frame, buf, &mut |out| {
522                self.decoder.decode_next_bytes(out)
523            })
524    }
525
526    fn copy_lzw_into_buffer(
527        &mut self,
528        min_code_size: u8,
529        buf: &mut Vec<u8>,
530    ) -> Result<(), DecodingError> {
531        // `write_lzw_pre_encoded_frame` smuggles `min_code_size` in the first byte.
532        buf.push(min_code_size);
533        loop {
534            match self.decoder.decode_next(&mut OutputBuffer::Vec(buf))? {
535                Some(Decoded::LzwDataCopied(_len)) => {}
536                Some(Decoded::DataEnd) => return Ok(()),
537                _ => return Err(DecodingError::format("unexpected data")),
538            }
539        }
540    }
541
542    /// Reads data of the current frame into a pre-allocated buffer until the buffer has been
543    /// filled completely.
544    ///
545    /// The buffer length must be an even number of pixels (multiple of 4 if decoding RGBA).
546    ///
547    /// `Self::next_frame_info` needs to be called beforehand. Returns `true` if the supplied
548    /// buffer could be filled completely. Should not be called after `false` had been returned.
549    pub fn fill_buffer(&mut self, buf: &mut [u8]) -> Result<bool, DecodingError> {
550        self.pixel_converter
551            .fill_buffer(&self.current_frame, buf, &mut |out| {
552                self.decoder.decode_next_bytes(out)
553            })
554    }
555
556    /// Output buffer size
557    pub fn buffer_size(&self) -> usize {
558        self.pixel_converter
559            .buffer_size(&self.current_frame)
560            .unwrap()
561    }
562
563    /// Line length of the current frame
564    pub fn line_length(&self) -> usize {
565        self.pixel_converter.line_length(&self.current_frame)
566    }
567
568    /// Returns the color palette relevant for the frame that has been decoded
569    #[inline]
570    pub fn palette(&self) -> Result<&[u8], DecodingError> {
571        Ok(match self.current_frame.palette {
572            Some(ref table) => table,
573            None => self.global_palette().ok_or_else(|| {
574                DecodingError::format("no color table available for current frame")
575            })?,
576        })
577    }
578
579    /// The global color palette
580    pub fn global_palette(&self) -> Option<&[u8]> {
581        self.pixel_converter.global_palette()
582    }
583
584    /// Width of the image
585    #[inline]
586    pub fn width(&self) -> u16 {
587        self.decoder.decoder.width()
588    }
589
590    /// Height of the image
591    #[inline]
592    pub fn height(&self) -> u16 {
593        self.decoder.decoder.height()
594    }
595
596    /// XMP metadata stored in the image.
597    #[inline]
598    #[must_use]
599    pub fn xmp_metadata(&self) -> Option<&[u8]> {
600        self.xmp_metadata.as_deref()
601    }
602
603    /// ICC profile stored in the image.
604    #[inline]
605    #[must_use]
606    pub fn icc_profile(&self) -> Option<&[u8]> {
607        self.icc_profile.as_deref()
608    }
609
610    /// Abort decoding and recover the `io::Read` instance
611    pub fn into_inner(self) -> io::BufReader<R> {
612        self.decoder.into_inner()
613    }
614
615    /// Index of the background color in the global palette
616    ///
617    /// In practice this is not used, and the background is
618    /// always transparent
619    pub fn bg_color(&self) -> Option<usize> {
620        self.bg_color.map(|v| v as usize)
621    }
622
623    /// Number of loop repetitions
624    #[inline]
625    pub fn repeat(&self) -> Repeat {
626        self.repeat
627    }
628}
629
630impl<R: Read> IntoIterator for Decoder<R> {
631    type Item = Result<Frame<'static>, DecodingError>;
632    type IntoIter = DecoderIter<R>;
633
634    #[inline]
635    fn into_iter(self) -> Self::IntoIter {
636        DecoderIter {
637            inner: self,
638            ended: false,
639        }
640    }
641}
642
643/// Use `decoder.into_iter()` to iterate over the frames
644pub struct DecoderIter<R: Read> {
645    inner: Decoder<R>,
646    ended: bool,
647}
648
649impl<R: Read> DecoderIter<R> {
650    /// Abort decoding and recover the `io::Read` instance
651    ///
652    /// Use `for frame in iter.by_ref()` to be able to call this afterwards.
653    pub fn into_inner(self) -> io::BufReader<R> {
654        self.inner.into_inner()
655    }
656}
657
658impl<R: Read> FusedIterator for DecoderIter<R> {}
659
660impl<R: Read> Iterator for DecoderIter<R> {
661    type Item = Result<Frame<'static>, DecodingError>;
662
663    fn next(&mut self) -> Option<Self::Item> {
664        if !self.ended {
665            match self.inner.read_next_frame() {
666                Ok(Some(_)) => self.inner.take_current_frame().map(Ok),
667                Ok(None) => {
668                    self.ended = true;
669                    None
670                }
671                Err(err) => {
672                    self.ended = true;
673                    Some(Err(err))
674                }
675            }
676        } else {
677            None
678        }
679    }
680}