[go: up one dir, main page]

ureq/body/
mod.rs

1use std::fmt;
2use std::io;
3use std::sync::Arc;
4
5pub use build::BodyBuilder;
6use ureq_proto::http::header;
7use ureq_proto::BodyMode;
8
9use crate::http;
10use crate::run::BodyHandler;
11use crate::Error;
12
13use self::limit::LimitReader;
14use self::lossy::LossyUtf8Reader;
15
16mod build;
17mod limit;
18mod lossy;
19
20#[cfg(feature = "charset")]
21mod charset;
22
23#[cfg(feature = "gzip")]
24mod gzip;
25
26#[cfg(feature = "brotli")]
27mod brotli;
28
/// Default max body size for read_to_string() and read_to_vec().
///
/// Applied through `.limit(MAX_BODY_SIZE)` by the convenience readers on `Body`
/// (`read_to_string()`, `read_to_vec()` and, with the **json** feature, `read_json()`).
/// The value is 10 * 1024 * 1024 bytes.
const MAX_BODY_SIZE: u64 = 10 * 1024 * 1024;
31
32/// A response body returned as [`http::Response<Body>`].
33///
34/// # Default size limit
35///
36/// Methods like `read_to_string()`, `read_to_vec()`, and `read_json()` have a **default 10MB limit**
37/// to prevent memory exhaustion. To download larger files, use `with_config().limit(new_size)`:
38///
39/// ```
40/// // Download a 20MB file
41/// let bytes = ureq::get("http://httpbin.org/bytes/200000000")
42///     .call()?
43///     .body_mut()
44///     .with_config()
45///     .limit(20 * 1024 * 1024) // 20MB
46///     .read_to_vec()?;
47/// # Ok::<_, ureq::Error>(())
48/// ```
49///
50/// # Body lengths
51///
/// HTTP/1.1 has two major modes of transferring body data. Either a `Content-Length`
53/// header defines exactly how many bytes to transfer, or `Transfer-Encoding: chunked`
54/// facilitates a streaming style when the size is not known up front.
55///
56/// To protect against a problem called [request smuggling], ureq has heuristics for
57/// how to interpret a server sending both `Transfer-Encoding` and `Content-Length` headers.
58///
/// 1. `chunked` takes precedence if both headers are present (not for HTTP/1.0)
60/// 2. `content-length` is used if there is no chunked
61/// 3. If there are no headers, fall back on "close delimited" meaning the socket
62///    must close to end the body
63///
64/// When a `Content-Length` header is used, ureq will ensure the received body is _EXACTLY_
65/// as many bytes as declared (it cannot be less). This mechanic is in `ureq-proto`
66/// and is different to the [`BodyWithConfig::limit()`] below.
67///
68/// # Pool reuse
69///
70/// To return a connection (aka [`Transport`][crate::unversioned::transport::Transport])
/// to the Agent's pool, the body must be read to end. If [`BodyWithConfig::limit()`] is set
/// to a shorter size than the actual response body, the connection will not be reused.
73///
74/// # Example
75///
76/// ```
77/// use std::io::Read;
78/// let mut res = ureq::get("http://httpbin.org/bytes/100")
79///     .call()?;
80///
81/// assert!(res.headers().contains_key("Content-Length"));
82/// let len: usize = res.headers().get("Content-Length")
83///     .unwrap().to_str().unwrap().parse().unwrap();
84///
85/// let mut bytes: Vec<u8> = Vec::with_capacity(len);
86/// res.body_mut().as_reader()
87///     .read_to_end(&mut bytes)?;
88///
89/// assert_eq!(bytes.len(), len);
90/// # Ok::<_, ureq::Error>(())
91/// ```
92///
93/// [request smuggling]: https://en.wikipedia.org/wiki/HTTP_request_smuggling
pub struct Body {
    // Where the body bytes are read from (see `BodyDataSource`).
    source: BodyDataSource,
    // Response metadata (content encoding, mime type, charset, body mode). Kept in an
    // `Arc` so `with_config()` can hand out a clone without copying the strings inside.
    info: Arc<ResponseInfo>,
}
98
/// The underlying source of body bytes owned by a [`Body`].
enum BodyDataSource {
    /// Body read through a boxed `BodyHandler`; this is what `Body::new()` creates.
    Handler(Box<BodyHandler>),
    /// Body read from an arbitrary boxed reader.
    // NOTE(review): presumably constructed via `BodyBuilder` — confirm in `build.rs`.
    Reader(Box<dyn io::Read + Send + Sync>),
}
103
/// Facts about a response body that decide how `BodyReader` decodes it.
#[derive(Clone)]
pub(crate) struct ResponseInfo {
    // Parsed `Content-Encoding` header; `ContentEncoding::None` when the header is absent.
    content_encoding: ContentEncoding,
    // Part of `Content-Type` before the first `;`, if the header is present.
    mime_type: Option<String>,
    // Value of the `charset` parameter of `Content-Type`, if any.
    charset: Option<String>,
    // How the body is delimited (length delimited, chunked, close delimited or no body).
    body_mode: BodyMode,
}
111
112impl Body {
113    /// Builder for creating a body
114    ///
115    /// This is useful for testing, or for [`Middleware`][crate::middleware::Middleware] that
116    /// returns another body than the requested one.
    pub fn builder() -> BodyBuilder {
        // Plain delegation: all configuration happens on the returned builder.
        BodyBuilder::new()
    }
120
121    pub(crate) fn new(handler: BodyHandler, info: ResponseInfo) -> Self {
122        Body {
123            source: BodyDataSource::Handler(Box::new(handler)),
124            info: Arc::new(info),
125        }
126    }
127
128    /// The mime-type of the `content-type` header.
129    ///
130    /// For the below header, we would get `Some("text/plain")`:
131    ///
132    /// ```text
133    ///     Content-Type: text/plain; charset=iso-8859-1
134    /// ```
135    ///
136    /// *Caution:* A bad server might set `Content-Type` to one thing and send
137    /// something else. There is no way ureq can verify this.
138    ///
139    /// # Example
140    ///
141    /// ```
142    /// let res = ureq::get("https://www.google.com/")
143    ///     .call()?;
144    ///
145    /// assert_eq!(res.body().mime_type(), Some("text/html"));
146    /// # Ok::<_, ureq::Error>(())
147    /// ```
148    pub fn mime_type(&self) -> Option<&str> {
149        self.info.mime_type.as_deref()
150    }
151
152    /// The charset of the `content-type` header.
153    ///
154    /// For the below header, we would get `Some("iso-8859-1")`:
155    ///
156    /// ```text
157    ///     Content-Type: text/plain; charset=iso-8859-1
158    /// ```
159    ///
160    /// *Caution:* A bad server might set `Content-Type` to one thing and send
161    /// something else. There is no way ureq can verify this.
162    ///
163    /// # Example
164    ///
165    /// ```
166    /// let res = ureq::get("https://www.google.com/")
167    ///     .call()?;
168    ///
169    /// assert_eq!(res.body().charset(), Some("ISO-8859-1"));
170    /// # Ok::<_, ureq::Error>(())
171    /// ```
172    pub fn charset(&self) -> Option<&str> {
173        self.info.charset.as_deref()
174    }
175
176    /// The content length of the body.
177    ///
178    /// This is the value of the `Content-Length` header, if there is one. For chunked
179    /// responses (`Transfer-Encoding: chunked`) , this will be `None`. Similarly for
180    /// HTTP/1.0 without a `Content-Length` header, the response is close delimited,
181    /// which means the length is unknown.
182    ///
183    /// A bad server might set `Content-Length` to one thing and send something else.
184    /// ureq will double check this, see section on body length heuristics.
185    ///
186    /// # Example
187    ///
188    /// ```
189    /// let res = ureq::get("https://httpbin.org/bytes/100")
190    ///     .call()?;
191    ///
192    /// assert_eq!(res.body().content_length(), Some(100));
193    /// # Ok::<_, ureq::Error>(())
194    /// ```
195    pub fn content_length(&self) -> Option<u64> {
196        match self.info.body_mode {
197            BodyMode::NoBody => None,
198            BodyMode::LengthDelimited(v) => Some(v),
199            BodyMode::Chunked => None,
200            BodyMode::CloseDelimited => None,
201        }
202    }
203
204    /// Handle this body as a shared `impl Read` of the body.
205    ///
206    /// This is the regular API which goes via [`http::Response::body_mut()`] to get a
207    /// mut reference to the `Body`, and then use `as_reader()`. It is also possible to
208    /// get a non-shared, owned reader via [`Body::into_reader()`].
209    ///
    /// * Reader is not limited by default. That means a malicious server could
    ///   exhaust all available memory on your client machine.
    ///   To set a limit use [`Body::with_config()`].
213    /// * Reader will error if `Content-Length` is set, but the connection is closed
214    ///   before all bytes are received.
215    ///
216    /// # Example
217    ///
218    /// ```
219    /// use std::io::Read;
220    ///
221    /// let mut res = ureq::get("http://httpbin.org/bytes/100")
222    ///     .call()?;
223    ///
224    /// let mut bytes: Vec<u8> = Vec::with_capacity(1000);
225    /// res.body_mut().as_reader()
226    ///     .read_to_end(&mut bytes)?;
227    /// # Ok::<_, ureq::Error>(())
228    /// ```
229    pub fn as_reader(&mut self) -> BodyReader {
230        self.with_config().reader()
231    }
232
233    /// Turn this response into an owned `impl Read` of the body.
234    ///
235    /// Sometimes it might be useful to disconnect the body reader from the body.
236    /// The reader returned by [`Body::as_reader()`] borrows the body, while this
237    /// variant consumes the body and turns it into a reader with lifetime `'static`.
238    /// The reader can for instance be sent to another thread.
239    ///
    /// * Reader is not limited by default. That means a malicious server could
    ///   exhaust all available memory on your client machine.
    ///   To set a limit use [`Body::into_with_config()`].
243    /// * Reader will error if `Content-Length` is set, but the connection is closed
244    ///   before all bytes are received.
245    ///
246    /// ```
247    /// use std::io::Read;
248    ///
249    /// let res = ureq::get("http://httpbin.org/bytes/100")
250    ///     .call()?;
251    ///
252    /// let (_, body) = res.into_parts();
253    ///
254    /// let mut bytes: Vec<u8> = Vec::with_capacity(1000);
255    /// body.into_reader()
256    ///     .read_to_end(&mut bytes)?;
257    /// # Ok::<_, ureq::Error>(())
258    /// ```
259    pub fn into_reader(self) -> BodyReader<'static> {
260        self.into_with_config().reader()
261    }
262
263    /// Read the response as a string.
264    ///
265    /// * Response is limited to 10MB
    /// * Replaces incorrect utf-8 chars with `?`
267    ///
268    /// To change these defaults use [`Body::with_config()`].
269    ///
270    /// ```
271    /// let mut res = ureq::get("http://httpbin.org/robots.txt")
272    ///     .call()?;
273    ///
274    /// let s = res.body_mut().read_to_string()?;
275    /// assert_eq!(s, "User-agent: *\nDisallow: /deny\n");
276    /// # Ok::<_, ureq::Error>(())
277    /// ```
278    ///
279    /// For larger text files, you must explicitly increase the limit:
280    ///
281    /// ```
282    /// // Read a large text file (25MB)
283    /// let text = ureq::get("http://httpbin.org/get")
284    ///     .call()?
285    ///     .body_mut()
286    ///     .with_config()
287    ///     .limit(25 * 1024 * 1024) // 25MB
288    ///     .read_to_string()?;
289    /// # Ok::<_, ureq::Error>(())
290    /// ```
291    pub fn read_to_string(&mut self) -> Result<String, Error> {
292        self.with_config()
293            .limit(MAX_BODY_SIZE)
294            .lossy_utf8(true)
295            .read_to_string()
296    }
297
298    /// Read the response to a vec.
299    ///
300    /// * Response is limited to 10MB.
301    ///
302    /// To change this default use [`Body::with_config()`].
303    /// ```
304    /// let mut res = ureq::get("http://httpbin.org/bytes/100")
305    ///     .call()?;
306    ///
307    /// let bytes = res.body_mut().read_to_vec()?;
308    /// assert_eq!(bytes.len(), 100);
309    /// # Ok::<_, ureq::Error>(())
310    /// ```
311    ///
312    /// For larger files, you must explicitly increase the limit:
313    ///
314    /// ```
315    /// // Download a larger file (50MB)
316    /// let bytes = ureq::get("http://httpbin.org/bytes/200000000")
317    ///     .call()?
318    ///     .body_mut()
319    ///     .with_config()
320    ///     .limit(50 * 1024 * 1024) // 50MB
321    ///     .read_to_vec()?;
322    /// # Ok::<_, ureq::Error>(())
323    /// ```
324    pub fn read_to_vec(&mut self) -> Result<Vec<u8>, Error> {
325        self.with_config()
326            //
327            .limit(MAX_BODY_SIZE)
328            .read_to_vec()
329    }
330
331    /// Read the response from JSON.
332    ///
333    /// * Response is limited to 10MB.
334    ///
335    /// To change this default use [`Body::with_config()`].
336    ///
337    /// The returned value is something that derives [`Deserialize`](serde::Deserialize).
338    /// You might need to be explicit with which type you want. See example below.
339    ///
340    /// ```
341    /// use serde::Deserialize;
342    ///
343    /// #[derive(Deserialize)]
344    /// struct BodyType {
345    ///   slideshow: BodyTypeInner,
346    /// }
347    ///
348    /// #[derive(Deserialize)]
349    /// struct BodyTypeInner {
350    ///   author: String,
351    /// }
352    ///
353    /// let body = ureq::get("https://httpbin.org/json")
354    ///     .call()?
355    ///     .body_mut()
356    ///     .read_json::<BodyType>()?;
357    ///
358    /// assert_eq!(body.slideshow.author, "Yours Truly");
359    /// # Ok::<_, ureq::Error>(())
360    /// ```
361    ///
362    /// For larger JSON files, you must explicitly increase the limit:
363    ///
364    /// ```
365    /// use serde_json::Value;
366    ///
367    /// // Parse a large JSON file (30MB)
368    /// let json: Value = ureq::get("https://httpbin.org/json")
369    ///     .call()?
370    ///     .body_mut()
371    ///     .with_config()
372    ///     .limit(30 * 1024 * 1024) // 30MB
373    ///     .read_json()?;
374    /// # Ok::<_, ureq::Error>(())
375    /// ```
376    #[cfg(feature = "json")]
377    pub fn read_json<T: serde::de::DeserializeOwned>(&mut self) -> Result<T, Error> {
378        let reader = self.with_config().limit(MAX_BODY_SIZE).reader();
379        let value: T = serde_json::from_reader(reader)?;
380        Ok(value)
381    }
382
383    /// Read the body data with configuration.
384    ///
385    /// This borrows the body which gives easier use with [`http::Response::body_mut()`].
386    /// To get a non-borrowed reader use [`Body::into_with_config()`].
387    ///
388    /// # Example
389    ///
390    /// ```
391    /// let reader = ureq::get("http://httpbin.org/bytes/100")
392    ///     .call()?
393    ///     .body_mut()
394    ///     .with_config()
395    ///     // Reader will only read 50 bytes
396    ///     .limit(50)
397    ///     .reader();
398    /// # Ok::<_, ureq::Error>(())
399    /// ```
400    pub fn with_config(&mut self) -> BodyWithConfig {
401        let handler = (&mut self.source).into();
402        BodyWithConfig::new(handler, self.info.clone())
403    }
404
405    /// Consume self and read the body with configuration.
406    ///
407    /// This consumes self and returns a reader with `'static` lifetime.
408    ///
409    /// # Example
410    ///
411    /// ```
412    /// // Get the body out of http::Response
413    /// let (_, body) = ureq::get("http://httpbin.org/bytes/100")
414    ///     .call()?
415    ///     .into_parts();
416    ///
417    /// let reader = body
418    ///     .into_with_config()
419    ///     // Reader will only read 50 bytes
420    ///     .limit(50)
421    ///     .reader();
422    /// # Ok::<_, ureq::Error>(())
423    /// ```
424    ///
425    /// This limit behavior can be used to prevent a malicious server from exhausting
426    /// memory on the client machine. For example, if the machine running
427    /// ureq has 1GB of RAM, you could protect the machine by setting a smaller
428    /// limit such as 128MB. The exact number will vary by your client's download
429    /// needs, available system resources, and system utilization.
430    pub fn into_with_config(self) -> BodyWithConfig<'static> {
431        let handler = self.source.into();
432        BodyWithConfig::new(handler, self.info)
433    }
434}
435
436/// Configuration of how to read the body.
437///
438/// Obtained via one of:
439///
440/// * [Body::with_config()]
441/// * [Body::into_with_config()]
442///
443/// # Handling large responses
444///
445/// The `BodyWithConfig` is the primary way to increase the default 10MB size limit
446/// when downloading large files to memory:
447///
448/// ```
449/// // Download a 50MB file
450/// let large_data = ureq::get("http://httpbin.org/bytes/200000000")
451///     .call()?
452///     .body_mut()
453///     .with_config()
454///     .limit(50 * 1024 * 1024) // 50MB
455///     .read_to_vec()?;
456/// # Ok::<_, ureq::Error>(())
457/// ```
pub struct BodyWithConfig<'a> {
    // Borrowed or owned source of the body bytes.
    handler: BodySourceRef<'a>,
    // Response metadata passed on to `BodyReader::new()`.
    info: Arc<ResponseInfo>,
    // Byte limit given to `LimitReader`; `u64::MAX` unless `limit()` is called.
    limit: u64,
    // Whether text bodies go through `LossyUtf8Reader`; `false` unless `lossy_utf8()` is called.
    lossy_utf8: bool,
}
464
465impl<'a> BodyWithConfig<'a> {
466    fn new(handler: BodySourceRef<'a>, info: Arc<ResponseInfo>) -> Self {
467        BodyWithConfig {
468            handler,
469            info,
470            limit: u64::MAX,
471            lossy_utf8: false,
472        }
473    }
474
475    /// Limit the response body.
476    ///
477    /// Controls how many bytes we should read before throwing an error. This is used
478    /// to ensure RAM isn't exhausted by a server sending a very large response body.
479    ///
480    /// The default limit is `u64::MAX` (unlimited).
481    pub fn limit(mut self, value: u64) -> Self {
482        self.limit = value;
483        self
484    }
485
486    /// Replace invalid utf-8 chars.
487    ///
488    /// `true` means that broken utf-8 characters are replaced by a question mark `?`
489    /// (not utf-8 replacement char). This happens after charset conversion regardless of
490    /// whether the **charset** feature is enabled or not.
491    ///
492    /// The default is `false`.
493    pub fn lossy_utf8(mut self, value: bool) -> Self {
494        self.lossy_utf8 = value;
495        self
496    }
497
498    fn do_build(self) -> BodyReader<'a> {
499        BodyReader::new(
500            LimitReader::new(self.handler, self.limit),
501            &self.info,
502            self.info.body_mode,
503            self.lossy_utf8,
504        )
505    }
506
507    /// Creates a reader.
508    ///
509    /// The reader is either shared or owned, depending on `with_config` or `into_with_config`.
510    ///
511    /// # Example of owned vs shared
512    ///
513    /// ```
514    /// // Creates an owned reader.
515    /// let reader = ureq::get("https://httpbin.org/get")
516    ///     .call()?
517    ///     .into_body()
518    ///     // takes ownership of Body
519    ///     .into_with_config()
520    ///     .limit(10)
521    ///     .reader();
522    /// # Ok::<_, ureq::Error>(())
523    /// ```
524    ///
525    /// ```
526    /// // Creates a shared reader.
527    /// let reader = ureq::get("https://httpbin.org/get")
528    ///     .call()?
529    ///     .body_mut()
530    ///     // borrows Body
531    ///     .with_config()
532    ///     .limit(10)
533    ///     .reader();
534    /// # Ok::<_, ureq::Error>(())
535    /// ```
536    pub fn reader(self) -> BodyReader<'a> {
537        self.do_build()
538    }
539
540    /// Read into string.
541    ///
    /// *Caution:* without a preceding [`limit()`][BodyWithConfig::limit], this
543    /// becomes an unbounded sized `String`. A bad server could exhaust your memory.
544    ///
545    /// # Example
546    ///
547    /// ```
548    /// // Reads max 10k to a String.
549    /// let string = ureq::get("https://httpbin.org/get")
550    ///     .call()?
551    ///     .body_mut()
552    ///     .with_config()
553    ///     // Important. Limits body to 10k
554    ///     .limit(10_000)
555    ///     .read_to_string()?;
556    /// # Ok::<_, ureq::Error>(())
557    /// ```
558    pub fn read_to_string(self) -> Result<String, Error> {
559        use std::io::Read;
560        let mut reader = self.do_build();
561        let mut buf = String::new();
562        reader.read_to_string(&mut buf)?;
563        Ok(buf)
564    }
565
566    /// Read into vector.
567    ///
    /// *Caution:* without a preceding [`limit()`][BodyWithConfig::limit], this
569    /// becomes an unbounded sized `Vec`. A bad server could exhaust your memory.
570    ///
571    /// # Example
572    ///
573    /// ```
574    /// // Reads max 10k to a Vec.
575    /// let myvec = ureq::get("https://httpbin.org/get")
576    ///     .call()?
577    ///     .body_mut()
578    ///     .with_config()
579    ///     // Important. Limits body to 10k
580    ///     .limit(10_000)
581    ///     .read_to_vec()?;
582    /// # Ok::<_, ureq::Error>(())
583    /// ```
584    pub fn read_to_vec(self) -> Result<Vec<u8>, Error> {
585        use std::io::Read;
586        let mut reader = self.do_build();
587        let mut buf = Vec::new();
588        reader.read_to_end(&mut buf)?;
589        Ok(buf)
590    }
591
592    /// Read JSON body.
593    ///
    /// *Caution:* without a preceding [`limit()`][BodyWithConfig::limit], this
    /// reads an unbounded amount of JSON. A bad server could exhaust your memory.
596    ///
597    /// # Example
598    ///
599    /// ```
600    /// use serde_json::Value;
601    ///
602    /// // Reads max 10k as a JSON value.
603    /// let json: Value  = ureq::get("https://httpbin.org/get")
604    ///     .call()?
605    ///     .body_mut()
606    ///     .with_config()
607    ///     // Important. Limits body to 10k
608    ///     .limit(10_000)
609    ///     .read_json()?;
610    /// # Ok::<_, ureq::Error>(())
611    /// ```
612    #[cfg(feature = "json")]
613    pub fn read_json<T: serde::de::DeserializeOwned>(self) -> Result<T, Error> {
614        let reader = self.do_build();
615        let value: T = serde_json::from_reader(reader)?;
616        Ok(value)
617    }
618}
619
/// The `Content-Encoding` of a response, as far as this module understands it.
#[derive(Debug, Clone, Copy)]
enum ContentEncoding {
    /// No usable `Content-Encoding` header (absent, or not representable as a string).
    None,
    /// Gzip compressed body; decoded when the **gzip** feature is enabled.
    Gzip,
    /// Brotli compressed body; decoded when the **brotli** feature is enabled.
    Brotli,
    /// A header value this module does not recognize; the body is passed through as is.
    Unknown,
}
627
628impl ResponseInfo {
629    pub fn new(headers: &http::HeaderMap, body_mode: BodyMode) -> Self {
630        let content_encoding = headers
631            .get(header::CONTENT_ENCODING)
632            .and_then(|v| v.to_str().ok())
633            .map(ContentEncoding::from)
634            .unwrap_or(ContentEncoding::None);
635
636        let (mime_type, charset) = headers
637            .get(header::CONTENT_TYPE)
638            .and_then(|v| v.to_str().ok())
639            .map(split_content_type)
640            .unwrap_or((None, None));
641
642        ResponseInfo {
643            content_encoding,
644            mime_type,
645            charset,
646            body_mode,
647        }
648    }
649
650    /// Whether the mime type indicats text.
651    fn is_text(&self) -> bool {
652        self.mime_type
653            .as_deref()
654            .map(|s| s.starts_with("text/"))
655            .unwrap_or(false)
656    }
657}
658
/// Splits a `Content-Type` header value into its mime type and `charset` parameter.
///
/// For `text/plain; charset=iso-8859-1` this returns
/// `(Some("text/plain"), Some("iso-8859-1"))`.
///
/// * The mime type is everything before the first `;`, with surrounding whitespace
///   removed. It is always `Some` (possibly an empty string).
/// * The `charset` parameter name is matched case-insensitively and whitespace around
///   the name and the value is ignored.
/// * A value wrapped in double quotes (`charset="utf-8"`) is returned without the quotes.
/// * If `charset` occurs more than once, the last occurrence wins.
fn split_content_type(content_type: &str) -> (Option<String>, Option<String>) {
    // Content-Type: text/plain; charset=iso-8859-1
    let mut parts = content_type.split(';');

    // `split` always yields at least one item, so there is always a mime type part.
    let mime_type = parts.next().unwrap_or_default().trim();

    let mut charset = None;

    for param in parts {
        let Some((name, value)) = param.split_once('=') else {
            continue;
        };

        if !name.trim().eq_ignore_ascii_case("charset") {
            continue;
        }

        let value = value.trim();

        // Parameter values may be quoted strings; drop one pair of surrounding quotes.
        let value = value
            .strip_prefix('"')
            .and_then(|v| v.strip_suffix('"'))
            .unwrap_or(value);

        charset = Some(value.to_string());
    }

    (Some(mime_type.to_string()), charset)
}
678
679/// A reader of the response data.
680///
681/// 1. If `Transfer-Encoding: chunked`, the returned reader will unchunk it
682///    and any `Content-Length` header is ignored.
683/// 2. If `Content-Encoding: gzip` (or `br`) and the corresponding feature
684///    flag is enabled (**gzip** and **brotli**), decompresses the body data.
685/// 3. Given a header like `Content-Type: text/plain; charset=ISO-8859-1`
686///    and the **charset** feature enabled, will translate the body to utf-8.
///    This mechanic needs two components: a mime-type starting with `text/` and
///    a non-utf8 charset indication.
689/// 4. If `Content-Length` is set, the returned reader is limited to this byte
690///    length regardless of how many bytes the server sends.
691/// 5. If no length header, the reader is until server stream end.
692/// 6. The limit in the body method used to obtain the reader.
693///
694/// Note: The reader is also limited by the [`Body::as_reader`] and
695/// [`Body::into_reader`] calls. If that limit is set very high, a malicious
696/// server might return enough bytes to exhaust available memory. If you're
/// making requests to untrusted servers, you should set that
/// limit accordingly.
699///
700/// # Example
701///
702/// ```
703/// use std::io::Read;
704/// let mut res = ureq::get("http://httpbin.org/bytes/100")
705///     .call()?;
706///
707/// assert!(res.headers().contains_key("Content-Length"));
708/// let len: usize = res.headers().get("Content-Length")
709///     .unwrap().to_str().unwrap().parse().unwrap();
710///
711/// let mut bytes: Vec<u8> = Vec::with_capacity(len);
712/// res.body_mut().as_reader()
713///     .read_to_end(&mut bytes)?;
714///
715/// assert_eq!(bytes.len(), len);
716/// # Ok::<_, ureq::Error>(())
717/// ```
pub struct BodyReader<'a> {
    // Decoder stack, outermost first: optional lossy utf-8 replacement, optional charset
    // conversion, optional content decoding, the size limit, and finally the raw source.
    reader: MaybeLossyDecoder<CharsetDecoder<ContentDecoder<LimitReader<BodySourceRef<'a>>>>>,
    // If this reader is used as SendBody for another request, this
    // body mode can indicate the content-length. Gzip, charset etc
    // would mean input is not same as output.
    outgoing_body_mode: BodyMode,
}
725
726impl<'a> BodyReader<'a> {
727    fn new(
728        reader: LimitReader<BodySourceRef<'a>>,
729        info: &ResponseInfo,
730        incoming_body_mode: BodyMode,
731        lossy_utf8: bool,
732    ) -> BodyReader<'a> {
733        // This is outgoing body_mode in case we are using the BodyReader as a send body
734        // in a proxy situation.
735        let mut outgoing_body_mode = incoming_body_mode;
736
737        let reader = match info.content_encoding {
738            ContentEncoding::None | ContentEncoding::Unknown => ContentDecoder::PassThrough(reader),
739            #[cfg(feature = "gzip")]
740            ContentEncoding::Gzip => {
741                debug!("Decoding gzip");
742                outgoing_body_mode = BodyMode::Chunked;
743                ContentDecoder::Gzip(Box::new(gzip::GzipDecoder::new(reader)))
744            }
745            #[cfg(not(feature = "gzip"))]
746            ContentEncoding::Gzip => ContentDecoder::PassThrough(reader),
747            #[cfg(feature = "brotli")]
748            ContentEncoding::Brotli => {
749                debug!("Decoding brotli");
750                outgoing_body_mode = BodyMode::Chunked;
751                ContentDecoder::Brotli(Box::new(brotli::BrotliDecoder::new(reader)))
752            }
753            #[cfg(not(feature = "brotli"))]
754            ContentEncoding::Brotli => ContentDecoder::PassThrough(reader),
755        };
756
757        let reader = if info.is_text() {
758            charset_decoder(
759                reader,
760                info.mime_type.as_deref(),
761                info.charset.as_deref(),
762                &mut outgoing_body_mode,
763            )
764        } else {
765            CharsetDecoder::PassThrough(reader)
766        };
767
768        let reader = if info.is_text() && lossy_utf8 {
769            MaybeLossyDecoder::Lossy(LossyUtf8Reader::new(reader))
770        } else {
771            MaybeLossyDecoder::PassThrough(reader)
772        };
773
774        BodyReader {
775            outgoing_body_mode,
776            reader,
777        }
778    }
779
780    pub(crate) fn body_mode(&self) -> BodyMode {
781        self.outgoing_body_mode
782    }
783}
784
785#[allow(unused)]
786fn charset_decoder<R: io::Read>(
787    reader: R,
788    mime_type: Option<&str>,
789    charset: Option<&str>,
790    body_mode: &mut BodyMode,
791) -> CharsetDecoder<R> {
792    #[cfg(feature = "charset")]
793    {
794        use encoding_rs::{Encoding, UTF_8};
795
796        let from = charset
797            .and_then(|c| Encoding::for_label(c.as_bytes()))
798            .unwrap_or(UTF_8);
799
800        if from == UTF_8 {
801            // Do nothing
802            CharsetDecoder::PassThrough(reader)
803        } else {
804            debug!("Decoding charset {}", from.name());
805            *body_mode = BodyMode::Chunked;
806            CharsetDecoder::Decoder(self::charset::CharCodec::new(reader, from, UTF_8))
807        }
808    }
809
810    #[cfg(not(feature = "charset"))]
811    {
812        CharsetDecoder::PassThrough(reader)
813    }
814}
815
/// Outermost reader layer: optionally routes the data through `LossyUtf8Reader`.
enum MaybeLossyDecoder<R> {
    /// Data is read through a `LossyUtf8Reader`.
    Lossy(LossyUtf8Reader<R>),
    /// Data is handed out exactly as the inner reader produces it.
    PassThrough(R),
}
820
821impl<R: io::Read> io::Read for MaybeLossyDecoder<R> {
822    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
823        match self {
824            MaybeLossyDecoder::Lossy(r) => r.read(buf),
825            MaybeLossyDecoder::PassThrough(r) => r.read(buf),
826        }
827    }
828}
829
830impl<'a> io::Read for BodyReader<'a> {
831    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
832        self.reader.read(buf)
833    }
834}
835
/// Reader layer that optionally converts the body from another charset.
enum CharsetDecoder<R> {
    /// Data is read through a `CharCodec`; only exists with the **charset** feature.
    #[cfg(feature = "charset")]
    Decoder(charset::CharCodec<R>),
    /// Data is handed out exactly as the inner reader produces it.
    PassThrough(R),
}
841
842impl<R: io::Read> io::Read for CharsetDecoder<R> {
843    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
844        match self {
845            #[cfg(feature = "charset")]
846            CharsetDecoder::Decoder(v) => v.read(buf),
847            CharsetDecoder::PassThrough(v) => v.read(buf),
848        }
849    }
850}
851
/// Reader layer that optionally decompresses the body.
enum ContentDecoder<R: io::Read> {
    /// Gzip decoding; only exists with the **gzip** feature. Boxed decoder.
    #[cfg(feature = "gzip")]
    Gzip(Box<gzip::GzipDecoder<R>>),
    /// Brotli decoding; only exists with the **brotli** feature. Boxed decoder.
    #[cfg(feature = "brotli")]
    Brotli(Box<brotli::BrotliDecoder<R>>),
    /// Data is handed out exactly as the inner reader produces it.
    PassThrough(R),
}
859
860impl<R: io::Read> io::Read for ContentDecoder<R> {
861    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
862        match self {
863            #[cfg(feature = "gzip")]
864            ContentDecoder::Gzip(v) => v.read(buf),
865            #[cfg(feature = "brotli")]
866            ContentDecoder::Brotli(v) => v.read(buf),
867            ContentDecoder::PassThrough(v) => v.read(buf),
868        }
869    }
870}
871
872impl fmt::Debug for Body {
873    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
874        f.debug_struct("Body").finish()
875    }
876}
877
878impl From<&str> for ContentEncoding {
879    fn from(s: &str) -> Self {
880        match s {
881            "gzip" => ContentEncoding::Gzip,
882            "br" => ContentEncoding::Brotli,
883            _ => {
884                debug!("Unknown content-encoding: {}", s);
885                ContentEncoding::Unknown
886            }
887        }
888    }
889}
890
891impl<'a> From<&'a mut BodyDataSource> for BodySourceRef<'a> {
892    fn from(value: &'a mut BodyDataSource) -> Self {
893        match value {
894            BodyDataSource::Handler(v) => Self::HandlerShared(v),
895            BodyDataSource::Reader(v) => Self::ReaderShared(v),
896        }
897    }
898}
899
900impl From<BodyDataSource> for BodySourceRef<'static> {
901    fn from(value: BodyDataSource) -> Self {
902        match value {
903            BodyDataSource::Handler(v) => Self::HandlerOwned(v),
904            BodyDataSource::Reader(v) => Self::ReaderOwned(v),
905        }
906    }
907}
908
/// A body data source that is either borrowed from a `Body` or owned outright.
///
/// The `*Shared` variants come from `Body::with_config()` and the `*Owned` variants
/// from `Body::into_with_config()`.
pub(crate) enum BodySourceRef<'a> {
    /// Borrowed `BodyHandler`.
    HandlerShared(&'a mut BodyHandler),
    /// Owned, boxed `BodyHandler`.
    HandlerOwned(Box<BodyHandler>),
    /// Borrowed arbitrary reader.
    ReaderShared(&'a mut (dyn io::Read + Send + Sync)),
    /// Owned, boxed arbitrary reader.
    ReaderOwned(Box<dyn io::Read + Send + Sync>),
}
915
916impl<'a> io::Read for BodySourceRef<'a> {
917    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
918        match self {
919            BodySourceRef::HandlerShared(v) => v.read(buf),
920            BodySourceRef::HandlerOwned(v) => v.read(buf),
921            BodySourceRef::ReaderShared(v) => v.read(buf),
922            BodySourceRef::ReaderOwned(v) => v.read(buf),
923        }
924    }
925}
926
// Tests run against canned responses registered with `set_handler`, so they only
// build when the `_test` feature is enabled.
#[cfg(all(test, feature = "_test"))]
mod test {
    use crate::test::init_test_log;
    use crate::transport::set_handler;
    use crate::Error;

    /// A `Content-Type` without parameters yields a mime type and no charset.
    #[test]
    fn content_type_without_charset() {
        init_test_log();
        set_handler("/get", 200, &[("content-type", "application/json")], b"{}");

        let res = crate::get("https://my.test/get").call().unwrap();
        assert_eq!(res.body().mime_type(), Some("application/json"));
        assert!(res.body().charset().is_none());
    }

    /// A `charset=` parameter is split off from the mime type.
    #[test]
    fn content_type_with_charset() {
        init_test_log();
        set_handler(
            "/get",
            200,
            &[("content-type", "application/json; charset=iso-8859-4")],
            b"{}",
        );

        let res = crate::get("https://my.test/get").call().unwrap();
        assert_eq!(res.body().mime_type(), Some("application/json"));
        assert_eq!(res.body().charset(), Some("iso-8859-4"));
    }

    /// A chunked body is reassembled into the plain payload.
    #[test]
    fn chunked_transfer() {
        init_test_log();

        // Two chunks (3 and 0xb = 11 bytes) followed by the terminating zero chunk.
        let s = "3\r\n\
            hel\r\n\
            b\r\n\
            lo world!!!\r\n\
            0\r\n\
            \r\n";

        set_handler(
            "/get",
            200,
            &[("transfer-encoding", "chunked")],
            s.as_bytes(),
        );

        let mut res = crate::get("https://my.test/get").call().unwrap();
        let b = res.body_mut().read_to_string().unwrap();
        assert_eq!(b, "hello world!!!");
    }

    /// A 64 KiB header value makes the call fail with `Error::LargeResponseHeader`.
    #[test]
    fn large_response_header() {
        init_test_log();
        set_handler(
            "/get",
            200,
            &[("content-type", &"b".repeat(64 * 1024))],
            b"{}",
        );

        let err = crate::get("https://my.test/get").call().unwrap_err();
        assert!(matches!(err, Error::LargeResponseHeader(_, _)));
    }
}
990
991        let err = crate::get("https://my.test/get").call().unwrap_err();
992        assert!(matches!(err, Error::LargeResponseHeader(_, _)));
993    }
994}