ureq/body/mod.rs
1use std::fmt;
2use std::io;
3use std::sync::Arc;
4
5pub use build::BodyBuilder;
6use ureq_proto::http::header;
7use ureq_proto::BodyMode;
8
9use crate::http;
10use crate::run::BodyHandler;
11use crate::Error;
12
13use self::limit::LimitReader;
14use self::lossy::LossyUtf8Reader;
15
16mod build;
17mod limit;
18mod lossy;
19
20#[cfg(feature = "charset")]
21mod charset;
22
23#[cfg(feature = "gzip")]
24mod gzip;
25
26#[cfg(feature = "brotli")]
27mod brotli;
28
/// Default max body size in bytes (10 * 1024 * 1024), applied by the convenience methods
/// `Body::read_to_string()`, `Body::read_to_vec()` and `Body::read_json()`.
const MAX_BODY_SIZE: u64 = 10 * 1024 * 1024;
31
32/// A response body returned as [`http::Response<Body>`].
33///
34/// # Default size limit
35///
36/// Methods like `read_to_string()`, `read_to_vec()`, and `read_json()` have a **default 10MB limit**
37/// to prevent memory exhaustion. To download larger files, use `with_config().limit(new_size)`:
38///
39/// ```
40/// // Download a 20MB file
41/// let bytes = ureq::get("http://httpbin.org/bytes/200000000")
42/// .call()?
43/// .body_mut()
44/// .with_config()
45/// .limit(20 * 1024 * 1024) // 20MB
46/// .read_to_vec()?;
47/// # Ok::<_, ureq::Error>(())
48/// ```
49///
50/// # Body lengths
51///
/// HTTP/1.1 has two major modes of transferring body data. Either a `Content-Length`
53/// header defines exactly how many bytes to transfer, or `Transfer-Encoding: chunked`
54/// facilitates a streaming style when the size is not known up front.
55///
56/// To protect against a problem called [request smuggling], ureq has heuristics for
57/// how to interpret a server sending both `Transfer-Encoding` and `Content-Length` headers.
58///
/// 1. `chunked` takes precedence if both headers are present (not for HTTP/1.0)
60/// 2. `content-length` is used if there is no chunked
61/// 3. If there are no headers, fall back on "close delimited" meaning the socket
62/// must close to end the body
63///
64/// When a `Content-Length` header is used, ureq will ensure the received body is _EXACTLY_
65/// as many bytes as declared (it cannot be less). This mechanic is in `ureq-proto`
66/// and is different to the [`BodyWithConfig::limit()`] below.
67///
68/// # Pool reuse
69///
70/// To return a connection (aka [`Transport`][crate::unversioned::transport::Transport])
/// to the Agent's pool, the body must be read to end. If [`BodyWithConfig::limit()`] is set
/// to a shorter size than the actual response body, the connection will not be reused.
73///
74/// # Example
75///
76/// ```
77/// use std::io::Read;
78/// let mut res = ureq::get("http://httpbin.org/bytes/100")
79/// .call()?;
80///
81/// assert!(res.headers().contains_key("Content-Length"));
82/// let len: usize = res.headers().get("Content-Length")
83/// .unwrap().to_str().unwrap().parse().unwrap();
84///
85/// let mut bytes: Vec<u8> = Vec::with_capacity(len);
86/// res.body_mut().as_reader()
87/// .read_to_end(&mut bytes)?;
88///
89/// assert_eq!(bytes.len(), len);
90/// # Ok::<_, ureq::Error>(())
91/// ```
92///
93/// [request smuggling]: https://en.wikipedia.org/wiki/HTTP_request_smuggling
pub struct Body {
    // Where the body bytes are read from.
    source: BodyDataSource,
    // Response metadata (content encoding, mime type, charset, body mode). Kept in an
    // `Arc` so `with_config()` can hand out a handle without copying the data.
    info: Arc<ResponseInfo>,
}
98
/// The underlying source of body bytes held by a [`Body`].
enum BodyDataSource {
    /// A boxed `BodyHandler`; this is the variant `Body::new()` creates.
    Handler(Box<BodyHandler>),
    /// An arbitrary boxed reader.
    // NOTE(review): presumably produced by `BodyBuilder`; the construction site is
    // not visible in this file.
    Reader(Box<dyn io::Read + Send + Sync>),
}
103
/// Body-related metadata derived from the response headers, see [`ResponseInfo::new`].
#[derive(Clone)]
pub(crate) struct ResponseInfo {
    // Parsed from the `Content-Encoding` header; `ContentEncoding::None` when absent.
    content_encoding: ContentEncoding,
    // Mime type part of the `Content-Type` header, if any.
    mime_type: Option<String>,
    // `charset=` parameter of the `Content-Type` header, if any.
    charset: Option<String>,
    // Body mode as handed to `ResponseInfo::new` (no body, length delimited,
    // chunked or close delimited).
    body_mode: BodyMode,
}
111
112impl Body {
113 /// Builder for creating a body
114 ///
115 /// This is useful for testing, or for [`Middleware`][crate::middleware::Middleware] that
116 /// returns another body than the requested one.
    pub fn builder() -> BodyBuilder {
        // Thin convenience wrapper around `BodyBuilder::new()`.
        BodyBuilder::new()
    }
120
121 pub(crate) fn new(handler: BodyHandler, info: ResponseInfo) -> Self {
122 Body {
123 source: BodyDataSource::Handler(Box::new(handler)),
124 info: Arc::new(info),
125 }
126 }
127
128 /// The mime-type of the `content-type` header.
129 ///
130 /// For the below header, we would get `Some("text/plain")`:
131 ///
132 /// ```text
133 /// Content-Type: text/plain; charset=iso-8859-1
134 /// ```
135 ///
136 /// *Caution:* A bad server might set `Content-Type` to one thing and send
137 /// something else. There is no way ureq can verify this.
138 ///
139 /// # Example
140 ///
141 /// ```
142 /// let res = ureq::get("https://www.google.com/")
143 /// .call()?;
144 ///
145 /// assert_eq!(res.body().mime_type(), Some("text/html"));
146 /// # Ok::<_, ureq::Error>(())
147 /// ```
    pub fn mime_type(&self) -> Option<&str> {
        // Parsed from the `Content-Type` header when the `ResponseInfo` was created.
        self.info.mime_type.as_deref()
    }
151
152 /// The charset of the `content-type` header.
153 ///
154 /// For the below header, we would get `Some("iso-8859-1")`:
155 ///
156 /// ```text
157 /// Content-Type: text/plain; charset=iso-8859-1
158 /// ```
159 ///
160 /// *Caution:* A bad server might set `Content-Type` to one thing and send
161 /// something else. There is no way ureq can verify this.
162 ///
163 /// # Example
164 ///
165 /// ```
166 /// let res = ureq::get("https://www.google.com/")
167 /// .call()?;
168 ///
169 /// assert_eq!(res.body().charset(), Some("ISO-8859-1"));
170 /// # Ok::<_, ureq::Error>(())
171 /// ```
    pub fn charset(&self) -> Option<&str> {
        // Parsed from the `Content-Type` header when the `ResponseInfo` was created.
        self.info.charset.as_deref()
    }
175
176 /// The content length of the body.
177 ///
178 /// This is the value of the `Content-Length` header, if there is one. For chunked
179 /// responses (`Transfer-Encoding: chunked`) , this will be `None`. Similarly for
180 /// HTTP/1.0 without a `Content-Length` header, the response is close delimited,
181 /// which means the length is unknown.
182 ///
183 /// A bad server might set `Content-Length` to one thing and send something else.
184 /// ureq will double check this, see section on body length heuristics.
185 ///
186 /// # Example
187 ///
188 /// ```
189 /// let res = ureq::get("https://httpbin.org/bytes/100")
190 /// .call()?;
191 ///
192 /// assert_eq!(res.body().content_length(), Some(100));
193 /// # Ok::<_, ureq::Error>(())
194 /// ```
195 pub fn content_length(&self) -> Option<u64> {
196 match self.info.body_mode {
197 BodyMode::NoBody => None,
198 BodyMode::LengthDelimited(v) => Some(v),
199 BodyMode::Chunked => None,
200 BodyMode::CloseDelimited => None,
201 }
202 }
203
204 /// Handle this body as a shared `impl Read` of the body.
205 ///
206 /// This is the regular API which goes via [`http::Response::body_mut()`] to get a
207 /// mut reference to the `Body`, and then use `as_reader()`. It is also possible to
208 /// get a non-shared, owned reader via [`Body::into_reader()`].
209 ///
    /// * Reader is not limited by default. That means a malicious server could
    ///   exhaust all available memory on your client machine.
    ///   To set a limit use [`Body::with_config()`].
213 /// * Reader will error if `Content-Length` is set, but the connection is closed
214 /// before all bytes are received.
215 ///
216 /// # Example
217 ///
218 /// ```
219 /// use std::io::Read;
220 ///
221 /// let mut res = ureq::get("http://httpbin.org/bytes/100")
222 /// .call()?;
223 ///
224 /// let mut bytes: Vec<u8> = Vec::with_capacity(1000);
225 /// res.body_mut().as_reader()
226 /// .read_to_end(&mut bytes)?;
227 /// # Ok::<_, ureq::Error>(())
228 /// ```
    pub fn as_reader(&mut self) -> BodyReader {
        // A fresh config has no limit (`u64::MAX`) and no lossy utf-8 replacement.
        self.with_config().reader()
    }
232
233 /// Turn this response into an owned `impl Read` of the body.
234 ///
235 /// Sometimes it might be useful to disconnect the body reader from the body.
236 /// The reader returned by [`Body::as_reader()`] borrows the body, while this
237 /// variant consumes the body and turns it into a reader with lifetime `'static`.
238 /// The reader can for instance be sent to another thread.
239 ///
    /// * Reader is not limited by default. That means a malicious server could
    ///   exhaust all available memory on your client machine.
242 /// To set a limit use [`Body::into_with_config()`].
243 /// * Reader will error if `Content-Length` is set, but the connection is closed
244 /// before all bytes are received.
245 ///
246 /// ```
247 /// use std::io::Read;
248 ///
249 /// let res = ureq::get("http://httpbin.org/bytes/100")
250 /// .call()?;
251 ///
252 /// let (_, body) = res.into_parts();
253 ///
254 /// let mut bytes: Vec<u8> = Vec::with_capacity(1000);
255 /// body.into_reader()
256 /// .read_to_end(&mut bytes)?;
257 /// # Ok::<_, ureq::Error>(())
258 /// ```
    pub fn into_reader(self) -> BodyReader<'static> {
        // Consuming `self` yields an owned source, hence the `'static` lifetime.
        // A fresh config has no limit (`u64::MAX`) and no lossy utf-8 replacement.
        self.into_with_config().reader()
    }
262
263 /// Read the response as a string.
264 ///
265 /// * Response is limited to 10MB
    /// * Replaces incorrect utf-8 chars with `?`
267 ///
268 /// To change these defaults use [`Body::with_config()`].
269 ///
270 /// ```
271 /// let mut res = ureq::get("http://httpbin.org/robots.txt")
272 /// .call()?;
273 ///
274 /// let s = res.body_mut().read_to_string()?;
275 /// assert_eq!(s, "User-agent: *\nDisallow: /deny\n");
276 /// # Ok::<_, ureq::Error>(())
277 /// ```
278 ///
279 /// For larger text files, you must explicitly increase the limit:
280 ///
281 /// ```
282 /// // Read a large text file (25MB)
283 /// let text = ureq::get("http://httpbin.org/get")
284 /// .call()?
285 /// .body_mut()
286 /// .with_config()
287 /// .limit(25 * 1024 * 1024) // 25MB
288 /// .read_to_string()?;
289 /// # Ok::<_, ureq::Error>(())
290 /// ```
291 pub fn read_to_string(&mut self) -> Result<String, Error> {
292 self.with_config()
293 .limit(MAX_BODY_SIZE)
294 .lossy_utf8(true)
295 .read_to_string()
296 }
297
298 /// Read the response to a vec.
299 ///
300 /// * Response is limited to 10MB.
301 ///
302 /// To change this default use [`Body::with_config()`].
303 /// ```
304 /// let mut res = ureq::get("http://httpbin.org/bytes/100")
305 /// .call()?;
306 ///
307 /// let bytes = res.body_mut().read_to_vec()?;
308 /// assert_eq!(bytes.len(), 100);
309 /// # Ok::<_, ureq::Error>(())
310 /// ```
311 ///
312 /// For larger files, you must explicitly increase the limit:
313 ///
314 /// ```
315 /// // Download a larger file (50MB)
316 /// let bytes = ureq::get("http://httpbin.org/bytes/200000000")
317 /// .call()?
318 /// .body_mut()
319 /// .with_config()
320 /// .limit(50 * 1024 * 1024) // 50MB
321 /// .read_to_vec()?;
322 /// # Ok::<_, ureq::Error>(())
323 /// ```
324 pub fn read_to_vec(&mut self) -> Result<Vec<u8>, Error> {
325 self.with_config()
326 //
327 .limit(MAX_BODY_SIZE)
328 .read_to_vec()
329 }
330
331 /// Read the response from JSON.
332 ///
333 /// * Response is limited to 10MB.
334 ///
335 /// To change this default use [`Body::with_config()`].
336 ///
337 /// The returned value is something that derives [`Deserialize`](serde::Deserialize).
338 /// You might need to be explicit with which type you want. See example below.
339 ///
340 /// ```
341 /// use serde::Deserialize;
342 ///
343 /// #[derive(Deserialize)]
344 /// struct BodyType {
345 /// slideshow: BodyTypeInner,
346 /// }
347 ///
348 /// #[derive(Deserialize)]
349 /// struct BodyTypeInner {
350 /// author: String,
351 /// }
352 ///
353 /// let body = ureq::get("https://httpbin.org/json")
354 /// .call()?
355 /// .body_mut()
356 /// .read_json::<BodyType>()?;
357 ///
358 /// assert_eq!(body.slideshow.author, "Yours Truly");
359 /// # Ok::<_, ureq::Error>(())
360 /// ```
361 ///
362 /// For larger JSON files, you must explicitly increase the limit:
363 ///
364 /// ```
365 /// use serde_json::Value;
366 ///
367 /// // Parse a large JSON file (30MB)
368 /// let json: Value = ureq::get("https://httpbin.org/json")
369 /// .call()?
370 /// .body_mut()
371 /// .with_config()
372 /// .limit(30 * 1024 * 1024) // 30MB
373 /// .read_json()?;
374 /// # Ok::<_, ureq::Error>(())
375 /// ```
376 #[cfg(feature = "json")]
377 pub fn read_json<T: serde::de::DeserializeOwned>(&mut self) -> Result<T, Error> {
378 let reader = self.with_config().limit(MAX_BODY_SIZE).reader();
379 let value: T = serde_json::from_reader(reader)?;
380 Ok(value)
381 }
382
383 /// Read the body data with configuration.
384 ///
385 /// This borrows the body which gives easier use with [`http::Response::body_mut()`].
386 /// To get a non-borrowed reader use [`Body::into_with_config()`].
387 ///
388 /// # Example
389 ///
390 /// ```
391 /// let reader = ureq::get("http://httpbin.org/bytes/100")
392 /// .call()?
393 /// .body_mut()
394 /// .with_config()
395 /// // Reader will only read 50 bytes
396 /// .limit(50)
397 /// .reader();
398 /// # Ok::<_, ureq::Error>(())
399 /// ```
400 pub fn with_config(&mut self) -> BodyWithConfig {
401 let handler = (&mut self.source).into();
402 BodyWithConfig::new(handler, self.info.clone())
403 }
404
405 /// Consume self and read the body with configuration.
406 ///
407 /// This consumes self and returns a reader with `'static` lifetime.
408 ///
409 /// # Example
410 ///
411 /// ```
412 /// // Get the body out of http::Response
413 /// let (_, body) = ureq::get("http://httpbin.org/bytes/100")
414 /// .call()?
415 /// .into_parts();
416 ///
417 /// let reader = body
418 /// .into_with_config()
419 /// // Reader will only read 50 bytes
420 /// .limit(50)
421 /// .reader();
422 /// # Ok::<_, ureq::Error>(())
423 /// ```
424 ///
425 /// This limit behavior can be used to prevent a malicious server from exhausting
426 /// memory on the client machine. For example, if the machine running
427 /// ureq has 1GB of RAM, you could protect the machine by setting a smaller
428 /// limit such as 128MB. The exact number will vary by your client's download
429 /// needs, available system resources, and system utilization.
430 pub fn into_with_config(self) -> BodyWithConfig<'static> {
431 let handler = self.source.into();
432 BodyWithConfig::new(handler, self.info)
433 }
434}
435
436/// Configuration of how to read the body.
437///
438/// Obtained via one of:
439///
440/// * [Body::with_config()]
441/// * [Body::into_with_config()]
442///
443/// # Handling large responses
444///
445/// The `BodyWithConfig` is the primary way to increase the default 10MB size limit
446/// when downloading large files to memory:
447///
448/// ```
449/// // Download a 50MB file
450/// let large_data = ureq::get("http://httpbin.org/bytes/200000000")
451/// .call()?
452/// .body_mut()
453/// .with_config()
454/// .limit(50 * 1024 * 1024) // 50MB
455/// .read_to_vec()?;
456/// # Ok::<_, ureq::Error>(())
457/// ```
pub struct BodyWithConfig<'a> {
    // Borrowed or owned source of the body bytes.
    handler: BodySourceRef<'a>,
    // Response metadata, shared with the `Body` this config was created from.
    info: Arc<ResponseInfo>,
    // Max number of bytes to read before erroring; `u64::MAX` unless `limit()` is called.
    limit: u64,
    // Whether invalid utf-8 is replaced by `?`; `false` unless `lossy_utf8()` is called.
    lossy_utf8: bool,
}
464
465impl<'a> BodyWithConfig<'a> {
466 fn new(handler: BodySourceRef<'a>, info: Arc<ResponseInfo>) -> Self {
467 BodyWithConfig {
468 handler,
469 info,
470 limit: u64::MAX,
471 lossy_utf8: false,
472 }
473 }
474
475 /// Limit the response body.
476 ///
477 /// Controls how many bytes we should read before throwing an error. This is used
478 /// to ensure RAM isn't exhausted by a server sending a very large response body.
479 ///
480 /// The default limit is `u64::MAX` (unlimited).
481 pub fn limit(mut self, value: u64) -> Self {
482 self.limit = value;
483 self
484 }
485
486 /// Replace invalid utf-8 chars.
487 ///
488 /// `true` means that broken utf-8 characters are replaced by a question mark `?`
489 /// (not utf-8 replacement char). This happens after charset conversion regardless of
490 /// whether the **charset** feature is enabled or not.
491 ///
492 /// The default is `false`.
493 pub fn lossy_utf8(mut self, value: bool) -> Self {
494 self.lossy_utf8 = value;
495 self
496 }
497
498 fn do_build(self) -> BodyReader<'a> {
499 BodyReader::new(
500 LimitReader::new(self.handler, self.limit),
501 &self.info,
502 self.info.body_mode,
503 self.lossy_utf8,
504 )
505 }
506
507 /// Creates a reader.
508 ///
509 /// The reader is either shared or owned, depending on `with_config` or `into_with_config`.
510 ///
511 /// # Example of owned vs shared
512 ///
513 /// ```
514 /// // Creates an owned reader.
515 /// let reader = ureq::get("https://httpbin.org/get")
516 /// .call()?
517 /// .into_body()
518 /// // takes ownership of Body
519 /// .into_with_config()
520 /// .limit(10)
521 /// .reader();
522 /// # Ok::<_, ureq::Error>(())
523 /// ```
524 ///
525 /// ```
526 /// // Creates a shared reader.
527 /// let reader = ureq::get("https://httpbin.org/get")
528 /// .call()?
529 /// .body_mut()
530 /// // borrows Body
531 /// .with_config()
532 /// .limit(10)
533 /// .reader();
534 /// # Ok::<_, ureq::Error>(())
535 /// ```
    pub fn reader(self) -> BodyReader<'a> {
        // Public entry point to the shared construction logic in `do_build()`.
        self.do_build()
    }
539
540 /// Read into string.
541 ///
    /// *Caution:* without a preceding [`limit()`][BodyWithConfig::limit], this
543 /// becomes an unbounded sized `String`. A bad server could exhaust your memory.
544 ///
545 /// # Example
546 ///
547 /// ```
548 /// // Reads max 10k to a String.
549 /// let string = ureq::get("https://httpbin.org/get")
550 /// .call()?
551 /// .body_mut()
552 /// .with_config()
553 /// // Important. Limits body to 10k
554 /// .limit(10_000)
555 /// .read_to_string()?;
556 /// # Ok::<_, ureq::Error>(())
557 /// ```
558 pub fn read_to_string(self) -> Result<String, Error> {
559 use std::io::Read;
560 let mut reader = self.do_build();
561 let mut buf = String::new();
562 reader.read_to_string(&mut buf)?;
563 Ok(buf)
564 }
565
566 /// Read into vector.
567 ///
    /// *Caution:* without a preceding [`limit()`][BodyWithConfig::limit], this
569 /// becomes an unbounded sized `Vec`. A bad server could exhaust your memory.
570 ///
571 /// # Example
572 ///
573 /// ```
574 /// // Reads max 10k to a Vec.
575 /// let myvec = ureq::get("https://httpbin.org/get")
576 /// .call()?
577 /// .body_mut()
578 /// .with_config()
579 /// // Important. Limits body to 10k
580 /// .limit(10_000)
581 /// .read_to_vec()?;
582 /// # Ok::<_, ureq::Error>(())
583 /// ```
584 pub fn read_to_vec(self) -> Result<Vec<u8>, Error> {
585 use std::io::Read;
586 let mut reader = self.do_build();
587 let mut buf = Vec::new();
588 reader.read_to_end(&mut buf)?;
589 Ok(buf)
590 }
591
592 /// Read JSON body.
593 ///
    /// *Caution:* without a preceding [`limit()`][BodyWithConfig::limit], this
595 /// becomes an unbounded sized `String`. A bad server could exhaust your memory.
596 ///
597 /// # Example
598 ///
599 /// ```
600 /// use serde_json::Value;
601 ///
602 /// // Reads max 10k as a JSON value.
603 /// let json: Value = ureq::get("https://httpbin.org/get")
604 /// .call()?
605 /// .body_mut()
606 /// .with_config()
607 /// // Important. Limits body to 10k
608 /// .limit(10_000)
609 /// .read_json()?;
610 /// # Ok::<_, ureq::Error>(())
611 /// ```
612 #[cfg(feature = "json")]
613 pub fn read_json<T: serde::de::DeserializeOwned>(self) -> Result<T, Error> {
614 let reader = self.do_build();
615 let value: T = serde_json::from_reader(reader)?;
616 Ok(value)
617 }
618}
619
/// Content codings recognized from the `Content-Encoding` header.
#[derive(Debug, Clone, Copy)]
enum ContentEncoding {
    /// No `Content-Encoding` header, or its value could not be read as a string.
    None,
    /// The header value was `gzip`.
    Gzip,
    /// The header value was `br`.
    Brotli,
    /// Any other header value; the body is passed through undecoded.
    Unknown,
}
627
628impl ResponseInfo {
629 pub fn new(headers: &http::HeaderMap, body_mode: BodyMode) -> Self {
630 let content_encoding = headers
631 .get(header::CONTENT_ENCODING)
632 .and_then(|v| v.to_str().ok())
633 .map(ContentEncoding::from)
634 .unwrap_or(ContentEncoding::None);
635
636 let (mime_type, charset) = headers
637 .get(header::CONTENT_TYPE)
638 .and_then(|v| v.to_str().ok())
639 .map(split_content_type)
640 .unwrap_or((None, None));
641
642 ResponseInfo {
643 content_encoding,
644 mime_type,
645 charset,
646 body_mode,
647 }
648 }
649
650 /// Whether the mime type indicats text.
651 fn is_text(&self) -> bool {
652 self.mime_type
653 .as_deref()
654 .map(|s| s.starts_with("text/"))
655 .unwrap_or(false)
656 }
657}
658
/// Splits a `Content-Type` header value into `(mime_type, charset)`.
///
/// For `text/plain; charset=iso-8859-1` the result is
/// `(Some("text/plain"), Some("iso-8859-1"))`.
///
/// * The mime type is trimmed; an empty mime type is reported as `None`.
/// * The `charset` parameter name is matched case-insensitively and whitespace
///   around the name and value is ignored.
/// * A value wrapped in double quotes (`charset="utf-8"`) is returned without the quotes.
/// * If `charset` occurs more than once, the last occurrence wins.
fn split_content_type(content_type: &str) -> (Option<String>, Option<String>) {
    // Content-Type: text/plain; charset=iso-8859-1
    let mut parts = content_type.split(';');

    // `str::split` always yields at least one (possibly empty) item.
    let mime_type = parts.next().unwrap_or_default().trim();

    let mut charset = None;

    for param in parts {
        let Some((name, value)) = param.split_once('=') else {
            continue;
        };

        if !name.trim().eq_ignore_ascii_case("charset") {
            continue;
        }

        let value = value.trim();
        // Parameter values may be sent as a quoted-string.
        let unquoted = value
            .strip_prefix('"')
            .and_then(|v| v.strip_suffix('"'))
            .unwrap_or(value);

        charset = Some(unquoted.to_string());
    }

    let mime_type = (!mime_type.is_empty()).then(|| mime_type.to_string());

    (mime_type, charset)
}
678
679/// A reader of the response data.
680///
681/// 1. If `Transfer-Encoding: chunked`, the returned reader will unchunk it
682/// and any `Content-Length` header is ignored.
683/// 2. If `Content-Encoding: gzip` (or `br`) and the corresponding feature
684/// flag is enabled (**gzip** and **brotli**), decompresses the body data.
685/// 3. Given a header like `Content-Type: text/plain; charset=ISO-8859-1`
686/// and the **charset** feature enabled, will translate the body to utf-8.
///    This mechanic needs two components: a mime-type starting with `text/` and
688/// a non-utf8 charset indication.
689/// 4. If `Content-Length` is set, the returned reader is limited to this byte
690/// length regardless of how many bytes the server sends.
691/// 5. If no length header, the reader is until server stream end.
692/// 6. The limit in the body method used to obtain the reader.
693///
694/// Note: The reader is also limited by the [`Body::as_reader`] and
695/// [`Body::into_reader`] calls. If that limit is set very high, a malicious
696/// server might return enough bytes to exhaust available memory. If you're
/// making requests to untrusted servers, you should set that
698/// limit accordingly.
699///
700/// # Example
701///
702/// ```
703/// use std::io::Read;
704/// let mut res = ureq::get("http://httpbin.org/bytes/100")
705/// .call()?;
706///
707/// assert!(res.headers().contains_key("Content-Length"));
708/// let len: usize = res.headers().get("Content-Length")
709/// .unwrap().to_str().unwrap().parse().unwrap();
710///
711/// let mut bytes: Vec<u8> = Vec::with_capacity(len);
712/// res.body_mut().as_reader()
713/// .read_to_end(&mut bytes)?;
714///
715/// assert_eq!(bytes.len(), len);
716/// # Ok::<_, ureq::Error>(())
717/// ```
pub struct BodyReader<'a> {
    // Decoder chain, innermost first: size limit -> content decoding (gzip/brotli)
    // -> charset conversion -> optional lossy utf-8 replacement.
    reader: MaybeLossyDecoder<CharsetDecoder<ContentDecoder<LimitReader<BodySourceRef<'a>>>>>,
    // If this reader is used as SendBody for another request, this
    // body mode can indicate the content-length. Gzip, charset etc
    // would mean input is not same as output.
    outgoing_body_mode: BodyMode,
}
725
impl<'a> BodyReader<'a> {
    /// Wraps the size-limited source in the decoders the response metadata calls for.
    ///
    /// * `reader` - the already size-limited body source.
    /// * `info` - response metadata deciding which decoders are applied.
    /// * `incoming_body_mode` - body mode of the response as received.
    /// * `lossy_utf8` - replace invalid utf-8 (only applied to `text/` mime types).
    fn new(
        reader: LimitReader<BodySourceRef<'a>>,
        info: &ResponseInfo,
        incoming_body_mode: BodyMode,
        lossy_utf8: bool,
    ) -> BodyReader<'a> {
        // This is outgoing body_mode in case we are using the BodyReader as a send body
        // in a proxy situation. It starts out as the incoming mode and is switched to
        // chunked whenever a decoder is inserted that changes the bytes.
        let mut outgoing_body_mode = incoming_body_mode;

        // Content decoding. When the matching feature is disabled, the encoded bytes
        // are passed through untouched.
        let reader = match info.content_encoding {
            ContentEncoding::None | ContentEncoding::Unknown => ContentDecoder::PassThrough(reader),
            #[cfg(feature = "gzip")]
            ContentEncoding::Gzip => {
                debug!("Decoding gzip");
                outgoing_body_mode = BodyMode::Chunked;
                ContentDecoder::Gzip(Box::new(gzip::GzipDecoder::new(reader)))
            }
            #[cfg(not(feature = "gzip"))]
            ContentEncoding::Gzip => ContentDecoder::PassThrough(reader),
            #[cfg(feature = "brotli")]
            ContentEncoding::Brotli => {
                debug!("Decoding brotli");
                outgoing_body_mode = BodyMode::Chunked;
                ContentDecoder::Brotli(Box::new(brotli::BrotliDecoder::new(reader)))
            }
            #[cfg(not(feature = "brotli"))]
            ContentEncoding::Brotli => ContentDecoder::PassThrough(reader),
        };

        // Charset conversion is only considered for text mime types.
        let reader = if info.is_text() {
            charset_decoder(
                reader,
                info.mime_type.as_deref(),
                info.charset.as_deref(),
                &mut outgoing_body_mode,
            )
        } else {
            CharsetDecoder::PassThrough(reader)
        };

        // Lossy replacement must be requested and is likewise limited to text.
        let reader = if info.is_text() && lossy_utf8 {
            MaybeLossyDecoder::Lossy(LossyUtf8Reader::new(reader))
        } else {
            MaybeLossyDecoder::PassThrough(reader)
        };

        BodyReader {
            outgoing_body_mode,
            reader,
        }
    }

    /// The body mode to use if this reader is sent on as the body of another request.
    pub(crate) fn body_mode(&self) -> BodyMode {
        self.outgoing_body_mode
    }
}
784
/// Wraps `reader` in a charset decoder translating the body to utf-8 when needed.
///
/// With the **charset** feature enabled, `charset` is looked up as an encoding label.
/// A missing or unrecognized label is treated as utf-8, in which case the reader is
/// passed through unchanged. For any other encoding a decoder is inserted and
/// `body_mode` is set to chunked.
///
/// Without the **charset** feature the reader is always passed through.
// Without the charset feature the parameters other than `reader` are unused,
// hence the `allow`.
#[allow(unused)]
fn charset_decoder<R: io::Read>(
    reader: R,
    mime_type: Option<&str>,
    charset: Option<&str>,
    body_mode: &mut BodyMode,
) -> CharsetDecoder<R> {
    #[cfg(feature = "charset")]
    {
        use encoding_rs::{Encoding, UTF_8};

        // Unknown or absent labels fall back to utf-8.
        let from = charset
            .and_then(|c| Encoding::for_label(c.as_bytes()))
            .unwrap_or(UTF_8);

        if from == UTF_8 {
            // Do nothing
            CharsetDecoder::PassThrough(reader)
        } else {
            debug!("Decoding charset {}", from.name());
            *body_mode = BodyMode::Chunked;
            CharsetDecoder::Decoder(self::charset::CharCodec::new(reader, from, UTF_8))
        }
    }

    #[cfg(not(feature = "charset"))]
    {
        CharsetDecoder::PassThrough(reader)
    }
}
815
816enum MaybeLossyDecoder<R> {
817 Lossy(LossyUtf8Reader<R>),
818 PassThrough(R),
819}
820
821impl<R: io::Read> io::Read for MaybeLossyDecoder<R> {
822 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
823 match self {
824 MaybeLossyDecoder::Lossy(r) => r.read(buf),
825 MaybeLossyDecoder::PassThrough(r) => r.read(buf),
826 }
827 }
828}
829
830impl<'a> io::Read for BodyReader<'a> {
831 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
832 self.reader.read(buf)
833 }
834}
835
/// Charset stage of the decoder chain.
enum CharsetDecoder<R> {
    /// Reads go through a `CharCodec` (only with the **charset** feature).
    #[cfg(feature = "charset")]
    Decoder(charset::CharCodec<R>),
    /// Reads go straight to the wrapped reader.
    PassThrough(R),
}

impl<R: io::Read> io::Read for CharsetDecoder<R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self {
            #[cfg(feature = "charset")]
            Self::Decoder(codec) => codec.read(buf),
            Self::PassThrough(inner) => inner.read(buf),
        }
    }
}
851
/// Content-encoding stage of the decoder chain.
enum ContentDecoder<R: io::Read> {
    /// Reads go through a gzip decoder (only with the **gzip** feature).
    #[cfg(feature = "gzip")]
    Gzip(Box<gzip::GzipDecoder<R>>),
    /// Reads go through a brotli decoder (only with the **brotli** feature).
    #[cfg(feature = "brotli")]
    Brotli(Box<brotli::BrotliDecoder<R>>),
    /// Reads go straight to the wrapped reader.
    PassThrough(R),
}

impl<R: io::Read> io::Read for ContentDecoder<R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self {
            #[cfg(feature = "gzip")]
            Self::Gzip(decoder) => decoder.read(buf),
            #[cfg(feature = "brotli")]
            Self::Brotli(decoder) => decoder.read(buf),
            Self::PassThrough(inner) => inner.read(buf),
        }
    }
}
871
872impl fmt::Debug for Body {
873 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
874 f.debug_struct("Body").finish()
875 }
876}
877
878impl From<&str> for ContentEncoding {
879 fn from(s: &str) -> Self {
880 match s {
881 "gzip" => ContentEncoding::Gzip,
882 "br" => ContentEncoding::Brotli,
883 _ => {
884 debug!("Unknown content-encoding: {}", s);
885 ContentEncoding::Unknown
886 }
887 }
888 }
889}
890
891impl<'a> From<&'a mut BodyDataSource> for BodySourceRef<'a> {
892 fn from(value: &'a mut BodyDataSource) -> Self {
893 match value {
894 BodyDataSource::Handler(v) => Self::HandlerShared(v),
895 BodyDataSource::Reader(v) => Self::ReaderShared(v),
896 }
897 }
898}
899
900impl From<BodyDataSource> for BodySourceRef<'static> {
901 fn from(value: BodyDataSource) -> Self {
902 match value {
903 BodyDataSource::Handler(v) => Self::HandlerOwned(v),
904 BodyDataSource::Reader(v) => Self::ReaderOwned(v),
905 }
906 }
907}
908
909pub(crate) enum BodySourceRef<'a> {
910 HandlerShared(&'a mut BodyHandler),
911 HandlerOwned(Box<BodyHandler>),
912 ReaderShared(&'a mut (dyn io::Read + Send + Sync)),
913 ReaderOwned(Box<dyn io::Read + Send + Sync>),
914}
915
916impl<'a> io::Read for BodySourceRef<'a> {
917 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
918 match self {
919 BodySourceRef::HandlerShared(v) => v.read(buf),
920 BodySourceRef::HandlerOwned(v) => v.read(buf),
921 BodySourceRef::ReaderShared(v) => v.read(buf),
922 BodySourceRef::ReaderOwned(v) => v.read(buf),
923 }
924 }
925}
926
// Tests run against canned responses registered with `set_handler`; they are only
// compiled with the `_test` feature.
#[cfg(all(test, feature = "_test"))]
mod test {
    use crate::test::init_test_log;
    use crate::transport::set_handler;
    use crate::Error;

    /// A `Content-Type` without parameters yields a mime type but no charset.
    #[test]
    fn content_type_without_charset() {
        init_test_log();
        set_handler("/get", 200, &[("content-type", "application/json")], b"{}");

        let res = crate::get("https://my.test/get").call().unwrap();
        assert_eq!(res.body().mime_type(), Some("application/json"));
        assert!(res.body().charset().is_none());
    }

    /// The `charset=` parameter is split off from the mime type.
    #[test]
    fn content_type_with_charset() {
        init_test_log();
        set_handler(
            "/get",
            200,
            &[("content-type", "application/json; charset=iso-8859-4")],
            b"{}",
        );

        let res = crate::get("https://my.test/get").call().unwrap();
        assert_eq!(res.body().mime_type(), Some("application/json"));
        assert_eq!(res.body().charset(), Some("iso-8859-4"));
    }

    /// A chunked body is reassembled into the plain payload.
    #[test]
    fn chunked_transfer() {
        init_test_log();

        // Two chunks (sizes in hex: 3 and b = 11 bytes) followed by the terminating
        // zero-sized chunk.
        let s = "3\r\n\
            hel\r\n\
            b\r\n\
            lo world!!!\r\n\
            0\r\n\
            \r\n";

        set_handler(
            "/get",
            200,
            &[("transfer-encoding", "chunked")],
            s.as_bytes(),
        );

        let mut res = crate::get("https://my.test/get").call().unwrap();
        let b = res.body_mut().read_to_string().unwrap();
        assert_eq!(b, "hello world!!!");
    }

    /// A 64 KiB header value makes the call fail with `Error::LargeResponseHeader`.
    #[test]
    fn large_response_header() {
        init_test_log();
        set_handler(
            "/get",
            200,
            &[("content-type", &"b".repeat(64 * 1024))],
            b"{}",
        );

        let err = crate::get("https://my.test/get").call().unwrap_err();
        assert!(matches!(err, Error::LargeResponseHeader(_, _)));
    }
}