yore/lib.rs
1use std::borrow::Cow;
2
3use thiserror::Error;
4
5pub mod code_pages;
6pub(crate) mod decoder;
7mod encoder;
8pub(crate) use encoder::Encoder;
9
10#[derive(Error, Debug)]
11#[error("Character in UTF-8 string has no mapping defined in code page")]
12pub struct EncodeError {}
13
14pub trait CodePage: Encoder {
15 /// Encode UTF-8 string into single-byte encoding
16 ///
17 /// Undefined characters will result in [`EncodeError`]
18 ///
19 /// # Examples
20 ///
21 /// ```
22 /// use yore::{CodePage, EncodeError};
23 ///
24 /// // Erase type for example - prefer concrete type over trait object whenever possible
25 /// let cp850: &dyn CodePage = &yore::code_pages::CP850;
26 /// assert_eq!(cp850.encode("text").unwrap(), vec![116, 101, 120, 116]);
27 /// assert!(matches!(cp850.encode("text 🦀"), EncodeError));
28 /// ```
29 #[inline]
30 fn encode<'a>(&self, s: &'a str) -> Result<Cow<'a, [u8]>, EncodeError> {
31 self.encode_helper(s, None)
32 }
33
34 /// Encode UTF-8 string into single-byte encoding
35 ///
36 /// Undefined characters will be replaced with byte `fallback`
37 ///
38 /// # Examples
39 ///
40 /// ```
41 /// use yore::CodePage;
42 ///
43 /// // Erase type for example - prefer concrete type over trait object whenever possible
44 /// let cp850: &dyn CodePage = &yore::code_pages::CP850;
45 /// assert_eq!(cp850.encode_lossy("text 🦀", 168), vec![116, 101, 120, 116, 32, 168])
46 /// ```
47 #[inline]
48 fn encode_lossy<'a>(&self, s: &'a str, fallback: u8) -> Cow<'a, [u8]> {
49 self.encode_helper(s, Some(fallback)).unwrap()
50 }
51
52 /// Decode single-byte encoding into UTF-8 string
53 ///
54 /// Undefined codepoints will result in [`DecodeError`]
55 ///
56 /// # Examples
57 ///
58 /// ```
59 /// use yore::{CodePage, DecodeError};
60 ///
61 /// // Erase types for example - prefer concrete type over trait object whenever possible
62 /// let cp850: &dyn CodePage = &yore::code_pages::CP850;
63 /// let cp857: &dyn CodePage = &yore::code_pages::CP857;
64 /// assert_eq!(cp850.decode(&[116, 101, 120, 116]).unwrap(), "text");
65 ///
66 /// //codepoint 231 is undefined
67 /// assert!(matches!(cp857.decode(&[116, 101, 120, 116, 231]), Err(DecodeError{position: 4, value: 231})));
68 /// ```
69 fn decode<'a>(&self, bytes: &'a [u8]) -> Result<Cow<'a, str>, DecodeError>;
70
71 /// Decode single-byte encoding into UTF-8 string
72 ///
73 /// Undefined codepoints will be replaced with `'�'`
74 ///
75 /// # Examples
76 ///
77 /// ```
78 /// use yore::CodePage;
79 ///
80 /// // Erase type for example - prefer concrete type over trait object whenever possible
81 /// let cp857: &dyn CodePage = &yore::code_pages::CP857;
82 /// //codepoint 231 is undefined
83 /// assert_eq!(cp857.decode_lossy(&[116, 101, 120, 116, 32, 231]), "text �");
84 /// ```
85 #[inline(always)]
86 fn decode_lossy<'a>(&self, bytes: &'a [u8]) -> Cow<'a, str> {
87 self.decode(bytes).unwrap()
88 }
89
90 /// Decode single-byte encoding into UTF-8 string
91 ///
92 /// Undefined codepoints will be replaced with `fallback`
93 ///
94 /// # Examples
95 ///
96 /// ```
97 /// use yore::CodePage;
98 ///
99 /// // Erase type for example - prefer concrete type over trait object whenever possible
100 /// let cp857: &dyn CodePage = &yore::code_pages::CP857;
101 /// //codepoint 231 is undefined
102 /// assert_eq!(cp857.decode_lossy_fallback(&[116, 101, 120, 116, 32, 231], '�'), "text �");
103 /// ```
104 #[inline(always)]
105 fn decode_lossy_fallback<'a>(&self, bytes: &'a [u8], _fallback: char) -> Cow<'a, str> {
106 self.decode(bytes).unwrap()
107 }
108}
109
110#[derive(Error, Debug)]
111#[error("Undefined codepoint {value} at offset {position}")]
112pub struct DecodeError {
113 pub position: usize,
114 pub value: u8,
115}
116
#[cfg(test)]
mod tests {
    use crate::code_pages::CP864;

    /// Round-trips text through CP864, whose mapping for byte 0x25 is nonstandard.
    #[test]
    fn test_nonstandard_ascii() {
        //CP864 has nonstandard mapping for 0x25
        let short_bytes = [0x25, 253];
        let nonstandard_text = "٪ﻱ";
        assert_eq!(CP864.decode(&short_bytes).unwrap(), nonstandard_text);
        assert_eq!(short_bytes, *CP864.encode(nonstandard_text).unwrap());

        //Standard '%' should still map to 0x25
        let standard_text = "%ﻱ";
        assert_eq!(short_bytes, *CP864.encode(standard_text).unwrap());

        //Should decode to nonstandard, even if whole usize-len is ascii
        let long_bytes = [65, 65, 65, 65, 65, 65, 65, 0x25];
        let long_text = "AAAAAAA٪";
        assert_eq!(CP864.decode(&long_bytes).unwrap(), long_text);
    }
}
138}