use std::borrow::Cow;
/// Error reported by a raw encoder or decoder (`raw_feed` / `raw_finish`).
///
/// `Encoding::encode` and `Encoding::decode` in this file consume it: the
/// span between the first unprocessed position and `upto` is handed to the
/// active trap, and `cause` becomes the `Err` value if the trap gives up.
#[unstable]
pub struct CodecError {
/// Signed offset at which processing should resume. The callers in this
/// file add it to the start of the slice they passed to `raw_feed` (or to
/// the input length for `raw_finish`) to obtain the next position.
pub upto: isize,
/// Human-readable description of the problem; returned to the caller as
/// the error when the trap returns `false`.
pub cause: Cow<'static, str>,
}
/// Sink for the bytes produced by an encoder.
///
/// Encoders receive it as a `&mut ByteWriter` trait object; `Vec<u8>` is the
/// implementation provided in this file.
#[unstable]
pub trait ByteWriter {
/// Hints how many more bytes are expected to be written.
/// The default implementation ignores the hint.
fn writer_hint(&mut self, _expectedlen: usize) {}
/// Writes a single byte.
fn write_byte(&mut self, b: u8);
/// Writes a slice of bytes.
fn write_bytes(&mut self, v: &[u8]);
}
/// Growable in-memory byte sink: everything written is appended to the vector.
impl ByteWriter for Vec<u8> {
    /// Makes room for `expectedlen` further bytes ahead of time.
    fn writer_hint(&mut self, expectedlen: usize) {
        Vec::reserve(self, expectedlen);
    }

    /// Appends one byte to the end of the vector.
    fn write_byte(&mut self, b: u8) {
        Vec::push(self, b);
    }

    /// Appends every byte of `v`, preserving order.
    fn write_bytes(&mut self, v: &[u8]) {
        let bytes = v.iter().map(|&byte| byte);
        self.extend(bytes);
    }
}
/// Sink for the text produced by a decoder.
///
/// Decoders receive it as a `&mut StringWriter` trait object; `String` is the
/// implementation provided in this file.
#[unstable]
pub trait StringWriter {
/// Hints how much more output is expected to be written.
/// The default implementation ignores the hint.
fn writer_hint(&mut self, _expectedlen: usize) {}
/// Writes a single character.
fn write_char(&mut self, c: char);
/// Writes a string slice.
fn write_str(&mut self, s: &str);
}
/// Growable in-memory text sink: everything written is appended to the string.
impl StringWriter for String {
    /// Makes room for `expectedlen` further bytes ahead of time.
    ///
    /// `String::reserve` already takes the number of *additional* bytes, so
    /// the hint is forwarded unchanged. Adding the current length first (as
    /// this method used to do) over-reserves by `self.len()` and forces a
    /// reallocation even when the spare capacity is already sufficient.
    fn writer_hint(&mut self, expectedlen: usize) {
        self.reserve(expectedlen);
    }

    /// Appends one character.
    fn write_char(&mut self, c: char) {
        self.push(c);
    }

    /// Appends a string slice.
    fn write_str(&mut self, s: &str) {
        self.push_str(s);
    }
}
/// Incremental encoder: consumes text and emits bytes into a `ByteWriter`.
///
/// `Encoding::encode` drives it by calling `raw_feed` repeatedly and then
/// `raw_finish` once the input is exhausted.
#[unstable]
pub trait RawEncoder: 'static {
    /// Returns a new boxed encoder derived from this one.
    fn from_self(&self) -> Box<RawEncoder>;

    /// Whether plain ASCII text can be written to the output as-is.
    /// The re-encoding traps use this to bypass `raw_feed` for their
    /// replacement text. Defaults to `false`.
    fn is_ascii_compatible(&self) -> bool { false }

    /// Encodes `input` into `output`.
    ///
    /// Returns the offset of the first unprocessed byte of `input` and, if a
    /// problem was hit, the error describing it (see `CodecError::upto` for
    /// where processing resumes).
    fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>);

    /// Signals the end of input, letting the encoder write any trailing
    /// output; returns an error if one is detected at this point.
    fn raw_finish(&mut self, output: &mut ByteWriter) -> Option<CodecError>;

    /// Test-only hook; the default returns `input` unchanged.
    #[cfg(test)]
    fn test_norm_input<'r>(&self, input: &'r str) -> &'r str { input }

    /// Test-only hook; the default returns `output` unchanged.
    #[cfg(test)]
    fn test_norm_output<'r>(&self, output: &'r [u8]) -> &'r [u8] { output }

    /// Test-only helper: runs `raw_feed` against a fresh buffer and returns
    /// its results together with the bytes written.
    #[cfg(test)]
    fn test_feed(&mut self, input: &str) -> (usize, Option<CodecError>, Vec<u8>) {
        let mut written = Vec::new();
        let (consumed, failure) = self.raw_feed(input, &mut written);
        (consumed, failure, written)
    }

    /// Test-only helper: runs `raw_finish` against a fresh buffer and returns
    /// its result together with the bytes written.
    #[cfg(test)]
    fn test_finish(&mut self) -> (Option<CodecError>, Vec<u8>) {
        let mut written = Vec::new();
        let failure = self.raw_finish(&mut written);
        (failure, written)
    }

    /// Test-only helper: returns `a` followed by `b` as one owned string.
    #[cfg(test)]
    fn test_concat(&self, a: &str, b: &str) -> String {
        let mut joined = String::with_capacity(a.len() + b.len());
        joined.push_str(a);
        joined.push_str(b);
        joined
    }
}
/// Incremental decoder: consumes bytes and emits text into a `StringWriter`.
///
/// `Encoding::decode` drives it by calling `raw_feed` repeatedly and then
/// `raw_finish` once the input is exhausted.
#[unstable]
pub trait RawDecoder: 'static {
    /// Returns a new boxed decoder derived from this one.
    fn from_self(&self) -> Box<RawDecoder>;

    /// Whether the encoding is ASCII-compatible. Defaults to `false`.
    fn is_ascii_compatible(&self) -> bool { false }

    /// Decodes `input` into `output`.
    ///
    /// Returns the offset of the first unprocessed byte of `input` and, if a
    /// problem was hit, the error describing it (see `CodecError::upto` for
    /// where processing resumes).
    fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>);

    /// Signals the end of input, letting the decoder write any trailing
    /// output; returns an error if one is detected at this point.
    fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError>;

    /// Test-only hook; the default returns `input` unchanged.
    #[cfg(test)]
    fn test_norm_input<'r>(&self, input: &'r [u8]) -> &'r [u8] { input }

    /// Test-only hook; the default returns `output` unchanged.
    #[cfg(test)]
    fn test_norm_output<'r>(&self, output: &'r str) -> &'r str { output }

    /// Test-only helper: runs `raw_feed` against a fresh buffer and returns
    /// its results together with the text written.
    #[cfg(test)]
    fn test_feed(&mut self, input: &[u8]) -> (usize, Option<CodecError>, String) {
        let mut written = String::new();
        let (consumed, failure) = self.raw_feed(input, &mut written);
        (consumed, failure, written)
    }

    /// Test-only helper: runs `raw_finish` against a fresh buffer and returns
    /// its result together with the text written.
    #[cfg(test)]
    fn test_finish(&mut self) -> (Option<CodecError>, String) {
        let mut written = String::new();
        let failure = self.raw_finish(&mut written);
        (failure, written)
    }

    /// Test-only helper: returns the bytes of `a` followed by those of `b`.
    #[cfg(test)]
    fn test_concat(&self, a: &[u8], b: &[u8]) -> Vec<u8> {
        let mut joined = Vec::with_capacity(a.len() + b.len());
        joined.extend(a.iter().cloned());
        joined.extend(b.iter().cloned());
        joined
    }
}
/// Shared handle to an encoding: a `'static` reference to an `Encoding`
/// trait object that is also `Send` and `Sync`.
#[stable]
pub type EncodingRef = &'static (Encoding + Send + Sync);
/// A character encoding: a factory for raw encoders/decoders plus one-shot
/// `encode`/`decode` helpers built on top of them.
#[stable]
pub trait Encoding {
    /// Returns the name of this encoding.
    #[stable]
    fn name(&self) -> &'static str;

    /// Returns the WHATWG name of this encoding, if it has one.
    /// Defaults to `None`.
    #[unstable]
    fn whatwg_name(&self) -> Option<&'static str> { None }

    /// Creates a new raw encoder for this encoding.
    #[unstable]
    fn raw_encoder(&self) -> Box<RawEncoder>;

    /// Creates a new raw decoder for this encoding.
    #[unstable]
    fn raw_decoder(&self) -> Box<RawDecoder>;

    /// Encodes all of `input` into a byte vector.
    ///
    /// Every error reported by the raw encoder is passed to `trap` together
    /// with the offending slice of `input`. If the trap returns `false`, the
    /// error's cause is returned as `Err`; otherwise encoding resumes at the
    /// position the error indicates.
    #[stable]
    fn encode(&self, input: &str, trap: EncoderTrap) -> Result<Vec<u8>, Cow<'static, str>> {
        let mut encoder = self.raw_encoder();
        let mut resume_at = 0;
        let mut output = Vec::new();
        loop {
            let (consumed, failure) = encoder.raw_feed(&input[resume_at..], &mut output);
            // Start of the span that could not be processed (absolute offset).
            let problem_start = resume_at + consumed;

            if let Some(err) = failure {
                // `upto` is relative to the slice that was just fed.
                resume_at = (resume_at as isize + err.upto) as usize;
                if !trap.trap(&mut *encoder, &input[problem_start..resume_at], &mut output) {
                    return Err(err.cause);
                }
                continue;
            }

            // The feed succeeded, so the whole input is consumed; flush the encoder.
            resume_at = input.len();
            if let Some(err) = encoder.raw_finish(&mut output) {
                resume_at = (resume_at as isize + err.upto) as usize;
                if !trap.trap(&mut *encoder, &input[problem_start..resume_at], &mut output) {
                    return Err(err.cause);
                }
            }
            if resume_at >= input.len() {
                return Ok(output);
            }
        }
    }

    /// Decodes all of `input` into a string.
    ///
    /// Every error reported by the raw decoder is passed to `trap` together
    /// with the offending slice of `input`. If the trap returns `false`, the
    /// error's cause is returned as `Err`; otherwise decoding resumes at the
    /// position the error indicates.
    #[stable]
    fn decode(&self, input: &[u8], trap: DecoderTrap) -> Result<String, Cow<'static, str>> {
        let mut decoder = self.raw_decoder();
        let mut resume_at = 0;
        let mut output = String::new();
        loop {
            let (consumed, failure) = decoder.raw_feed(&input[resume_at..], &mut output);
            // Start of the span that could not be processed (absolute offset).
            let problem_start = resume_at + consumed;

            if let Some(err) = failure {
                // `upto` is relative to the slice that was just fed.
                resume_at = (resume_at as isize + err.upto) as usize;
                if !trap.trap(&mut *decoder, &input[problem_start..resume_at], &mut output) {
                    return Err(err.cause);
                }
                continue;
            }

            // The feed succeeded, so the whole input is consumed; flush the decoder.
            resume_at = input.len();
            if let Some(err) = decoder.raw_finish(&mut output) {
                resume_at = (resume_at as isize + err.upto) as usize;
                if !trap.trap(&mut *decoder, &input[problem_start..resume_at], &mut output) {
                    return Err(err.cause);
                }
            }
            if resume_at >= input.len() {
                return Ok(output);
            }
        }
    }
}
/// Signature of a custom encoder trap, as carried by `EncoderTrap::Call`.
///
/// It receives the active encoder, the slice of input that could not be
/// encoded, and the output sink. Returning `false` makes `Encoding::encode`
/// stop with the error; returning `true` lets it continue.
#[unstable]
pub type EncoderTrapFunc =
extern "Rust" fn(encoder: &mut RawEncoder, input: &str, output: &mut ByteWriter) -> bool;
/// Signature of a custom decoder trap, as carried by `DecoderTrap::Call`.
///
/// It receives the active decoder, the slice of input that could not be
/// decoded, and the output sink. Returning `false` makes `Encoding::decode`
/// stop with the error; returning `true` lets it continue.
#[unstable]
pub type DecoderTrapFunc =
extern "Rust" fn(decoder: &mut RawDecoder, input: &[u8], output: &mut StringWriter) -> bool;
/// Strategy applied by `Encoding::decode` when the raw decoder reports an error.
#[stable]
#[derive(Copy)]
pub enum DecoderTrap {
/// Gives up: decoding stops and the error is returned.
Strict,
/// Writes U+FFFD (the replacement character) to the output and continues.
Replace,
/// Writes nothing and continues.
Ignore,
/// Delegates the decision to the supplied function.
#[unstable] Call(DecoderTrapFunc),
}
impl DecoderTrap {
    /// Handles one decoding error.
    ///
    /// `input` is the problematic byte sequence and `output` the sink being
    /// decoded into. Returns `true` if decoding may continue, `false` if the
    /// caller should abort with the error.
    fn trap(&self, decoder: &mut RawDecoder, input: &[u8], output: &mut StringWriter) -> bool {
        match *self {
            // A user-supplied callback decides on its own.
            DecoderTrap::Call(func) => func(decoder, input, output),
            DecoderTrap::Strict => false,
            DecoderTrap::Ignore => true,
            DecoderTrap::Replace => {
                // Stand in U+FFFD for the undecodable sequence.
                output.write_char('\u{fffd}');
                true
            }
        }
    }
}
/// Strategy applied by `Encoding::encode` when the raw encoder reports an error.
#[stable]
#[derive(Copy)]
pub enum EncoderTrap {
/// Gives up: encoding stops and the error is returned.
Strict,
/// Emits `?` (re-encoded with the active encoder) and continues.
Replace,
/// Writes nothing and continues.
Ignore,
/// Emits a decimal numeric character reference (`&#N;`) for each
/// unencodable character, re-encoded with the active encoder, and continues.
NcrEscape,
/// Delegates the decision to the supplied function.
#[unstable] Call(EncoderTrapFunc),
}
impl EncoderTrap {
    /// Handles one encoding error.
    ///
    /// `input` is the text that could not be encoded and `output` the sink
    /// being encoded into. Returns `true` if encoding may continue, `false`
    /// if the caller should abort with the error.
    ///
    /// # Panics
    ///
    /// Panics if `Replace` or `NcrEscape` has to re-encode its replacement
    /// text and the encoder rejects it.
    fn trap(&self, encoder: &mut RawEncoder, input: &str, output: &mut ByteWriter) -> bool {
        /// Emits the ASCII replacement text `input` through `encoder`.
        /// ASCII-compatible encoders get the bytes written directly; all
        /// others have the text fed through `raw_feed`.
        fn reencode(encoder: &mut RawEncoder, input: &str, output: &mut ByteWriter,
                    trapname: &str) -> bool {
            if encoder.is_ascii_compatible() {
                output.write_bytes(input.as_bytes());
                return true;
            }
            let (_, err) = encoder.raw_feed(input, output);
            if err.is_some() {
                panic!("{} cannot reencode a replacement string", trapname);
            }
            true
        }

        match *self {
            // A user-supplied callback decides on its own.
            EncoderTrap::Call(func) => func(encoder, input, output),
            EncoderTrap::Strict => false,
            EncoderTrap::Ignore => true,
            EncoderTrap::Replace => reencode(encoder, "?", output, "Replace"),
            EncoderTrap::NcrEscape => {
                // One decimal reference per character, e.g. U+203D -> "&#8253;".
                let escapes = input.chars().fold(String::new(), |mut acc, ch| {
                    acc.push_str(&format!("&#{};", ch as u32));
                    acc
                });
                reencode(encoder, &escapes, output, "NcrEscape")
            }
        }
    }
}
/// Decodes `input`, picking the encoding from a leading byte order mark.
///
/// A UTF-8 BOM (`EF BB BF`), UTF-16BE BOM (`FE FF`) or UTF-16LE BOM (`FF FE`)
/// selects the corresponding encoding and is stripped before decoding.
/// Without a BOM the whole input is decoded with `fallback_encoding`.
///
/// Returns the decoding result together with the encoding that was used.
#[unstable]
pub fn decode(input: &[u8], trap: DecoderTrap, fallback_encoding: EncodingRef)
           -> (Result<String, Cow<'static, str>>, EncodingRef) {
    use all::{UTF_8, UTF_16LE, UTF_16BE};

    // Choose the encoding and the number of BOM bytes to skip first.
    let (encoding, bom_len) = if input.starts_with(&[0xEF, 0xBB, 0xBF]) {
        (UTF_8 as EncodingRef, 3)
    } else if input.starts_with(&[0xFE, 0xFF]) {
        (UTF_16BE as EncodingRef, 2)
    } else if input.starts_with(&[0xFF, 0xFE]) {
        (UTF_16LE as EncodingRef, 2)
    } else {
        (fallback_encoding, 0)
    };

    (encoding.decode(&input[bom_len..], trap), encoding)
}
#[cfg(test)]
mod tests {
    use super::*;
    use super::EncoderTrap::NcrEscape;
    use util::StrCharIndex;
    use std::borrow::IntoCow;

    /// Toy encoder used to exercise the re-encoding traps.
    ///
    /// It passes ASCII through, rejects non-ASCII characters and the
    /// `prohibit` character, and flips `toggle` on every `'e'`; while
    /// `toggle` is set, each output byte is preceded by `prepend`.
    /// `flag` is what `is_ascii_compatible` reports.
    struct MyEncoder { flag: bool, prohibit: char, prepend: &'static str, toggle: bool }

    impl RawEncoder for MyEncoder {
        fn from_self(&self) -> Box<RawEncoder> {
            Box::new(MyEncoder { flag: self.flag,
                                 prohibit: self.prohibit,
                                 prepend: self.prepend,
                                 toggle: false })
        }

        fn is_ascii_compatible(&self) -> bool { self.flag }

        fn raw_feed(&mut self, input: &str,
                    output: &mut ByteWriter) -> (usize, Option<CodecError>) {
            for ((i,j), ch) in input.index_iter() {
                if ch <= '\u{7f}' && ch != self.prohibit {
                    if self.toggle && !self.prepend.is_empty() {
                        output.write_bytes(self.prepend.as_bytes());
                    }
                    output.write_byte(ch as u8);
                    if ch == 'e' {
                        self.toggle = !self.toggle;
                    }
                } else {
                    // `i` and `j` are passed on as the unprocessed offset
                    // and the resume offset (`upto`) respectively.
                    return (i, Some(CodecError { upto: j as isize,
                                                 cause: "!!!".into_cow() }));
                }
            }
            (input.len(), None)
        }

        fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> { None }
    }

    /// Encoding wrapper that hands out `MyEncoder`s; decoding is unsupported.
    struct MyEncoding { flag: bool, prohibit: char, prepend: &'static str }

    impl Encoding for MyEncoding {
        fn name(&self) -> &'static str { "my encoding" }

        fn raw_encoder(&self) -> Box<RawEncoder> {
            Box::new(MyEncoder { flag: self.flag,
                                 prohibit: self.prohibit,
                                 prepend: self.prepend,
                                 toggle: false })
        }

        fn raw_decoder(&self) -> Box<RawDecoder> { panic!("not supported") }
    }

    // Expected values below spell out the numeric character reference
    // `&#8253;` (U+203D) that `NcrEscape` produces. A literal `‽` is not
    // ASCII and therefore not even allowed inside a `b"..."` literal.

    #[test]
    fn test_reencoding_trap_with_ascii_compatible_encoding() {
        static COMPAT: &'static MyEncoding =
            &MyEncoding { flag: true, prohibit: '\u{80}', prepend: "" };
        static INCOMPAT: &'static MyEncoding =
            &MyEncoding { flag: false, prohibit: '\u{80}', prepend: "" };

        assert_eq!(COMPAT.encode("Hello\u{203d} I'm fine.", NcrEscape),
                   Ok(b"Hello&#8253; I'm fine.".to_vec()));
        assert_eq!(INCOMPAT.encode("Hello\u{203d} I'm fine.", NcrEscape),
                   Ok(b"Hello&#8253; I'm fine.".to_vec()));
    }

    #[test]
    fn test_reencoding_trap_with_ascii_incompatible_encoding() {
        static COMPAT: &'static MyEncoding =
            &MyEncoding { flag: true, prohibit: '\u{80}', prepend: "*" };
        static INCOMPAT: &'static MyEncoding =
            &MyEncoding { flag: false, prohibit: '\u{80}', prepend: "*" };

        // The ASCII-compatible encoder gets the escape written verbatim,
        // so no `*` is inserted inside it.
        assert_eq!(COMPAT.encode("Hello\u{203d} I'm fine.", NcrEscape),
                   Ok(b"He*l*l*o&#8253;* *I*'*m* *f*i*n*e.".to_vec()));
        // The incompatible one has the escape fed through `raw_feed`,
        // which prefixes every byte with `*`.
        assert_eq!(INCOMPAT.encode("Hello\u{203d} I'm fine.", NcrEscape),
                   Ok(b"He*l*l*o*&*#*8*2*5*3*;* *I*'*m* *f*i*n*e.".to_vec()));
    }

    #[test]
    #[should_panic]
    fn test_reencoding_trap_can_fail() {
        // `&` is prohibited, so the escape text itself cannot be re-encoded.
        static FAIL: &'static MyEncoding = &MyEncoding { flag: false, prohibit: '&', prepend: "" };
        let _ = FAIL.encode("Hello\u{203d} I'm fine.", NcrEscape);
    }
}