#![allow(bad_style)]
extern crate proc_macro;
use proc_macro::{TokenStream, TokenTree};
use core::str::FromStr;
use std::fmt::Write;
/// Expands a single string literal into a `&[..]` expression listing its
/// UTF-16 code units.
///
/// The input must be exactly one literal token whose text is a regular
/// (`"..."`) or raw (`r"..."`, `r#"..."#`) string literal. Escape sequences in
/// regular literals are decoded before encoding; raw literals are encoded
/// verbatim.
///
/// # Panics
///
/// Panics with the usage message when the input is not exactly one
/// double-quoted string literal, and (via the encoding helper) when a regular
/// literal contains an escape sequence the decoder does not accept.
#[proc_macro]
pub fn utf16(stream: TokenStream) -> TokenStream {
    const USAGE: &str = "Usage: utf16!(string_lit)";
    let mut tt_iter = stream.into_iter();
    let lit = match tt_iter.next() {
        Some(TokenTree::Literal(lit)) => lit,
        // Pass the message as a format argument: a non-literal first argument
        // to `panic!` is rejected by newer editions.
        _ => panic!("{}", USAGE),
    };
    assert!(tt_iter.next().is_none(), "{}", USAGE);

    let mut lit_string = lit.to_string();
    let is_raw = lit_string.starts_with('r');
    if is_raw {
        // Drop the `r` prefix, then peel one `#` off each end per iteration
        // until the opening quote is exposed.
        lit_string.remove(0);
        while lit_string.starts_with('#') {
            lit_string.pop();
            lit_string.remove(0);
        }
    }

    // Require two distinct quote characters so the slice below cannot invert.
    assert!(
        lit_string.len() >= 2 && lit_string.starts_with('"') && lit_string.ends_with('"'),
        "{}",
        USAGE
    );
    let lit_str = &lit_string[1..lit_string.len() - 1];
    str_to_utf16_units_tokenstream(lit_str, is_raw)
}
/// Expands a single string literal into a `&[..]` expression listing its
/// UTF-16 code units followed by a terminating `0` unit.
///
/// The input must be exactly one literal token whose text is a regular
/// (`"..."`) or raw (`r"..."`, `r#"..."#`) string literal. Escape sequences in
/// regular literals are decoded before encoding; raw literals are encoded
/// verbatim.
///
/// # Panics
///
/// Panics with the usage message when the input is not exactly one
/// double-quoted string literal, and (via the encoding helper) when a regular
/// literal contains an escape sequence the decoder does not accept.
#[proc_macro]
pub fn utf16_null(stream: TokenStream) -> TokenStream {
    // Names this macro (the message previously pointed at `utf16!`).
    const USAGE: &str = "Usage: utf16_null!(string_lit)";
    let mut tt_iter = stream.into_iter();
    let lit = match tt_iter.next() {
        Some(TokenTree::Literal(lit)) => lit,
        // Pass the message as a format argument: a non-literal first argument
        // to `panic!` is rejected by newer editions.
        _ => panic!("{}", USAGE),
    };
    assert!(tt_iter.next().is_none(), "{}", USAGE);

    let mut lit_string = lit.to_string();
    let is_raw = lit_string.starts_with('r');
    if is_raw {
        // Drop the `r` prefix, then peel one `#` off each end per iteration
        // until the opening quote is exposed.
        lit_string.remove(0);
        while lit_string.starts_with('#') {
            lit_string.pop();
            lit_string.remove(0);
        }
    }

    // Require two distinct quote characters: an opening and a closing one.
    assert!(
        lit_string.len() >= 2 && lit_string.starts_with('"') && lit_string.ends_with('"'),
        "{}",
        USAGE
    );

    // Swap the closing quote for a literal NUL character so the encoder emits
    // the terminator as the final unit; then skip the opening quote.
    lit_string.pop();
    lit_string.push('\0');
    let lit_str = &lit_string[1..];
    str_to_utf16_units_tokenstream(lit_str, is_raw)
}
/// Renders `s` as the source text `&[u0,u1,...,]` of its UTF-16 code units and
/// parses that text into a `TokenStream`.
///
/// When `is_raw` is true every character of `s` is encoded as-is. Otherwise
/// `s` is run through `CharEscapeIterator` first so backslash escapes are
/// replaced by the characters they denote.
///
/// # Panics
///
/// Panics with "Illegal character escape sequence" when a non-raw `s` contains
/// an escape the decoder reports as improper or a trailing lone backslash.
fn str_to_utf16_units_tokenstream(s: &str, is_raw: bool) -> TokenStream {
    let mut source = String::with_capacity(s.as_bytes().len() * 8 + 10);
    source.push_str("&[");
    {
        // Scratch space for one character's UTF-16 encoding (at most 2 units).
        let mut scratch = [0_u16; 2];
        let mut append_units = |ch: char| {
            for unit in ch.encode_utf16(&mut scratch).iter() {
                // Writing into a `String` cannot fail.
                let _cant_fail = write!(source, "{},", unit);
            }
        };

        if is_raw {
            for ch in s.chars() {
                append_units(ch);
            }
        } else {
            for escape in CharEscapeIterator::new(s.chars()) {
                match escape {
                    CharEscape::Literal(ch) | CharEscape::Escaped(ch) => append_units(ch),
                    CharEscape::Improper(_) | CharEscape::DanglingBackslash => {
                        panic!("Illegal character escape sequence: {:?}", escape)
                    }
                }
            }
        }
    }
    source.push(']');
    TokenStream::from_str(&source).unwrap()
}
/// One decoded item of a string literal's source text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CharEscape {
    /// A character that appeared without a preceding backslash.
    Literal(char),
    /// The character denoted by a well-formed escape sequence.
    Escaped(char),
    /// A malformed or unrecognised escape; holds the character that directly
    /// followed the backslash (e.g. `'x'` for a bad `\x..`).
    Improper(char),
    /// A backslash that was the final character of the input.
    DanglingBackslash,
}

/// Iterator adapter that turns the characters of a (non-raw) string literal
/// body into [`CharEscape`] items, decoding backslash escapes along the way.
///
/// Recognised escapes: `\0`, `\n`, `\r`, `\t`, `\\`, `\'`, `\"`, `\xNN`
/// (two hex digits, value below 0x80) and `\u{...}` (one to six hex digits,
/// optionally separated by `_` after the first digit). Anything else after a
/// backslash, including a line break, is reported as `Improper`.
#[derive(Debug, Clone, Copy)]
struct CharEscapeIterator<I> {
    it: I,
}

impl<I> CharEscapeIterator<I>
where
    I: Iterator<Item = char>,
{
    /// Wraps a character iterator positioned at the start of the literal body.
    fn new(it: I) -> Self {
        Self { it }
    }

    /// Decodes the two hex digits that follow `\x`.
    ///
    /// Returns `None` when input ends early, a digit is not hexadecimal, or
    /// the value is 0x80 or above.
    fn read_hex_escape(&mut self) -> Option<char> {
        // Pull both characters before validating either, so a malformed
        // escape consumes the same amount of input whichever digit is bad.
        let high = self.it.next()?;
        let low = self.it.next()?;
        let value = (high.to_digit(16)? << 4) | low.to_digit(16)?;
        if value < 0x80 {
            core::char::from_u32(value)
        } else {
            None
        }
    }

    /// Decodes the `{...}` payload that follows `\u`.
    ///
    /// Returns `None` when the brace is missing, input ends before `}`, the
    /// payload is empty, has more than six digits, contains a non-hex
    /// character or a leading `_`, or does not name a Unicode scalar value.
    fn read_unicode_escape(&mut self) -> Option<char> {
        if self.it.next() != Some('{') {
            return None;
        }
        let mut value: u32 = 0;
        let mut digit_count = 0_usize;
        let mut well_formed = true;
        // Always scan through the closing brace, even once the payload is
        // known to be bad, so the iterator resumes after the whole escape.
        loop {
            match self.it.next()? {
                '}' => break,
                // `_` is a permitted separator once a digit has been seen.
                '_' if digit_count > 0 => {}
                ch => match ch.to_digit(16) {
                    // Six digits fit in 24 bits, so the shift cannot overflow.
                    Some(digit) if digit_count < 6 => {
                        value = (value << 4) | digit;
                        digit_count += 1;
                    }
                    // Non-hex character (including any non-ASCII one, a
                    // leading `_`, or a sign) or a seventh digit.
                    _ => well_formed = false,
                },
            }
        }
        if !well_formed || digit_count == 0 {
            return None;
        }
        core::char::from_u32(value)
    }
}

impl<I> Iterator for CharEscapeIterator<I>
where
    I: Iterator<Item = char>,
{
    type Item = CharEscape;

    /// Yields the next literal character or decoded escape, or `None` once
    /// the underlying iterator is exhausted.
    fn next(&mut self) -> Option<CharEscape> {
        let ch = self.it.next()?;
        if ch != '\\' {
            return Some(CharEscape::Literal(ch));
        }
        let follow = match self.it.next() {
            Some(follow) => follow,
            None => return Some(CharEscape::DanglingBackslash),
        };
        let decoded = match follow {
            '0' => Some('\0'),
            'n' => Some('\n'),
            'r' => Some('\r'),
            't' => Some('\t'),
            '\\' => Some('\\'),
            '\'' => Some('\''),
            '"' => Some('"'),
            'x' => self.read_hex_escape(),
            'u' => self.read_unicode_escape(),
            _ => None,
        };
        // Failures are tagged with the character that selected the escape.
        Some(match decoded {
            Some(c) => CharEscape::Escaped(c),
            None => CharEscape::Improper(follow),
        })
    }
}