use std::fmt;
use std::io;
use std::iter::FromIterator;
use std::ops::{self, Range};
use std::result;
use std::str;
use serde::de::Deserialize;
use deserializer::deserialize_string_record;
use error::{ErrorKind, FromUtf8Error, Result, new_error, new_from_utf8_error};
use reader::Reader;
use byte_record::{self, ByteRecord, ByteRecordIter, Position};
/// Reads the next record from `rdr` into `record`, then checks that the
/// bytes that were read are valid UTF-8.
///
/// On success this returns the boolean produced by
/// `Reader::read_byte_record` unchanged.
///
/// # Errors
///
/// * If the underlying read fails, that error is returned as-is.
/// * If the read succeeds but the data is not valid UTF-8, an
///   `ErrorKind::Utf8` error is returned carrying the reader position
///   captured before the read started.
///
/// Whenever validation fails, `record` is cleared before returning.
#[inline(always)]
pub fn read<R: io::Read>(
    rdr: &mut Reader<R>,
    record: &mut StringRecord,
) -> Result<bool> {
    // SAFETY: there is no `unsafe` in this function, but the `unsafe`
    // blocks elsewhere in this module rely on it. After the read below,
    // `record` may hold bytes that are not valid UTF-8, and accessors such
    // as `get` reinterpret the bytes as `str` without checking. The record
    // is therefore validated unconditionally (even if the read itself
    // failed) and wiped if validation fails.
    let start = rdr.position().clone();
    let outcome = rdr.read_byte_record(&mut record.0);

    let checked = byte_record::validate(&record.0);
    if checked.is_err() {
        record.0.clear();
    }

    // A read error takes precedence over a UTF-8 error.
    let status = match outcome {
        Ok(status) => status,
        Err(err) => return Err(err),
    };
    match checked {
        Ok(()) => Ok(status),
        Err(err) => {
            Err(new_error(ErrorKind::Utf8 { pos: Some(start), err: err }))
        }
    }
}
/// A single record whose fields are exposed as `&str`.
///
/// This is a newtype around `ByteRecord`. Accessors in this module (for
/// example `get`, `as_slice` and the field iterator) reinterpret the wrapped
/// bytes as `str` without re-checking them, so the wrapped record must only
/// ever contain valid UTF-8.
#[derive(Clone, Eq)]
pub struct StringRecord(ByteRecord);
/// Two records are equal when they have the same number of fields and each
/// pair of corresponding fields is equal.
///
/// Only field contents are compared; the records' positions are not taken
/// into account.
impl PartialEq for StringRecord {
    fn eq(&self, other: &StringRecord) -> bool {
        // Comparing only the flattened buffers would treat records such as
        // ["ab", "c"] and ["a", "bc"] as equal, because the field boundaries
        // are not part of that buffer. Compare field by field instead.
        self.len() == other.len()
            && self.iter().zip(other.iter()).all(|(lhs, rhs)| lhs == rhs)
    }
}
/// Compares a record against a vector of byte-like fields by delegating to
/// `byte_record::eq`.
impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for StringRecord {
    fn eq(&self, other: &Vec<T>) -> bool {
        let inner = self.as_byte_record();
        byte_record::eq(inner, other)
    }
}
/// Compares a borrowed record against a vector of byte-like fields by
/// delegating to `byte_record::eq`.
impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a StringRecord {
    fn eq(&self, other: &Vec<T>) -> bool {
        let inner = self.as_byte_record();
        byte_record::eq(inner, other)
    }
}
/// Compares a record against a slice of byte-like fields by delegating to
/// `byte_record::eq`.
impl<T: AsRef<[u8]>> PartialEq<[T]> for StringRecord {
    fn eq(&self, other: &[T]) -> bool {
        let inner = self.as_byte_record();
        byte_record::eq(inner, other)
    }
}
/// Compares a borrowed record against a slice of byte-like fields by
/// delegating to `byte_record::eq`.
impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a StringRecord {
    fn eq(&self, other: &[T]) -> bool {
        let inner = self.as_byte_record();
        byte_record::eq(inner, other)
    }
}
/// Formats the record as `StringRecord([...])`, listing every field with
/// its `Debug` representation.
impl fmt::Debug for StringRecord {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "StringRecord({:?})",
            self.iter().collect::<Vec<&str>>()
        )
    }
}
impl Default for StringRecord {
#[inline]
fn default() -> StringRecord {
StringRecord::new()
}
}
impl StringRecord {
    /// Creates a new empty record, backed by `ByteRecord::new()`.
    #[inline]
    pub fn new() -> StringRecord {
        StringRecord(ByteRecord::new())
    }

    /// Creates a new empty record, forwarding both capacity hints to
    /// `ByteRecord::with_capacity`.
    ///
    /// Within this module `buffer` is given the total byte length of the
    /// field data and `fields` the number of fields.
    #[inline]
    pub fn with_capacity(buffer: usize, fields: usize) -> StringRecord {
        StringRecord(ByteRecord::with_capacity(buffer, fields))
    }

    /// Converts a `ByteRecord` into a `StringRecord` without copying.
    ///
    /// # Errors
    ///
    /// If `byte_record::validate` rejects the record (it is not valid
    /// UTF-8), a `FromUtf8Error` built from the original record and the
    /// validation error is returned.
    #[inline]
    pub fn from_byte_record(
        record: ByteRecord,
    ) -> result::Result<StringRecord, FromUtf8Error> {
        match byte_record::validate(&record) {
            Ok(()) => Ok(StringRecord(record)),
            Err(err) => Err(new_from_utf8_error(record, err)),
        }
    }

    /// Converts a `ByteRecord` into a `StringRecord`, replacing invalid
    /// UTF-8 with the Unicode replacement character.
    ///
    /// A record that already validates is wrapped as-is. Otherwise a new
    /// record is built with every field passed through
    /// `String::from_utf8_lossy`. In both cases the position of `record`
    /// is carried over to the result.
    #[inline]
    pub fn from_byte_record_lossy(record: ByteRecord) -> StringRecord {
        // Fast path: nothing to repair, so reuse the existing allocation.
        if let Ok(()) = byte_record::validate(&record) {
            return StringRecord(record);
        }
        // The capacity is only a hint: replacement characters may make the
        // repaired data longer than the original bytes.
        let mut str_record = StringRecord::with_capacity(
            record.as_slice().len(),
            record.len(),
        );
        for field in &record {
            str_record.push_field(&String::from_utf8_lossy(field));
        }
        // Keep the position, matching what the fast path above does.
        str_record.set_position(record.position().cloned());
        str_record
    }

    /// Deserializes this record into `D` by delegating to
    /// `deserialize_string_record`.
    ///
    /// `headers` is forwarded unchanged. Presumably it lets fields be
    /// matched by header name rather than by index; confirm against the
    /// deserializer module.
    ///
    /// # Errors
    ///
    /// Returns whatever error `deserialize_string_record` reports.
    pub fn deserialize<'de, D: Deserialize<'de>>(
        &'de self,
        headers: Option<&'de StringRecord>,
    ) -> Result<D> {
        deserialize_string_record(self, headers)
    }

    /// Returns an iterator over the fields of this record as `&str`.
    #[inline]
    pub fn iter(&self) -> StringRecordIter {
        self.into_iter()
    }

    /// Returns the field at index `i`, or `None` if the wrapped record has
    /// no field at that index.
    #[inline]
    pub fn get(&self, i: usize) -> Option<&str> {
        self.0.get(i).map(|bytes| {
            // SAFETY: a `StringRecord` only ever holds valid UTF-8. Its
            // contents are either validated (`from_byte_record`, `read`)
            // or appended from `&str` values (`push_field`).
            unsafe { str::from_utf8_unchecked(bytes) }
        })
    }

    /// Returns true if and only if this record has no fields.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the number of fields in this record.
    #[inline]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Truncates this record by forwarding `n` to `ByteRecord::truncate`.
    #[inline]
    pub fn truncate(&mut self, n: usize) {
        self.0.truncate(n);
    }

    /// Clears this record by forwarding to `ByteRecord::clear`.
    #[inline]
    pub fn clear(&mut self) {
        self.0.clear();
    }

    /// Trims leading and trailing whitespace (as defined by `str::trim`)
    /// from every field of this record.
    ///
    /// The record is rebuilt in a fresh allocation. Its position is
    /// preserved. An empty record is left untouched.
    pub fn trim(&mut self) {
        if self.is_empty() {
            return;
        }
        // Trimming can only shrink fields, so the current sizes are enough.
        let mut trimmed = StringRecord::with_capacity(
            self.as_slice().len(),
            self.len(),
        );
        for field in &*self {
            trimmed.push_field(field.trim());
        }
        // The rebuilt record starts without a position; copy it across so
        // that trimming does not lose where the record came from.
        trimmed.set_position(self.position().cloned());
        *self = trimmed;
    }

    /// Appends `field` to this record as a new field.
    #[inline]
    pub fn push_field(&mut self, field: &str) {
        self.0.push_field(field.as_bytes());
    }

    /// Returns the position stored on this record, if any.
    #[inline]
    pub fn position(&self) -> Option<&Position> {
        self.0.position()
    }

    /// Sets (or, with `None`, removes) the position stored on this record.
    #[inline]
    pub fn set_position(&mut self, pos: Option<Position>) {
        self.0.set_position(pos);
    }

    /// Returns the range reported by `ByteRecord::range` for field `i`.
    ///
    /// Presumably these are byte offsets into `as_slice()`, with `None`
    /// for an out-of-range index; confirm against `ByteRecord::range`.
    #[inline]
    pub fn range(&self, i: usize) -> Option<Range<usize>> {
        self.0.range(i)
    }

    /// Returns the wrapped record's underlying buffer
    /// (`ByteRecord::as_slice`) viewed as a single `&str`.
    #[inline]
    pub fn as_slice(&self) -> &str {
        // SAFETY: a `StringRecord` only ever holds valid UTF-8. Its
        // contents are either validated (`from_byte_record`, `read`) or
        // appended from `&str` values (`push_field`).
        unsafe { str::from_utf8_unchecked(self.0.as_slice()) }
    }

    /// Returns a reference to the underlying `ByteRecord`.
    #[inline]
    pub fn as_byte_record(&self) -> &ByteRecord {
        &self.0
    }

    /// Consumes this record and returns the underlying `ByteRecord`.
    #[inline]
    pub fn into_byte_record(self) -> ByteRecord {
        self.0
    }
}
/// Indexes a record by field number.
///
/// # Panics
///
/// Panics if `get(i)` returns `None`, i.e. there is no field at index `i`.
impl ops::Index<usize> for StringRecord {
    type Output = str;

    #[inline]
    fn index(&self, i: usize) -> &str {
        let field = self.get(i);
        field.unwrap()
    }
}
impl<T: AsRef<str>> From<Vec<T>> for StringRecord {
#[inline]
fn from(xs: Vec<T>) -> StringRecord {
StringRecord::from_iter(xs.into_iter())
}
}
impl<'a, T: AsRef<str>> From<&'a [T]> for StringRecord {
#[inline]
fn from(xs: &'a [T]) -> StringRecord {
StringRecord::from_iter(xs)
}
}
/// Collects string-like items into a new record, one field per item.
impl<T: AsRef<str>> FromIterator<T> for StringRecord {
    #[inline]
    fn from_iter<I: IntoIterator<Item=T>>(iter: I) -> StringRecord {
        let mut collected = StringRecord::new();
        for item in iter {
            collected.push_field(item.as_ref());
        }
        collected
    }
}
/// Appends every item of the iterator to the record as a new field.
impl<T: AsRef<str>> Extend<T> for StringRecord {
    #[inline]
    fn extend<I: IntoIterator<Item=T>>(&mut self, iter: I) {
        for item in iter {
            let text: &str = item.as_ref();
            self.push_field(text);
        }
    }
}
/// Iterating over `&StringRecord` yields each field as `&str`.
impl<'a> IntoIterator for &'a StringRecord {
    type IntoIter = StringRecordIter<'a>;
    type Item = &'a str;

    #[inline]
    fn into_iter(self) -> StringRecordIter<'a> {
        let byte_fields = self.as_byte_record().iter();
        StringRecordIter(byte_fields)
    }
}
/// An iterator over the fields of a `StringRecord`, yielding each field as
/// `&str`.
///
/// It wraps a `ByteRecordIter`; `'r` is the lifetime of the borrow of the
/// record being iterated.
pub struct StringRecordIter<'r>(ByteRecordIter<'r>);
impl<'r> Iterator for StringRecordIter<'r> {
type Item = &'r str;
#[inline]
fn next(&mut self) -> Option<&'r str> {
self.0.next().map(|bytes| {
unsafe { str::from_utf8_unchecked(bytes) }
})
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
#[inline]
fn count(self) -> usize {
self.0.len()
}
}
/// Reverse iteration: each byte field taken from the back of the wrapped
/// iterator is handed out as `&str`.
impl<'r> DoubleEndedIterator for StringRecordIter<'r> {
    #[inline]
    fn next_back(&mut self) -> Option<&'r str> {
        let raw = self.0.next_back();
        // SAFETY: the bytes come from a `StringRecord`, and the rest of
        // this module relies on such records holding only valid UTF-8.
        raw.map(|bytes| unsafe { str::from_utf8_unchecked(bytes) })
    }
}
#[cfg(test)]
mod tests {
    use string_record::StringRecord;

    /// Builds a record from `fields` and returns it after calling `trim`.
    fn trimmed(fields: Vec<&str>) -> StringRecord {
        let mut rec = StringRecord::from(fields);
        rec.trim();
        rec
    }

    // Leading whitespace is removed, for one field and for several.
    #[test]
    fn trim_front() {
        let single = trimmed(vec![" abc"]);
        assert_eq!(single.get(0), Some("abc"));

        let pair = trimmed(vec![" abc", " xyz"]);
        assert_eq!(pair.get(0), Some("abc"));
        assert_eq!(pair.get(1), Some("xyz"));
    }

    // Trailing whitespace is removed, for one field and for several.
    #[test]
    fn trim_back() {
        let single = trimmed(vec!["abc "]);
        assert_eq!(single.get(0), Some("abc"));

        let pair = trimmed(vec!["abc ", "xyz "]);
        assert_eq!(pair.get(0), Some("abc"));
        assert_eq!(pair.get(1), Some("xyz"));
    }

    // Whitespace on both sides is removed, for one field and for several.
    #[test]
    fn trim_both() {
        let single = trimmed(vec![" abc "]);
        assert_eq!(single.get(0), Some("abc"));

        let pair = trimmed(vec![" abc ", " xyz "]);
        assert_eq!(pair.get(0), Some("abc"));
        assert_eq!(pair.get(1), Some("xyz"));
    }

    // A record with a single empty field survives trimming.
    #[test]
    fn trim_does_not_panic_on_empty_records_1() {
        let rec = trimmed(vec![""]);
        assert_eq!(rec.get(0), Some(""));
    }

    // A record with two empty fields survives trimming.
    #[test]
    fn trim_does_not_panic_on_empty_records_2() {
        let rec = trimmed(vec!["", ""]);
        assert_eq!(rec.get(0), Some(""));
        assert_eq!(rec.get(1), Some(""));
    }

    // A record with no fields at all survives trimming.
    #[test]
    fn trim_does_not_panic_on_empty_records_3() {
        let mut rec = StringRecord::new();
        rec.trim();
        assert_eq!(rec.as_slice().len(), 0);
    }

    // A field made up solely of Unicode whitespace trims to the empty string.
    #[test]
    fn trim_whitespace_only() {
        let rec = trimmed(vec![
            "\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{0085}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}",
        ]);
        assert_eq!(rec.get(0), Some(""));
    }
}