#![cfg_attr(feature="heap_size", feature(plugin, custom_derive))]
#![cfg_attr(feature="heap_size", plugin(heapsize_plugin))]
extern crate rustc_serialize;
extern crate uuid;
#[macro_use]
extern crate matches;
#[cfg(feature="serde_serialization")]
extern crate serde;
#[cfg(feature="heap_size")]
#[macro_use] extern crate heapsize;
use std::fmt::{self, Formatter};
use std::str;
use std::path::{Path, PathBuf};
use std::borrow::Borrow;
use std::hash::{Hash, Hasher};
use std::cmp::Ordering;
#[cfg(feature="serde_serialization")]
use std::str::FromStr;
pub use host::Host;
pub use parser::{ErrorHandler, ParseResult, ParseError};
use percent_encoding::{percent_encode, lossy_utf8_percent_decode, DEFAULT_ENCODE_SET};
use format::{PathFormatter, UserInfoFormatter, UrlNoFragmentFormatter};
use encoding::EncodingOverride;
use uuid::Uuid;
mod encoding;
mod host;
mod parser;
pub mod urlutils;
pub mod percent_encoding;
pub mod form_urlencoded;
pub mod punycode;
pub mod format;
/// A parsed URL, split into its scheme, scheme-specific data, query and fragment.
#[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)]
#[cfg_attr(feature="heap_size", derive(HeapSizeOf))]
pub struct Url {
/// The URL scheme, stored without the trailing `:` (for example `"file"` or `"http"`).
pub scheme: String,
/// Everything between the scheme and the query; its shape depends on the kind of scheme.
pub scheme_data: SchemeData,
/// The query string, if any.
/// NOTE(review): presumably stored without the leading `?` and still percent-encoded — confirm in `format`.
pub query: Option<String>,
/// The fragment, if any, stored without the leading `#` (the `Display` impl writes the `#` itself).
pub fragment: Option<String>,
}
/// An origin that is only equal to itself (or to clones of itself).
///
/// Each value wraps a `Uuid`, so two opaque origins created by separate calls to
/// `OpaqueOrigin::new()` wrap independently generated UUIDs.
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct OpaqueOrigin(Uuid);
// Registers `OpaqueOrigin` with the `heapsize` crate using a size of 0.
// NOTE(review): assumes `Uuid` owns no heap memory — confirm against the `uuid` crate.
#[cfg(feature="heap_size")]
known_heap_size!(0, OpaqueOrigin);
impl OpaqueOrigin {
/// Creates a new opaque origin backed by a freshly generated version-4 UUID.
pub fn new() -> OpaqueOrigin {
OpaqueOrigin(Uuid::new_v4())
}
}
/// The origin of a URL, as returned by `Url::origin`.
#[derive(PartialEq, Eq, Clone, Debug)]
#[cfg_attr(feature="heap_size", derive(HeapSizeOf))]
pub enum Origin {
/// An opaque origin identified only by a unique identifier.
UID(OpaqueOrigin),
/// A (scheme, host, port) triple; `Url::origin` fills the port with the explicit
/// port or, failing that, the default port.
Tuple(String, Host, u16)
}
/// The scheme-specific part of a URL (everything between the scheme and the query).
#[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)]
#[cfg_attr(feature="heap_size", derive(HeapSizeOf))]
pub enum SchemeData {
/// Structured data (user info, host, port, path) for "relative" schemes.
Relative(RelativeSchemeData),
/// Unstructured data kept as a single string for all other schemes.
NonRelative(String),
}
/// The structured components of a URL in a relative scheme.
///
/// `PartialEq`, `Eq`, `Hash`, `PartialOrd` and `Ord` are implemented by hand (not derived)
/// so that they can be based on `get_identity_key`.
#[derive(Clone, Debug)]
#[cfg_attr(feature="heap_size", derive(HeapSizeOf))]
pub struct RelativeSchemeData {
/// The username; an empty string when absent.
/// NOTE(review): presumably percent-encoded, given `lossy_percent_decode_username` — confirm in the parser.
pub username: String,
/// The password, if any (presumably percent-encoded as well — confirm in the parser).
pub password: Option<String>,
/// The host component.
pub host: Host,
/// The port written explicitly in the URL, if any.
pub port: Option<u16>,
/// The default port of the scheme, if it has one; used when `port` is `None`.
pub default_port: Option<u16>,
/// The path as a list of segments. `Url::from_directory_path` appends an empty
/// final segment to mark a directory.
pub path: Vec<String>,
}
impl RelativeSchemeData {
    /// Builds the tuple of borrowed components that defines this value's identity.
    ///
    /// The fourth element is the *effective* port (`port`, falling back to
    /// `default_port`) rather than the raw `port` field, so a value that spells out
    /// its default port yields the same key as one that leaves the port out.
    /// The comparison and hashing trait impls for this type are all built on this key.
    fn get_identity_key(&self) -> (&String, &Option<String>, &Host, Option<u16>, Option<u16>, &Vec<String>) {
        let effective_port = self.port.or(self.default_port);
        (&self.username, &self.password, &self.host, effective_port, self.default_port, &self.path)
    }
}
/// Two values are equal when their identity keys are equal, i.e. an explicit port
/// equal to the default port compares equal to an omitted port.
impl PartialEq for RelativeSchemeData {
    fn eq(&self, other: &RelativeSchemeData) -> bool {
        let lhs = self.get_identity_key();
        let rhs = other.get_identity_key();
        lhs == rhs
    }
}

impl Eq for RelativeSchemeData {}
/// Hashes the identity key, keeping `Hash` consistent with the hand-written `PartialEq`.
impl Hash for RelativeSchemeData {
    fn hash<H>(&self, state: &mut H) where H: Hasher {
        let key = self.get_identity_key();
        key.hash(state);
    }
}
/// Orders values by comparing their identity keys element by element.
impl PartialOrd for RelativeSchemeData {
    fn partial_cmp(&self, other: &RelativeSchemeData) -> Option<Ordering> {
        let lhs = self.get_identity_key();
        let rhs = other.get_identity_key();
        lhs.partial_cmp(&rhs)
    }
}
impl Ord for RelativeSchemeData {
fn cmp(&self, other: &Self) -> Ordering {
self.get_identity_key().cmp(&other.get_identity_key())
}
}
/// Lets a `Url` be produced with `"...".parse::<Url>()`; equivalent to `Url::parse`.
impl str::FromStr for Url {
    type Err = ParseError;

    fn from_str(input: &str) -> Result<Url, ParseError> {
        Url::parse(input)
    }
}
/// A configurable URL parser.
///
/// Create one with `UrlParser::new()`, adjust it with the builder-style setters,
/// then call `parse` or `parse_path`.
pub struct UrlParser<'a> {
/// Base URL handed to the parser; set through `base_url` (used by `Url::join`).
base_url: Option<&'a Url>,
/// Encoding override for the query string; UTF-8 by default.
query_encoding_override: EncodingOverride,
/// Callback invoked through `parse_error`; the default one accepts every error.
error_handler: ErrorHandler,
/// Maps a scheme name to its `SchemeType`; `whatwg_scheme_type_mapper` by default.
scheme_type_mapper: fn(scheme: &str) -> SchemeType,
}
impl<'a> UrlParser<'a> {
    /// Creates a parser with the default configuration: no base URL, UTF-8 query
    /// encoding, an error handler that accepts every reported error, and the
    /// WHATWG scheme-type mapping.
    #[inline]
    pub fn new() -> UrlParser<'a> {
        // Default handler: every reported error is ignored.
        fn ignore_error(_: ParseError) -> ParseResult<()> {
            Ok(())
        }

        UrlParser {
            base_url: None,
            query_encoding_override: EncodingOverride::utf8(),
            error_handler: ignore_error,
            scheme_type_mapper: whatwg_scheme_type_mapper,
        }
    }

    /// Sets the base URL for subsequent parses. Returns `self` for chaining.
    #[inline]
    pub fn base_url(&mut self, value: &'a Url) -> &mut UrlParser<'a> {
        self.base_url = Some(value);
        self
    }

    /// Overrides the character encoding applied to the query string.
    /// Returns `self` for chaining.
    #[cfg(feature = "query_encoding")]
    #[inline]
    pub fn query_encoding_override(&mut self, value: encoding::EncodingRef)
                                   -> &mut UrlParser<'a> {
        self.query_encoding_override = EncodingOverride::from_encoding(value);
        self
    }

    /// Replaces the callback that receives parse errors. Returns `self` for chaining.
    #[inline]
    pub fn error_handler(&mut self, value: ErrorHandler) -> &mut UrlParser<'a> {
        self.error_handler = value;
        self
    }

    /// Replaces the function that classifies schemes. Returns `self` for chaining.
    #[inline]
    pub fn scheme_type_mapper(&mut self, value: fn(scheme: &str) -> SchemeType)
                              -> &mut UrlParser<'a> {
        self.scheme_type_mapper = value;
        self
    }

    /// Parses `input` as a URL with the current configuration.
    #[inline]
    pub fn parse(&self, input: &str) -> ParseResult<Url> {
        parser::parse_url(input, self)
    }

    /// Parses `input` as a standalone path with the current configuration.
    ///
    /// NOTE(review): the returned tuple is presumably (path segments, query, fragment);
    /// confirm against `parser::parse_standalone_path`.
    #[inline]
    pub fn parse_path(&self, input: &str)
                      -> ParseResult<(Vec<String>, Option<String>, Option<String>)> {
        parser::parse_standalone_path(input, self)
    }
}
/// Parses `input` as a standalone path using a default-configured `UrlParser`.
///
/// Shorthand for `UrlParser::new().parse_path(input)`.
#[inline]
pub fn parse_path(input: &str)
                  -> ParseResult<(Vec<String>, Option<String>, Option<String>)> {
    let default_parser = UrlParser::new();
    default_parser.parse_path(input)
}
impl<'a> UrlParser<'a> {
    /// Reports `error` to the configured error handler and returns its verdict.
    #[inline]
    fn parse_error(&self, error: ParseError) -> ParseResult<()> {
        let handler = self.error_handler;
        handler(error)
    }

    /// Classifies `scheme` with the configured scheme-type mapper.
    #[inline]
    fn get_scheme_type(&self, scheme: &str) -> SchemeType {
        let mapper = self.scheme_type_mapper;
        mapper(scheme)
    }
}
/// How the part of a URL that follows the scheme is to be interpreted.
#[derive(PartialEq, Eq, Copy, Debug, Clone, Hash, PartialOrd, Ord)]
pub enum SchemeType {
    /// The scheme data is kept as one unstructured string.
    NonRelative,
    /// The scheme data is structured; the payload is the scheme's default port.
    Relative(u16),
    /// A `file`-like scheme: it carries no default port.
    FileLike,
}

impl SchemeType {
    /// Returns the default port carried by `Relative`, or `None` for the other variants.
    pub fn default_port(&self) -> Option<u16> {
        if let SchemeType::Relative(port) = *self {
            Some(port)
        } else {
            None
        }
    }

    /// Returns `true` when `self` and `other` are the same variant.
    ///
    /// The port stored in `Relative` is ignored for this comparison.
    pub fn same_as(&self, other: SchemeType) -> bool {
        match (*self, other) {
            (SchemeType::NonRelative, SchemeType::NonRelative) |
            (SchemeType::Relative(_), SchemeType::Relative(_)) |
            (SchemeType::FileLike, SchemeType::FileLike) => true,
            _ => false,
        }
    }
}
/// The default scheme classifier used by `UrlParser::new()`.
///
/// `file` is file-like; `ftp`, `gopher`, `http`, `https`, `ws` and `wss` are relative
/// schemes with their default ports; every other scheme is non-relative.
/// The comparison is exact, so `scheme` must match these lower-case names byte for byte.
pub fn whatwg_scheme_type_mapper(scheme: &str) -> SchemeType {
    let default_port = match scheme {
        "file" => return SchemeType::FileLike,
        "ftp" => 21,
        "gopher" => 70,
        "http" | "ws" => 80,
        "https" | "wss" => 443,
        _ => return SchemeType::NonRelative,
    };
    SchemeType::Relative(default_port)
}
impl Url {
    /// Parses `input` as a URL using a `UrlParser` with default settings (no base URL).
    #[inline]
    pub fn parse(input: &str) -> ParseResult<Url> {
        UrlParser::new().parse(input)
    }

    /// Builds a `file` URL from a filesystem path.
    ///
    /// Returns `Err(())` when the path cannot be converted, for example when it
    /// is not absolute.
    pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
        let path = try!(path_to_file_url_path(path.as_ref()));
        Ok(Url::from_path_common(path))
    }

    /// Like `from_file_path`, but appends an empty final path segment to mark the
    /// path as a directory.
    ///
    /// Returns `Err(())` under the same conditions as `from_file_path`.
    pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
        let mut path = try!(path_to_file_url_path(path.as_ref()));
        // The empty final segment is what distinguishes a directory from a file.
        path.push(String::new());
        Ok(Url::from_path_common(path))
    }

    /// Wraps already-encoded path segments into a `file` URL with an empty host,
    /// no user info, no port, no query and no fragment.
    fn from_path_common(path: Vec<String>) -> Url {
        Url {
            scheme: "file".to_string(),
            scheme_data: SchemeData::Relative(RelativeSchemeData {
                username: String::new(),
                password: None,
                port: None,
                default_port: None,
                host: Host::Domain(String::new()),
                path: path,
            }),
            query: None,
            fragment: None,
        }
    }

    /// Converts this URL to a filesystem path.
    ///
    /// Returns `Err(())` for non-relative URLs, and whenever
    /// `RelativeSchemeData::to_file_path` rejects the host or path.
    #[inline]
    pub fn to_file_path(&self) -> Result<PathBuf, ()> {
        match self.scheme_data {
            SchemeData::Relative(ref scheme_data) => scheme_data.to_file_path(),
            SchemeData::NonRelative(..) => Err(()),
        }
    }

    /// Returns the serialization of this URL, fragment included
    /// (same output as the `Display` impl).
    pub fn serialize(&self) -> String {
        self.to_string()
    }

    /// Computes the origin of this URL.
    ///
    /// * `blob`: the scheme data is parsed as a URL and that URL's origin is returned;
    ///   if it does not parse, a fresh opaque origin is returned.
    /// * `ftp`, `gopher`, `http`, `https`, `ws`, `wss`: a `(scheme, host, port)` tuple,
    ///   where the port is the explicit port or else the default port.
    /// * anything else, including `file`: a fresh opaque origin.
    ///
    /// The fields of `Url` are public and the scheme-type mapper is pluggable, so the
    /// scheme name and the shape of `scheme_data` can disagree (for example `"http"`
    /// paired with `SchemeData::NonRelative`, or a tuple scheme with neither an
    /// explicit nor a default port). Such URLs get a fresh opaque origin instead of
    /// causing a panic.
    pub fn origin(&self) -> Origin {
        match &*self.scheme {
            "blob" => {
                let inner = self.non_relative_scheme_data().map(|data| Url::parse(data));
                match inner {
                    Some(Ok(ref url)) => url.origin(),
                    // Unparsable data, or relative scheme data under a "blob" scheme.
                    _ => Origin::UID(OpaqueOrigin::new()),
                }
            },
            "ftp" | "gopher" | "http" | "https" | "ws" | "wss" => {
                match (self.host(), self.port_or_default()) {
                    (Some(host), Some(port)) => {
                        Origin::Tuple(self.scheme.clone(), host.clone(), port)
                    },
                    // No host or no usable port: there is no tuple to build.
                    _ => Origin::UID(OpaqueOrigin::new()),
                }
            },
            // "file" and every unknown scheme get an opaque origin.
            _ => Origin::UID(OpaqueOrigin::new())
        }
    }

    /// Returns the serialization of this URL with the fragment left out.
    pub fn serialize_no_fragment(&self) -> String {
        UrlNoFragmentFormatter{ url: self }.to_string()
    }

    /// Returns the scheme data of a non-relative URL, or `None` for a relative one.
    #[inline]
    pub fn non_relative_scheme_data(&self) -> Option<&str> {
        match self.scheme_data {
            SchemeData::Relative(..) => None,
            SchemeData::NonRelative(ref scheme_data) => Some(scheme_data),
        }
    }

    /// Mutable counterpart of `non_relative_scheme_data`.
    #[inline]
    pub fn non_relative_scheme_data_mut(&mut self) -> Option<&mut String> {
        match self.scheme_data {
            SchemeData::Relative(..) => None,
            SchemeData::NonRelative(ref mut scheme_data) => Some(scheme_data),
        }
    }

    /// Returns the structured scheme data of a relative URL, or `None` for a
    /// non-relative one.
    #[inline]
    pub fn relative_scheme_data(&self) -> Option<&RelativeSchemeData> {
        match self.scheme_data {
            SchemeData::Relative(ref scheme_data) => Some(scheme_data),
            SchemeData::NonRelative(..) => None,
        }
    }

    /// Mutable counterpart of `relative_scheme_data`.
    #[inline]
    pub fn relative_scheme_data_mut(&mut self) -> Option<&mut RelativeSchemeData> {
        match self.scheme_data {
            SchemeData::Relative(ref mut scheme_data) => Some(scheme_data),
            SchemeData::NonRelative(..) => None,
        }
    }

    /// Returns the stored username of a relative URL (`None` for non-relative URLs).
    #[inline]
    pub fn username(&self) -> Option<&str> {
        self.relative_scheme_data().map(|scheme_data| &*scheme_data.username)
    }

    /// Mutable access to the username of a relative URL.
    #[inline]
    pub fn username_mut(&mut self) -> Option<&mut String> {
        self.relative_scheme_data_mut().map(|scheme_data| &mut scheme_data.username)
    }

    /// Returns the percent-decoded username of a relative URL, decoded lossily as UTF-8.
    #[inline]
    pub fn lossy_percent_decode_username(&self) -> Option<String> {
        self.relative_scheme_data().map(|scheme_data| scheme_data.lossy_percent_decode_username())
    }

    /// Returns the stored password, if this is a relative URL that has one.
    #[inline]
    pub fn password(&self) -> Option<&str> {
        self.relative_scheme_data().and_then(|scheme_data|
            scheme_data.password.as_ref().map(|password| password as &str))
    }

    /// Mutable access to the password, if this is a relative URL that has one.
    #[inline]
    pub fn password_mut(&mut self) -> Option<&mut String> {
        self.relative_scheme_data_mut().and_then(|scheme_data| scheme_data.password.as_mut())
    }

    /// Returns the percent-decoded password, decoded lossily as UTF-8, if any.
    #[inline]
    pub fn lossy_percent_decode_password(&self) -> Option<String> {
        self.relative_scheme_data().and_then(|scheme_data|
            scheme_data.lossy_percent_decode_password())
    }

    /// Returns the serialized user info of a relative URL (`None` for non-relative URLs).
    ///
    /// Takes `&self`: serializing never needs to mutate the URL. Code that calls this
    /// method through a mutable binding or `&mut Url` keeps working.
    #[inline]
    pub fn serialize_userinfo(&self) -> Option<String> {
        self.relative_scheme_data().map(|scheme_data| scheme_data.serialize_userinfo())
    }

    /// Returns the host of a relative URL (`None` for non-relative URLs).
    #[inline]
    pub fn host(&self) -> Option<&Host> {
        self.relative_scheme_data().map(|scheme_data| &scheme_data.host)
    }

    /// Mutable access to the host of a relative URL.
    #[inline]
    pub fn host_mut(&mut self) -> Option<&mut Host> {
        self.relative_scheme_data_mut().map(|scheme_data| &mut scheme_data.host)
    }

    /// Returns the host as a domain name, if this is a relative URL whose host is
    /// a `Host::Domain`.
    #[inline]
    pub fn domain(&self) -> Option<&str> {
        self.relative_scheme_data().and_then(|scheme_data| scheme_data.domain())
    }

    /// Mutable counterpart of `domain`.
    #[inline]
    pub fn domain_mut(&mut self) -> Option<&mut String> {
        self.relative_scheme_data_mut().and_then(|scheme_data| scheme_data.domain_mut())
    }

    /// Returns the serialized host of a relative URL (`None` for non-relative URLs).
    #[inline]
    pub fn serialize_host(&self) -> Option<String> {
        self.relative_scheme_data().map(|scheme_data| scheme_data.host.serialize())
    }

    /// Returns the explicit port of a relative URL, if one is set.
    #[inline]
    pub fn port(&self) -> Option<u16> {
        self.relative_scheme_data().and_then(|scheme_data| scheme_data.port)
    }

    /// Mutable access to the explicit port slot of a relative URL.
    #[inline]
    pub fn port_mut(&mut self) -> Option<&mut Option<u16>> {
        self.relative_scheme_data_mut().map(|scheme_data| &mut scheme_data.port)
    }

    /// Returns the explicit port of a relative URL, falling back to the default port.
    #[inline]
    pub fn port_or_default(&self) -> Option<u16> {
        self.relative_scheme_data().and_then(|scheme_data| scheme_data.port_or_default())
    }

    /// Returns the path segments of a relative URL (`None` for non-relative URLs).
    #[inline]
    pub fn path(&self) -> Option<&[String]> {
        self.relative_scheme_data().map(|scheme_data| &*scheme_data.path)
    }

    /// Mutable access to the path segments of a relative URL.
    #[inline]
    pub fn path_mut(&mut self) -> Option<&mut Vec<String>> {
        self.relative_scheme_data_mut().map(|scheme_data| &mut scheme_data.path)
    }

    /// Returns the serialized path of a relative URL (`None` for non-relative URLs).
    #[inline]
    pub fn serialize_path(&self) -> Option<String> {
        self.relative_scheme_data().map(|scheme_data| scheme_data.serialize_path())
    }

    /// Parses the query string with `form_urlencoded::parse` into name/value pairs.
    /// Returns `None` when the URL has no query.
    #[inline]
    pub fn query_pairs(&self) -> Option<Vec<(String, String)>> {
        self.query.as_ref().map(|query| form_urlencoded::parse(query.as_bytes()))
    }

    /// Replaces the query string with the `form_urlencoded` serialization of `pairs`.
    #[inline]
    pub fn set_query_from_pairs<I, K, V>(&mut self, pairs: I)
    where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str> {
        self.query = Some(form_urlencoded::serialize(pairs));
    }

    /// Returns the percent-decoded query, decoded lossily as UTF-8, if there is one.
    #[inline]
    pub fn lossy_percent_decode_query(&self) -> Option<String> {
        self.query.as_ref().map(|value| lossy_utf8_percent_decode(value.as_bytes()))
    }

    /// Returns the percent-decoded fragment, decoded lossily as UTF-8, if there is one.
    #[inline]
    pub fn lossy_percent_decode_fragment(&self) -> Option<String> {
        self.fragment.as_ref().map(|value| lossy_utf8_percent_decode(value.as_bytes()))
    }

    /// Parses `input` with this URL as the base URL.
    #[inline]
    pub fn join(&self, input: &str) -> ParseResult<Url> {
        UrlParser::new().base_url(self).parse(input)
    }
}
/// Encodes a `Url` as the string produced by its `Display` impl.
impl rustc_serialize::Encodable for Url {
    fn encode<S>(&self, encoder: &mut S) -> Result<(), S::Error>
    where S: rustc_serialize::Encoder {
        let serialized = self.to_string();
        encoder.emit_str(&serialized)
    }
}
impl rustc_serialize::Decodable for Url {
fn decode<D: rustc_serialize::Decoder>(decoder: &mut D) -> Result<Url, D::Error> {
Url::parse(&*try!(decoder.read_str())).map_err(|error| {
decoder.error(&format!("URL parsing error: {}", error))
})
}
}
/// Serializes a `Url` as the string produced by its `Display` impl.
#[cfg(feature="serde_serialization")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: &mut S) -> Result<(), S::Error>
    where S: serde::Serializer {
        let text = self.to_string();
        text.serialize(serializer)
    }
}
/// Deserializes a `Url` by first deserializing a `String` and then parsing it.
///
/// NOTE(review): the parse result is `unwrap()`ed, so a string that is not a valid
/// URL panics here instead of being reported as a `D::Error`. The `ParseError`
/// should be mapped into the deserializer's error type; which constructor to use
/// depends on the serde version this crate is built against — confirm before fixing.
#[cfg(feature="serde_serialization")]
impl serde::Deserialize for Url {
fn deserialize<D>(deserializer: &mut D) -> Result<Url, D::Error> where D: serde::Deserializer {
// Errors from the deserializer itself are propagated to the caller.
let string_representation: String = try!(serde::Deserialize::deserialize(deserializer));
// Panics on a parse failure (see the note on this impl).
Ok(FromStr::from_str(&string_representation[..]).unwrap())
}
}
impl fmt::Display for Url {
fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
try!(UrlNoFragmentFormatter{ url: self }.fmt(formatter));
if let Some(ref fragment) = self.fragment {
try!(formatter.write_str("#"));
try!(formatter.write_str(fragment));
}
Ok(())
}
}
/// Formats whichever variant is held by delegating to the inner value's `fmt`.
impl fmt::Display for SchemeData {
    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
        match self {
            &SchemeData::Relative(ref relative) => relative.fmt(formatter),
            &SchemeData::NonRelative(ref opaque) => opaque.fmt(formatter),
        }
    }
}
impl RelativeSchemeData {
#[inline]
pub fn lossy_percent_decode_username(&self) -> String {
lossy_utf8_percent_decode(self.username.as_bytes())
}
#[inline]
pub fn lossy_percent_decode_password(&self) -> Option<String> {
self.password.as_ref().map(|value| lossy_utf8_percent_decode(value.as_bytes()))
}
#[inline]
pub fn to_file_path(&self) -> Result<PathBuf, ()> {
if !matches!(self.domain(), Some("") | Some("localhost")) {
return Err(())
}
file_url_path_to_pathbuf(&self.path)
}
#[inline]
pub fn domain<'a>(&'a self) -> Option<&'a str> {
match self.host {
Host::Domain(ref domain) => Some(domain),
_ => None,
}
}
#[inline]
pub fn domain_mut<'a>(&'a mut self) -> Option<&'a mut String> {
match self.host {
Host::Domain(ref mut domain) => Some(domain),
_ => None,
}
}
#[inline]
pub fn port_or_default(&self) -> Option<u16> {
self.port.or(self.default_port)
}
pub fn serialize_path(&self) -> String {
PathFormatter {
path: &self.path
}.to_string()
}
pub fn serialize_userinfo(&self) -> String {
UserInfoFormatter {
username: &self.username,
password: self.password.as_ref().map(|s| s as &str)
}.to_string()
}
}
impl fmt::Display for RelativeSchemeData {
fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
try!(formatter.write_str("//"));
try!(UserInfoFormatter {
username: &self.username,
password: self.password.as_ref().map(|s| s as &str)
}.fmt(formatter));
try!(self.host.fmt(formatter));
match self.port {
Some(port) => {
try!(write!(formatter, ":{}", port));
},
None => {}
}
PathFormatter {
path: &self.path
}.fmt(formatter)
}
}
/// Unix: turns an absolute filesystem path into percent-encoded URL path segments.
///
/// Returns `Err(())` when `path` is not absolute.
#[cfg(unix)]
fn path_to_file_url_path(path: &Path) -> Result<Vec<String>, ()> {
    use std::os::unix::prelude::OsStrExt;

    if path.is_absolute() {
        let mut segments = Vec::new();
        // The first component of an absolute path is the root; it is skipped.
        for component in path.components().skip(1) {
            let raw = component.as_os_str().as_bytes();
            segments.push(percent_encode(raw, DEFAULT_ENCODE_SET));
        }
        Ok(segments)
    } else {
        Err(())
    }
}
/// Windows: turns a filesystem path into URL path segments by delegating to
/// `path_to_file_url_path_windows`.
#[cfg(windows)]
fn path_to_file_url_path(path: &Path) -> Result<Vec<String>, ()> {
path_to_file_url_path_windows(path)
}
/// Turns an absolute Windows path into URL path segments.
///
/// The first segment is the drive (`"C:"`); every following component is
/// percent-encoded. Both ordinary drive prefixes (`C:\`) and verbatim drive
/// prefixes (`\\?\C:\`) are accepted.
///
/// Returns `Err(())` when the path is not absolute, does not start with a drive
/// prefix (UNC and device paths are not supported), or contains a component that
/// is not valid Unicode.
// Compiled on every platform; only reachable from the Windows wrapper.
#[cfg_attr(not(windows), allow(dead_code))]
fn path_to_file_url_path_windows(path: &Path) -> Result<Vec<String>, ()> {
    use std::path::{Prefix, Component};
    if !path.is_absolute() {
        return Err(())
    }
    let mut components = path.components();
    let disk = match components.next() {
        Some(Component::Prefix(ref p)) => match p.kind() {
            // `\\?\C:\dir` names the same location as `C:\dir`, so treat both alike.
            Prefix::Disk(byte) | Prefix::VerbatimDisk(byte) => byte,
            _ => return Err(()),
        },
        _ => return Err(())
    };
    let mut path = vec![format!("{}:", disk as char)];
    for component in components {
        // The root separator carries no information of its own in the URL path.
        if component == Component::RootDir { continue }
        let part = match component.as_os_str().to_str() {
            Some(s) => s,
            None => return Err(()),
        };
        path.push(percent_encode(part.as_bytes(), DEFAULT_ENCODE_SET));
    }
    Ok(path)
}
/// Unix: rebuilds a filesystem path from URL path segments.
///
/// Each segment is percent-decoded and prefixed with `/`; an empty segment list
/// gives the root path `/`. This implementation never returns `Err`.
#[cfg(unix)]
fn file_url_path_to_pathbuf(path: &[String]) -> Result<PathBuf, ()> {
    use std::ffi::OsStr;
    use std::os::unix::prelude::OsStrExt;
    use std::path::PathBuf;
    use percent_encoding::percent_decode_to;

    if path.is_empty() {
        return Ok(PathBuf::from("/"))
    }

    // Work on raw bytes: decoded segments need not be valid UTF-8.
    let mut raw = Vec::new();
    for segment in path.iter() {
        raw.push(b'/');
        percent_decode_to(segment.as_bytes(), &mut raw);
    }

    let rebuilt = PathBuf::from(OsStr::from_bytes(&raw));
    debug_assert!(rebuilt.is_absolute(),
                  "to_file_path() failed to produce an absolute Path");
    Ok(rebuilt)
}
/// Windows: rebuilds a filesystem path from URL path segments by delegating to
/// `file_url_path_to_pathbuf_windows`.
#[cfg(windows)]
fn file_url_path_to_pathbuf(path: &[String]) -> Result<PathBuf, ()> {
file_url_path_to_pathbuf_windows(path)
}
/// Rebuilds a Windows filesystem path from URL path segments.
///
/// The first segment must be a drive such as `C:` (an ASCII letter followed by a
/// colon); the remaining segments are percent-decoded and joined with `\`.
///
/// Returns `Err(())` when there are no segments, the first segment is not a drive,
/// or a decoded segment is not valid UTF-8.
// Compiled on every platform; only reachable from the Windows wrapper.
#[cfg_attr(not(windows), allow(dead_code))]
fn file_url_path_to_pathbuf_windows(path: &[String]) -> Result<PathBuf, ()> {
    use percent_encoding::percent_decode;
    if path.is_empty() {
        return Err(())
    }
    let prefix = &*path[0];
    if prefix.len() != 2 || !parser::starts_with_ascii_alpha(prefix)
            || prefix.as_bytes()[1] != b':' {
        return Err(())
    }
    let mut string = prefix.to_string();
    if path.len() == 1 {
        // Only the drive segment is present (this is what `C:\` converts to).
        // On Windows a bare `C:` is relative to the drive's current directory,
        // so add the root separator to keep the result absolute.
        string.push('\\');
    }
    for path_part in &path[1..] {
        string.push('\\');
        match String::from_utf8(percent_decode(path_part.as_bytes())) {
            Ok(s) => string.push_str(&s),
            Err(..) => return Err(()),
        }
    }
    let path = PathBuf::from(string);
    debug_assert!(path.is_absolute(),
                  "to_file_path() failed to produce an absolute Path");
    Ok(path)
}