use std::borrow::Cow;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use url::Url;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use crate::cache_key::{CacheKey, CacheKeyHasher};
/// A wrapper around a [`DisplaySafeUrl`] holding a normalized ("canonical") form of a URL.
///
/// The canonical form is produced by [`CanonicalUrl::new`], which removes credentials, a trailing
/// slash and a `.git` extension, lower-cases `github.com` URLs, and decodes percent-encoded path
/// segments. Equality, ordering, hashing and cache keys are all based on that normalized URL, so
/// two spellings of the same location compare equal.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct CanonicalUrl(DisplaySafeUrl);
impl CanonicalUrl {
pub fn new(url: &DisplaySafeUrl) -> Self {
let mut url = url.clone();
if url.cannot_be_a_base() {
return Self(url);
}
let _ = url.set_password(None);
let _ = url.set_username("");
if url.path().ends_with('/') {
url.path_segments_mut().unwrap().pop_if_empty();
}
if url.host_str() == Some("github.com") {
let scheme = url.scheme().to_lowercase();
url.set_scheme(&scheme).unwrap();
let path = url.path().to_lowercase();
url.set_path(&path);
}
if let Some((prefix, suffix)) = url.path().rsplit_once('@') {
let needs_chopping = std::path::Path::new(prefix)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("git"));
if needs_chopping {
let prefix = &prefix[..prefix.len() - 4];
let path = format!("{prefix}@{suffix}");
url.set_path(&path);
}
} else {
let needs_chopping = std::path::Path::new(url.path())
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("git"));
if needs_chopping {
let last = {
let last = url.path_segments().unwrap().next_back().unwrap();
last[..last.len() - 4].to_owned()
};
url.path_segments_mut().unwrap().pop().push(&last);
}
}
if memchr::memchr(b'%', url.path().as_bytes()).is_some() {
let decoded = url
.path_segments()
.unwrap()
.map(|segment| {
percent_encoding::percent_decode_str(segment)
.decode_utf8()
.unwrap_or(Cow::Borrowed(segment))
.into_owned()
})
.collect::<Vec<_>>();
let mut path_segments = url.path_segments_mut().unwrap();
path_segments.clear();
path_segments.extend(decoded);
}
Self(url)
}
pub fn parse(url: &str) -> Result<Self, DisplaySafeUrlError> {
Ok(Self::new(&DisplaySafeUrl::parse(url)?))
}
}
impl CacheKey for CanonicalUrl {
    /// Feed the serialized canonical URL into the cache-key hasher.
    fn cache_key(&self, state: &mut CacheKeyHasher) {
        // Only the string form contributes to the key.
        let serialized = self.0.as_str();
        serialized.cache_key(state);
    }
}
impl Hash for CanonicalUrl {
    /// Hash the serialized canonical URL.
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Only the string form is hashed.
        let serialized = self.0.as_str();
        serialized.hash(state);
    }
}
impl From<CanonicalUrl> for DisplaySafeUrl {
fn from(value: CanonicalUrl) -> Self {
value.0
}
}
impl std::fmt::Display for CanonicalUrl {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}
/// A wrapper around a [`DisplaySafeUrl`] identifying a repository rather than a revision of it.
///
/// Built by [`RepositoryUrl::new`] from the canonical form of a URL, with the fragment and query
/// removed and, for `git+` schemes, everything from the last `@` in the path onwards removed.
/// URLs that differ only in those parts therefore compare equal.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct RepositoryUrl(DisplaySafeUrl);
impl RepositoryUrl {
pub fn new(url: &DisplaySafeUrl) -> Self {
let mut url = CanonicalUrl::new(url).0;
if url.scheme().starts_with("git+") {
if let Some(prefix) = url
.path()
.rsplit_once('@')
.map(|(prefix, _suffix)| prefix.to_string())
{
url.set_path(&prefix);
}
}
url.set_fragment(None);
url.set_query(None);
Self(url)
}
pub fn parse(url: &str) -> Result<Self, DisplaySafeUrlError> {
Ok(Self::new(&DisplaySafeUrl::parse(url)?))
}
}
impl CacheKey for RepositoryUrl {
    /// Feed the serialized repository URL into the cache-key hasher.
    fn cache_key(&self, state: &mut CacheKeyHasher) {
        // Only the string form contributes to the key.
        let serialized = self.0.as_str();
        serialized.cache_key(state);
    }
}
impl Hash for RepositoryUrl {
    /// Hash the serialized repository URL.
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Only the string form is hashed.
        let serialized = self.0.as_str();
        serialized.hash(state);
    }
}
impl Deref for RepositoryUrl {
type Target = Url;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl std::fmt::Display for RepositoryUrl {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Credentials embedded in a URL must not influence the cache key of its canonical form.
    #[test]
    fn user_credential_does_not_affect_cache_key() -> Result<(), DisplaySafeUrlError> {
        // Cache key of the canonical form of `url`, computed with a fresh hasher.
        let digest = |url: &str| -> Result<_, DisplaySafeUrlError> {
            let mut hasher = CacheKeyHasher::new();
            CanonicalUrl::parse(url)?.cache_key(&mut hasher);
            Ok(hasher.finish())
        };

        let hash_without_creds =
            digest("https://example.com/pypa/sample-namespace-packages.git@2.0.0")?;

        // Each credentialed variant, paired with the message reported if its key differs.
        let variants = [
            (
                "https://user:foo@example.com/pypa/sample-namespace-packages.git@2.0.0",
                "URLs with no user credentials should hash the same as URLs with different user credentials",
            ),
            (
                "https://user:bar@example.com/pypa/sample-namespace-packages.git@2.0.0",
                "URLs with different user credentials should hash the same",
            ),
            (
                "https://:bar@example.com/pypa/sample-namespace-packages.git@2.0.0",
                "URLs with no username, though with a password, should hash the same as URLs with different user credentials",
            ),
            (
                "https://user:@example.com/pypa/sample-namespace-packages.git@2.0.0",
                "URLs with no password, though with a username, should hash the same as URLs with different user credentials",
            ),
        ];

        for (url, message) in variants {
            let hash_with_creds = digest(url)?;
            assert_eq!(hash_without_creds, hash_with_creds, "{message}");
        }

        Ok(())
    }

    /// Which URL spellings do and do not share a canonical form.
    #[test]
    fn canonical_url() -> Result<(), DisplaySafeUrlError> {
        // `(left, right, expect_equal)`, checked in order.
        let cases = [
            // The `.git` extension is irrelevant.
            (
                "git+https://github.com/pypa/sample-namespace-packages.git",
                "git+https://github.com/pypa/sample-namespace-packages",
                true,
            ),
            // ... also when a reference follows it.
            (
                "git+https://github.com/pypa/sample-namespace-packages.git@2.0.0",
                "git+https://github.com/pypa/sample-namespace-packages@2.0.0",
                true,
            ),
            // Different repositories stay different.
            (
                "git+https://github.com/pypa/sample-namespace-packages.git",
                "git+https://github.com/pypa/sample-packages.git",
                false,
            ),
            // Different fragments stay different.
            (
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_a",
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_b",
                false,
            ),
            // Different references stay different.
            (
                "git+https://github.com/pypa/sample-namespace-packages.git@v1.0.0",
                "git+https://github.com/pypa/sample-namespace-packages.git@v2.0.0",
                false,
            ),
            // A malformed (`:://`) URL is still equal to itself.
            (
                "git+https:://github.com/pypa/sample-namespace-packages.git",
                "git+https:://github.com/pypa/sample-namespace-packages.git",
                true,
            ),
            // An encoded slash is not a path separator.
            (
                "https://github.com/pypa/sample%2Fnamespace%2Fpackages",
                "https://github.com/pypa/sample/namespace/packages",
                false,
            ),
            // An encoded plus is the same as a literal plus.
            (
                "https://github.com/pypa/sample%2Bnamespace%2Bpackages",
                "https://github.com/pypa/sample+namespace+packages",
                true,
            ),
            // The same two rules hold for `file://` URLs.
            (
                "file:///home/ferris/my_project%2Fmy_project-0.1.0-py3-none-any.whl",
                "file:///home/ferris/my_project/my_project-0.1.0-py3-none-any.whl",
                false,
            ),
            (
                "file:///home/ferris/my_project/my_project-0.1.0+foo-py3-none-any.whl",
                "file:///home/ferris/my_project/my_project-0.1.0%2Bfoo-py3-none-any.whl",
                true,
            ),
        ];

        for (left, right, expect_equal) in cases {
            let left = CanonicalUrl::parse(left)?;
            let right = CanonicalUrl::parse(right)?;
            if expect_equal {
                assert_eq!(left, right);
            } else {
                assert_ne!(left, right);
            }
        }

        Ok(())
    }

    /// Which URL spellings do and do not identify the same repository.
    #[test]
    fn repository_url() -> Result<(), DisplaySafeUrlError> {
        // `(left, right, expect_equal)`, checked in order.
        let cases = [
            // The `.git` extension is irrelevant.
            (
                "git+https://github.com/pypa/sample-namespace-packages.git",
                "git+https://github.com/pypa/sample-namespace-packages",
                true,
            ),
            // ... also when a reference follows it.
            (
                "git+https://github.com/pypa/sample-namespace-packages.git@2.0.0",
                "git+https://github.com/pypa/sample-namespace-packages@2.0.0",
                true,
            ),
            // Different repositories stay different.
            (
                "git+https://github.com/pypa/sample-namespace-packages.git",
                "git+https://github.com/pypa/sample-packages.git",
                false,
            ),
            // The fragment does not identify the repository.
            (
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_a",
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_b",
                true,
            ),
            // Neither does the reference.
            (
                "git+https://github.com/pypa/sample-namespace-packages.git@v1.0.0",
                "git+https://github.com/pypa/sample-namespace-packages.git@v2.0.0",
                true,
            ),
        ];

        for (left, right, expect_equal) in cases {
            let left = RepositoryUrl::parse(left)?;
            let right = RepositoryUrl::parse(right)?;
            if expect_equal {
                assert_eq!(left, right);
            } else {
                assert_ne!(left, right);
            }
        }

        Ok(())
    }
}