1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
use std::borrow::Cow;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use dashmap::DashMap;
use dashmap::mapref::one::Ref;
use fs_err::tokio as fs;
use reqwest_middleware::ClientWithMiddleware;
use tracing::debug;
use uv_cache_key::{RepositoryUrl, cache_digest};
use uv_fs::LockedFile;
use uv_git_types::{GitHubRepository, GitOid, GitReference, GitUrl};
use uv_static::EnvVars;
use uv_version::version;
use crate::{
Fetch, GitSource, Reporter,
rate_limit::{GITHUB_RATE_LIMIT_STATUS, is_github_rate_limited},
};
/// Errors that can be returned by [`GitResolver`] operations.
#[derive(Debug, thiserror::Error)]
pub enum GitResolverError {
/// An I/O error, converted automatically from [`std::io::Error`].
#[error(transparent)]
Io(#[from] std::io::Error),
/// A Tokio task join failure, converted automatically from [`tokio::task::JoinError`].
#[error(transparent)]
Join(#[from] tokio::task::JoinError),
/// A failed Git operation; the underlying [`anyhow::Error`] is exposed as the error source.
#[error("Git operation failed")]
Git(#[source] anyhow::Error),
/// An HTTP error, converted automatically from [`reqwest::Error`].
#[error(transparent)]
Reqwest(#[from] reqwest::Error),
/// An HTTP client or middleware error, converted automatically from
/// [`reqwest_middleware::Error`].
#[error(transparent)]
ReqwestMiddleware(#[from] reqwest_middleware::Error),
}
/// A resolver for Git repositories.
///
/// Maps each [`RepositoryReference`] (a repository URL plus the requested reference) to the
/// precise [`GitOid`] recorded for it. The map is held behind an [`Arc`], so clones of a resolver
/// share the same entries.
#[derive(Default, Clone)]
pub struct GitResolver(Arc<DashMap<RepositoryReference, GitOid>>);
impl GitResolver {
    /// Inserts a new [`GitOid`] for the given [`RepositoryReference`].
    pub fn insert(&self, reference: RepositoryReference, sha: GitOid) {
        self.0.insert(reference, sha);
    }

    /// Returns the [`GitOid`] for the given [`RepositoryReference`], if it exists.
    ///
    /// The returned guard borrows from the underlying map; callers copy the value out rather than
    /// holding on to the guard.
    fn get(&self, reference: &RepositoryReference) -> Option<Ref<'_, RepositoryReference, GitOid>> {
        self.0.get(reference)
    }

    /// Return the [`GitOid`] for the given [`GitUrl`], if it is already known.
    ///
    /// A commit carried by the URL itself takes precedence; otherwise the in-memory cache is
    /// consulted for the URL's repository and reference.
    pub fn get_precise(&self, url: &GitUrl) -> Option<GitOid> {
        url.precise().or_else(|| {
            self.get(&RepositoryReference::from(url))
                .map(|precise| *precise)
        })
    }

    /// Resolve a Git URL to a specific commit without performing any Git operations.
    ///
    /// Returns a [`GitOid`] if the URL has already been resolved (i.e., is available in the cache),
    /// or if it can be fetched via the GitHub API. Otherwise, returns `None`.
    ///
    /// The fast path is skipped entirely (returning `Ok(None)`) when the
    /// `UV_NO_GITHUB_FAST_PATH` environment variable is set, when the URL is not recognized as a
    /// GitHub repository, or while the GitHub rate limit is marked as active. The API base URL
    /// can be overridden via `UV_GITHUB_FAST_PATH_URL`.
    ///
    /// # Errors
    ///
    /// Returns an error if the HTTP request cannot be sent, if the response body cannot be read,
    /// or if the body of a successful response is not a valid [`GitOid`]. A non-success HTTP
    /// status is not an error; it yields `Ok(None)`.
    pub async fn github_fast_path(
        &self,
        url: &GitUrl,
        client: &ClientWithMiddleware,
    ) -> Result<Option<GitOid>, GitResolverError> {
        if std::env::var_os(EnvVars::UV_NO_GITHUB_FAST_PATH).is_some() {
            return Ok(None);
        }

        // If the URL is already precise or we know the precise commit, return it.
        if let Some(precise) = self.get_precise(url) {
            return Ok(Some(precise));
        }

        // If the URL is a GitHub URL, attempt to resolve it via the GitHub API.
        let Some(GitHubRepository { owner, repo }) = GitHubRepository::parse(url.repository())
        else {
            return Ok(None);
        };

        // Check if we're rate-limited by GitHub, before determining the Git reference.
        if GITHUB_RATE_LIMIT_STATUS.is_active() {
            debug!("Rate-limited by GitHub. Skipping GitHub fast path attempt for: {url}");
            return Ok(None);
        }

        // Determine the Git reference.
        let rev = url.reference().as_rev();

        // Only allocate the default base URL when the override is absent (or not valid Unicode).
        let github_api_base_url = std::env::var(EnvVars::UV_GITHUB_FAST_PATH_URL)
            .unwrap_or_else(|_| "https://api.github.com/repos".to_owned());
        let github_api_url = format!("{github_api_base_url}/{owner}/{repo}/commits/{rev}");

        debug!("Querying GitHub for commit at: {github_api_url}");
        let response = client
            .get(&github_api_url)
            .header("Accept", "application/vnd.github.3.sha")
            .header(
                "User-Agent",
                format!("uv/{} (+https://github.com/astral-sh/uv)", version()),
            )
            .send()
            .await?;

        let status = response.status();
        if !status.is_success() {
            // Returns a 404 if the repository does not exist, and a 422 if GitHub is unable to
            // resolve the requested rev.
            debug!("GitHub API request failed for: {github_api_url} ({status})");

            if is_github_rate_limited(&response) {
                // Mark that we are being rate-limited by GitHub.
                GITHUB_RATE_LIMIT_STATUS.activate();
            }

            return Ok(None);
        }

        // Parse the response as a Git SHA. Surrounding whitespace (e.g., a trailing newline added
        // by a mirror or proxy) is not part of the SHA, so strip it before parsing.
        let body = response.text().await?;
        let precise =
            GitOid::from_str(body.trim()).map_err(|err| GitResolverError::Git(err.into()))?;

        // Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches
        // resolve to the same precise commit.
        self.insert(RepositoryReference::from(url), precise);

        Ok(Some(precise))
    }

    /// Fetch a remote Git repository.
    ///
    /// If a commit has already been recorded for the URL's repository and reference, the fetch is
    /// pinned to that commit. A lock file under `<cache>/locks` is held for the duration of the
    /// fetch, and the blocking Git work runs on Tokio's blocking thread pool. When the fetched
    /// source reports a precise commit, it is recorded in the in-memory cache.
    ///
    /// # Errors
    ///
    /// Returns an error if the lock directory cannot be created, if the lock cannot be acquired,
    /// if the blocking task fails to join, or if the Git fetch itself fails.
    pub async fn fetch(
        &self,
        url: &GitUrl,
        disable_ssl: bool,
        offline: bool,
        cache: PathBuf,
        reporter: Option<Arc<dyn Reporter>>,
    ) -> Result<Fetch, GitResolverError> {
        debug!("Fetching source distribution from Git: {url}");

        let reference = RepositoryReference::from(url);

        // If we know the precise commit already, reuse it, to ensure that all fetches within a
        // single process are consistent. Copy the commit out right away so that the map guard is
        // released before any `.await`.
        let known = self.get(&reference).map(|precise| *precise);
        let url = match known {
            Some(precise) => Cow::Owned(url.clone().with_precise(precise)),
            None => Cow::Borrowed(url),
        };

        // Avoid races between different processes, too.
        let lock_dir = cache.join("locks");
        fs::create_dir_all(&lock_dir).await?;
        let repository_url = RepositoryUrl::new(url.repository());
        let _lock = LockedFile::acquire(
            lock_dir.join(cache_digest(&repository_url)),
            &repository_url,
        )
        .await?;

        // Fetch the Git repository. The URL is not needed afterwards, so take ownership of it
        // instead of cloning (a no-op when it was already pinned above).
        let source = GitSource::new(url.into_owned(), cache, offline);
        let source = match reporter {
            Some(reporter) => source.with_reporter(reporter),
            None => source,
        };

        // If necessary, disable SSL.
        let source = if disable_ssl {
            source.dangerous()
        } else {
            source
        };

        let fetch = tokio::task::spawn_blocking(move || source.fetch())
            .await?
            .map_err(GitResolverError::Git)?;

        // Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches
        // resolve to the same precise commit.
        if let Some(precise) = fetch.git().precise() {
            self.insert(reference, precise);
        }

        Ok(fetch)
    }

    /// Given a remote source distribution, return a precise variant, if possible.
    ///
    /// For example, given a Git dependency with a reference to a branch or tag, return a URL
    /// with a precise reference to the current commit of that branch or tag.
    ///
    /// This method takes into account various normalizations that are independent of the Git
    /// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
    /// prefix kinds.
    ///
    /// This method will only return precise URLs for URLs whose repository and reference have
    /// already been recorded in this resolver (e.g., via [`GitResolver::fetch`],
    /// [`GitResolver::github_fast_path`], or [`GitResolver::insert`]), and will return `None`
    /// otherwise.
    pub fn precise(&self, url: GitUrl) -> Option<GitUrl> {
        let reference = RepositoryReference::from(&url);
        let precise = *self.get(&reference)?;
        Some(url.with_precise(precise))
    }

    /// Returns `true` if the two Git URLs refer to the same precise commit.
    ///
    /// URLs for different repositories never match. URLs for the same repository match when
    /// their references are equal, or when both resolve (from the URL itself or from the
    /// in-memory cache) to the same commit. If either commit is unknown, returns `false`.
    pub fn same_ref(&self, a: &GitUrl, b: &GitUrl) -> bool {
        let a_ref = RepositoryReference::from(a);
        let b_ref = RepositoryReference::from(b);

        // The URLs must refer to the same repository.
        if a_ref.url != b_ref.url {
            return false;
        }

        // If the URLs have the same reference, they refer to the same commit.
        if a_ref.reference == b_ref.reference {
            return true;
        }

        // Otherwise, the URLs must resolve to the same precise commit: prefer the commit carried
        // by the URL, falling back to the in-memory cache.
        let resolve = |url: &GitUrl, key: &RepositoryReference| -> Option<GitOid> {
            url.precise().or_else(|| self.get(key).map(|sha| *sha))
        };
        let Some(a_precise) = resolve(a, &a_ref) else {
            return false;
        };
        let Some(b_precise) = resolve(b, &b_ref) else {
            return false;
        };

        a_precise == b_precise
    }
}
/// A [`RepositoryReference`] paired with the precise commit SHA recorded for it.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ResolvedRepositoryReference {
/// An abstract reference to a Git repository, including the URL and the commit (e.g., a branch,
/// tag, or revision).
pub reference: RepositoryReference,
/// The precise commit SHA of the reference.
pub sha: GitOid,
}
/// A Git repository URL combined with the requested reference (branch, tag, or revision).
///
/// Derives `Eq` and `Hash`, and is used as the key type of the [`GitResolver`] map.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RepositoryReference {
/// The URL of the Git repository, with any query parameters and fragments removed.
pub url: RepositoryUrl,
/// The reference to the commit to use, which could be a branch, tag, or revision.
pub reference: GitReference,
}
impl From<&GitUrl> for RepositoryReference {
    /// Builds a [`RepositoryReference`] from the repository URL and the requested reference of
    /// the given [`GitUrl`].
    fn from(git: &GitUrl) -> Self {
        let url = RepositoryUrl::new(git.repository());
        let reference = git.reference().clone();
        Self { url, reference }
    }
}