[go: up one dir, main page]

uv-git 0.0.1

This is a component crate of uv
Documentation
//! Git support is derived from Cargo's implementation.
//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice.
//! Source: <https://github.com/rust-lang/cargo/blob/23eb492cf920ce051abfc56bbaf838514dc8365c/src/cargo/sources/git/source.rs>

use std::borrow::Cow;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use anyhow::Result;
use tracing::{debug, instrument};

use uv_cache_key::{RepositoryUrl, cache_digest};
use uv_git_types::{GitOid, GitReference, GitUrl};
use uv_redacted::DisplaySafeUrl;

use crate::GIT_STORE;
use crate::git::{GitDatabase, GitRemote};

/// A remote Git source that can be checked out locally.
pub struct GitSource {
    /// The Git reference from the manifest file.
    git: GitUrl,
    /// Whether to disable SSL verification.
    disable_ssl: bool,
    /// Whether to operate without network connectivity.
    offline: bool,
    /// The path to the Git source database.
    cache: PathBuf,
    /// The reporter to use for this source.
    reporter: Option<Arc<dyn Reporter>>,
}

impl GitSource {
    /// Initialize a [`GitSource`] with the given Git URL, HTTP client, and cache path.
    pub fn new(git: GitUrl, cache: impl Into<PathBuf>, offline: bool) -> Self {
        Self {
            git,
            disable_ssl: false,
            offline,
            cache: cache.into(),
            reporter: None,
        }
    }

    /// Disable SSL verification for this [`GitSource`].
    #[must_use]
    pub fn dangerous(self) -> Self {
        Self {
            disable_ssl: true,
            ..self
        }
    }

    /// Set the [`Reporter`] to use for the [`GitSource`].
    #[must_use]
    pub fn with_reporter(self, reporter: Arc<dyn Reporter>) -> Self {
        Self {
            reporter: Some(reporter),
            ..self
        }
    }

    /// Fetch the underlying Git repository at the given revision.
    #[instrument(skip(self), fields(repository = %self.git.repository(), rev = ?self.git.precise()))]
    pub fn fetch(self) -> Result<Fetch> {
        // Compute the canonical URL for the repository.
        let canonical = RepositoryUrl::new(self.git.repository());

        // The path to the repo, within the Git database.
        let ident = cache_digest(&canonical);
        let db_path = self.cache.join("db").join(&ident);

        // Authenticate the URL, if necessary.
        let remote = if let Some(credentials) = GIT_STORE.get(&canonical) {
            Cow::Owned(credentials.apply(self.git.repository().clone()))
        } else {
            Cow::Borrowed(self.git.repository())
        };

        // Fetch the commit, if we don't already have it. Wrapping this section in a closure makes
        // it easier to short-circuit this in the cases where we do have the commit.
        let (db, actual_rev, maybe_task) = || -> Result<(GitDatabase, GitOid, Option<usize>)> {
            let git_remote = GitRemote::new(&remote);
            let maybe_db = git_remote.db_at(&db_path).ok();

            // If we have a locked revision, and we have a pre-existing database which has that
            // revision, then no update needs to happen.
            if let (Some(rev), Some(db)) = (self.git.precise(), &maybe_db) {
                if db.contains(rev) {
                    debug!("Using existing Git source `{}`", self.git.repository());
                    return Ok((maybe_db.unwrap(), rev, None));
                }
            }

            // If the revision isn't locked, but it looks like it might be an exact commit hash,
            // and we do have a pre-existing database, then check whether it is, in fact, a commit
            // hash. If so, treat it like it's locked.
            if let Some(db) = &maybe_db {
                if let GitReference::BranchOrTagOrCommit(maybe_commit) = self.git.reference() {
                    if let Ok(oid) = maybe_commit.parse::<GitOid>() {
                        if db.contains(oid) {
                            // This reference is an exact commit. Treat it like it's
                            // locked.
                            debug!("Using existing Git source `{}`", self.git.repository());
                            return Ok((maybe_db.unwrap(), oid, None));
                        }
                    }
                }
            }

            // ... otherwise, we use this state to update the Git database. Note that we still check
            // for being offline here, for example in the situation that we have a locked revision
            // but the database doesn't have it.
            debug!("Updating Git source `{}`", self.git.repository());

            // Report the checkout operation to the reporter.
            let task = self.reporter.as_ref().map(|reporter| {
                reporter.on_checkout_start(git_remote.url(), self.git.reference().as_rev())
            });

            let (db, actual_rev) = git_remote.checkout(
                &db_path,
                maybe_db,
                self.git.reference(),
                self.git.precise(),
                self.disable_ssl,
                self.offline,
            )?;

            Ok((db, actual_rev, task))
        }()?;

        // Don’t use the full hash, in order to contribute less to reaching the
        // path length limit on Windows.
        let short_id = db.to_short_id(actual_rev)?;

        // Check out `actual_rev` from the database to a scoped location on the
        // filesystem. This will use hard links and such to ideally make the
        // checkout operation here pretty fast.
        let checkout_path = self
            .cache
            .join("checkouts")
            .join(&ident)
            .join(short_id.as_str());

        db.copy_to(actual_rev, &checkout_path)?;

        // Report the checkout operation to the reporter.
        if let Some(task) = maybe_task {
            if let Some(reporter) = self.reporter.as_ref() {
                reporter.on_checkout_complete(remote.as_ref(), actual_rev.as_str(), task);
            }
        }

        Ok(Fetch {
            git: self.git.with_precise(actual_rev),
            path: checkout_path,
        })
    }
}

pub struct Fetch {
    /// The [`GitUrl`] reference that was fetched.
    git: GitUrl,
    /// The path to the checked out repository.
    path: PathBuf,
}

impl Fetch {
    pub fn git(&self) -> &GitUrl {
        &self.git
    }

    pub fn path(&self) -> &Path {
        &self.path
    }

    pub fn into_git(self) -> GitUrl {
        self.git
    }

    pub fn into_path(self) -> PathBuf {
        self.path
    }
}

pub trait Reporter: Send + Sync {
    /// Callback to invoke when a repository checkout begins.
    fn on_checkout_start(&self, url: &DisplaySafeUrl, rev: &str) -> usize;

    /// Callback to invoke when a repository checkout completes.
    fn on_checkout_complete(&self, url: &DisplaySafeUrl, rev: &str, index: usize);
}