From 33b777074db21db8cd060ecf8cfdac0409a7e10c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 11 May 2023 18:43:41 +0200 Subject: [PATCH 1/9] support reading the fetch algorithm from configuration --- gix/src/config/tree/mod.rs | 9 ++- gix/src/config/tree/sections/fetch.rs | 67 +++++++++++++++++++ gix/src/config/tree/sections/mod.rs | 5 ++ gix/src/remote/connection/fetch/error.rs | 2 + gix/src/remote/connection/fetch/mod.rs | 3 +- gix/src/remote/connection/fetch/negotiate.rs | 15 ++--- .../remote/connection/fetch/receive_pack.rs | 14 +++- gix/src/remote/fetch.rs | 31 +++++++-- gix/tests/config/tree.rs | 30 +++++++++ src/plumbing/progress.rs | 6 -- 10 files changed, 156 insertions(+), 26 deletions(-) create mode 100644 gix/src/config/tree/sections/fetch.rs diff --git a/gix/src/config/tree/mod.rs b/gix/src/config/tree/mod.rs index b378b8c49ca..065d77aaa23 100644 --- a/gix/src/config/tree/mod.rs +++ b/gix/src/config/tree/mod.rs @@ -34,6 +34,8 @@ pub(crate) mod root { pub const DIFF: sections::Diff = sections::Diff; /// The `extensions` section. pub const EXTENSIONS: sections::Extensions = sections::Extensions; + /// The `fetch` section. + pub const FETCH: sections::Fetch = sections::Fetch; /// The `gitoxide` section. pub const GITOXIDE: sections::Gitoxide = sections::Gitoxide; /// The `http` section. @@ -69,6 +71,7 @@ pub(crate) mod root { &Self::CREDENTIAL, &Self::DIFF, &Self::EXTENSIONS, + &Self::FETCH, &Self::GITOXIDE, &Self::HTTP, &Self::INDEX, @@ -87,9 +90,9 @@ pub(crate) mod root { mod sections; pub use sections::{ - branch, checkout, core, credential, diff, extensions, gitoxide, http, index, protocol, remote, ssh, Author, Branch, - Checkout, Clone, Committer, Core, Credential, Diff, Extensions, Gitoxide, Http, Index, Init, Pack, Protocol, - Remote, Safe, Ssh, Url, User, + branch, checkout, core, credential, diff, extensions, fetch, gitoxide, http, index, protocol, remote, ssh, Author, + Branch, Checkout, Clone, Committer, Core, Credential, Diff, Extensions, Fetch, Gitoxide, Http, Index, Init, Pack, + Protocol, Remote, Safe, Ssh, Url, User, }; /// Generic value implementations for static instantiation. diff --git a/gix/src/config/tree/sections/fetch.rs b/gix/src/config/tree/sections/fetch.rs new file mode 100644 index 00000000000..e9996da898c --- /dev/null +++ b/gix/src/config/tree/sections/fetch.rs @@ -0,0 +1,67 @@ +use crate::{ + config, + config::tree::{keys, Fetch, Key, Section}, +}; + +impl Fetch { + /// The `fetch.negotiationAlgorithm` key. + pub const NEGOTIATION_ALGORITHM: NegotiationAlgorithm = NegotiationAlgorithm::new_with_validate( + "negotiationAlgorithm", + &config::Tree::FETCH, + validate::NegotiationAlgorithm, + ); +} + +impl Section for Fetch { + fn name(&self) -> &str { + "fetch" + } + + fn keys(&self) -> &[&dyn Key] { + &[&Self::NEGOTIATION_ALGORITHM] + } +} + +/// The `fetch.negotiationAlgorithm` key. +pub type NegotiationAlgorithm = keys::Any; + +mod algorithm { + use gix_object::bstr::ByteSlice; + use std::borrow::Cow; + + use crate::remote::fetch::negotiate; + use crate::{ + bstr::BStr, + config::{key::GenericErrorWithValue, tree::sections::fetch::NegotiationAlgorithm}, + }; + + impl NegotiationAlgorithm { + /// Derive the negotiation algorithm identified by `name`, case-sensitively. + pub fn try_into_negotiation_algorithm( + &'static self, + name: Cow<'_, BStr>, + ) -> Result { + Ok(match name.as_ref().as_bytes() { + b"noop" => negotiate::Algorithm::Noop, + b"consecutive" | b"default" => negotiate::Algorithm::Consecutive, + b"skipping" => negotiate::Algorithm::Skipping, + _ => return Err(GenericErrorWithValue::from_value(self, name.into_owned())), + }) + } + } +} + +mod validate { + use crate::{ + bstr::BStr, + config::tree::{keys, Fetch}, + }; + + pub struct NegotiationAlgorithm; + impl keys::Validate for NegotiationAlgorithm { + fn validate(&self, value: &BStr) -> Result<(), Box> { + Fetch::NEGOTIATION_ALGORITHM.try_into_negotiation_algorithm(value.into())?; + Ok(()) + } + } +} diff --git a/gix/src/config/tree/sections/mod.rs b/gix/src/config/tree/sections/mod.rs index 9f0a50c93f3..ebf24a8b7f0 100644 --- a/gix/src/config/tree/sections/mod.rs +++ b/gix/src/config/tree/sections/mod.rs @@ -45,6 +45,11 @@ pub mod diff; pub struct Extensions; pub mod extensions; +/// The `fetch` top-level section. +#[derive(Copy, Clone, Default)] +pub struct Fetch; +pub mod fetch; + /// The `gitoxide` top-level section. #[derive(Copy, Clone, Default)] pub struct Gitoxide; diff --git a/gix/src/remote/connection/fetch/error.rs b/gix/src/remote/connection/fetch/error.rs index afcacca1379..5034dcb5d4e 100644 --- a/gix/src/remote/connection/fetch/error.rs +++ b/gix/src/remote/connection/fetch/error.rs @@ -43,6 +43,8 @@ pub enum Error { RejectShallowRemoteConfig(#[from] config::boolean::Error), #[error("Receiving objects from shallow remotes is prohibited due to the value of `clone.rejectShallow`")] RejectShallowRemote, + #[error(transparent)] + NegotiationAlgorithmConfig(#[from] config::key::GenericErrorWithValue), } impl gix_protocol::transport::IsSpuriousError for Error { diff --git a/gix/src/remote/connection/fetch/mod.rs b/gix/src/remote/connection/fetch/mod.rs index a51ae7c5448..70b589cd963 100644 --- a/gix/src/remote/connection/fetch/mod.rs +++ b/gix/src/remote/connection/fetch/mod.rs @@ -91,8 +91,7 @@ impl From for gix_features::progress::Id { } } -/// -pub mod negotiate; +pub(crate) mod negotiate; /// pub mod prepare { diff --git a/gix/src/remote/connection/fetch/negotiate.rs b/gix/src/remote/connection/fetch/negotiate.rs index 771c5acba2b..985d7bd259d 100644 --- a/gix/src/remote/connection/fetch/negotiate.rs +++ b/gix/src/remote/connection/fetch/negotiate.rs @@ -1,11 +1,5 @@ use crate::remote::fetch; - -/// The way the negotiation is performed -#[derive(Copy, Clone)] -pub(crate) enum Algorithm { - /// Our very own implementation that probably should be replaced by one of the known algorithms soon. - Naive, -} +use crate::remote::fetch::negotiate::Algorithm; /// The error returned during negotiation. #[derive(Debug, thiserror::Error)] @@ -23,8 +17,8 @@ pub(crate) fn one_round( algo: Algorithm, round: usize, repo: &crate::Repository, - ref_map: &crate::remote::fetch::RefMap, - fetch_tags: crate::remote::fetch::Tags, + ref_map: &fetch::RefMap, + fetch_tags: fetch::Tags, arguments: &mut gix_protocol::fetch::Arguments, _previous_response: Option<&gix_protocol::fetch::Response>, shallow: Option<&fetch::Shallow>, @@ -39,6 +33,9 @@ pub(crate) fn one_round( } match algo { + Algorithm::Noop | Algorithm::Skipping | Algorithm::Consecutive => { + todo!() + } Algorithm::Naive => { assert_eq!(round, 1, "Naive always finishes after the first round, it claims."); let mut has_missing_tracking_branch = false; diff --git a/gix/src/remote/connection/fetch/receive_pack.rs b/gix/src/remote/connection/fetch/receive_pack.rs index 99560fbca6c..c7dffd892bc 100644 --- a/gix/src/remote/connection/fetch/receive_pack.rs +++ b/gix/src/remote/connection/fetch/receive_pack.rs @@ -6,6 +6,9 @@ use gix_protocol::{ transport::{client::Transport, packetline::read::ProgressAction}, }; +use crate::config::cache::util::ApplyLeniency; +use crate::config::tree::{Fetch, Key}; +use crate::remote::fetch::negotiate::Algorithm; use crate::{ config::tree::Clone, remote, @@ -109,12 +112,21 @@ where }); } + let algorithm = repo + .config + .resolved + .string_by_key(Fetch::NEGOTIATION_ALGORITHM.logical_name().as_str()) + .map(|n| Fetch::NEGOTIATION_ALGORITHM.try_into_negotiation_algorithm(n)) + .transpose() + .with_leniency(repo.config.lenient_config)? + .unwrap_or(Algorithm::Naive); // TODO: use the default instead once consecutive is implemented + let reader = 'negotiation: loop { progress.step(); progress.set_name(format!("negotiate (round {round})")); let is_done = match negotiate::one_round( - negotiate::Algorithm::Naive, + algorithm, round, repo, &self.ref_map, diff --git a/gix/src/remote/fetch.rs b/gix/src/remote/fetch.rs index 8196f20d1f0..c6190afc382 100644 --- a/gix/src/remote/fetch.rs +++ b/gix/src/remote/fetch.rs @@ -1,3 +1,29 @@ +/// +pub mod negotiate { + /// The way the negotiation is performed. + #[derive(Default, Debug, Copy, Clone, Eq, PartialEq)] + pub enum Algorithm { + /// Do not send any information at all, likely at cost of larger-than-necessary packs. + Noop, + /// Walk over consecutive commits and check each one. This can be costly be assures packs are exactly the size they need to be. + #[default] + Consecutive, + /// Like `Consecutive`, but skips commits to converge faster, at the cost of receiving packs that are larger than they have to be. + Skipping, + /// Our very own implementation that probably should be replaced by one of the known algorithms soon. + Naive, + } + + #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] + pub use super::super::connection::fetch::negotiate::Error; + + #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] + pub(crate) use super::super::connection::fetch::negotiate::one_round; +} + +#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] +pub use super::connection::fetch::{prepare, refs, Error, Outcome, Prepare, ProgressId, RefLogMessage, Status}; + /// If `Yes`, don't really make changes but do as much as possible to get an idea of what would be done. #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] @@ -197,8 +223,3 @@ pub struct Mapping { /// The index into the fetch ref-specs used to produce the mapping, allowing it to be recovered. pub spec_index: SpecIndex, } - -#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))] -pub use super::connection::fetch::{ - negotiate, prepare, refs, Error, Outcome, Prepare, ProgressId, RefLogMessage, Status, -}; diff --git a/gix/tests/config/tree.rs b/gix/tests/config/tree.rs index 3992b648f5e..a822b7cd8af 100644 --- a/gix/tests/config/tree.rs +++ b/gix/tests/config/tree.rs @@ -117,6 +117,36 @@ mod ssh { } } +mod fetch { + use crate::config::tree::bcow; + use gix::config::tree::{Fetch, Key}; + use gix::remote::fetch::negotiate::Algorithm; + + #[test] + fn algorithm() -> crate::Result { + for (actual, expected) in [ + ("noop", Algorithm::Noop), + ("consecutive", Algorithm::Consecutive), + ("skipping", Algorithm::Skipping), + ("default", Algorithm::Consecutive), // actually, default can be Skipping of `feature.experimental` is true, but we don't deal with that yet until we implement `skipping` + ] { + assert_eq!( + Fetch::NEGOTIATION_ALGORITHM.try_into_negotiation_algorithm(bcow(actual))?, + expected + ); + assert!(Fetch::NEGOTIATION_ALGORITHM.validate(actual.into()).is_ok()); + } + assert_eq!( + Fetch::NEGOTIATION_ALGORITHM + .try_into_negotiation_algorithm(bcow("foo")) + .unwrap_err() + .to_string(), + "The key \"fetch.negotiationAlgorithm=foo\" was invalid" + ); + Ok(()) + } +} + mod diff { use gix::{ config::tree::{Diff, Key}, diff --git a/src/plumbing/progress.rs b/src/plumbing/progress.rs index 005a9b06bec..71ee94028d1 100644 --- a/src/plumbing/progress.rs +++ b/src/plumbing/progress.rs @@ -294,12 +294,6 @@ static GIT_CONFIG: &[Record] = &[ config: "fetch.output", usage: NotPlanned {reason: "'gix' might support it, but there is no intention on copying the 'git' CLI"}, }, - Record { - config: "fetch.negotiationAlgorithm", - usage: Planned { - note: Some("Implements our own 'naive' algorithm, only"), - }, - }, Record { config: "remotes.", usage: Planned { From f4245f4bb0921610456dde2c56068e7c5e4f1d27 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 17 May 2023 10:29:15 +0200 Subject: [PATCH 2/9] refactor - name tests correctly --- gix/Cargo.toml | 8 ++++---- gix/tests/{git-with-regex.rs => gix-with-regex.rs} | 0 gix/tests/{git.rs => gix.rs} | 0 3 files changed, 4 insertions(+), 4 deletions(-) rename gix/tests/{git-with-regex.rs => gix-with-regex.rs} (100%) rename gix/tests/{git.rs => gix.rs} (100%) diff --git a/gix/Cargo.toml b/gix/Cargo.toml index e7f6a062ef1..899f6d9aebb 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -14,13 +14,13 @@ doctest = false test = true [[test]] -name = "git" -path = "tests/git.rs" +name = "gix" +path = "tests/gix.rs" required-features = [] [[test]] -name = "git-with-regex" -path = "tests/git-with-regex.rs" +name = "gix-with-regex" +path = "tests/gix-with-regex.rs" required-features = ["regex"] [[example]] diff --git a/gix/tests/git-with-regex.rs b/gix/tests/gix-with-regex.rs similarity index 100% rename from gix/tests/git-with-regex.rs rename to gix/tests/gix-with-regex.rs diff --git a/gix/tests/git.rs b/gix/tests/gix.rs similarity index 100% rename from gix/tests/git.rs rename to gix/tests/gix.rs From 967f3b954e9fb4fc7757f8920998173caf0491ab Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 17 May 2023 15:43:14 +0200 Subject: [PATCH 3/9] refactor!: make API more consistent with other `gix-*` crates. For that, we remove duplicate import paths for types. We also improve lifetimes around parent iteration, and make the type explicit. --- gix-commitgraph/src/{graph => }/access.rs | 17 ++++----- gix-commitgraph/src/file/access.rs | 5 ++- gix-commitgraph/src/file/commit.rs | 31 ++++++++-------- gix-commitgraph/src/file/init.rs | 9 +++-- gix-commitgraph/src/file/mod.rs | 37 ++++--------------- gix-commitgraph/src/file/verify.rs | 5 +-- gix-commitgraph/src/graph/mod.rs | 27 -------------- gix-commitgraph/src/{graph => }/init.rs | 7 +--- gix-commitgraph/src/lib.rs | 45 +++++++++++++++++++++-- gix-commitgraph/src/{graph => }/verify.rs | 12 +++--- gix-commitgraph/tests/commitgraph.rs | 5 +-- 11 files changed, 92 insertions(+), 108 deletions(-) rename gix-commitgraph/src/{graph => }/access.rs (86%) delete mode 100644 gix-commitgraph/src/graph/mod.rs rename gix-commitgraph/src/{graph => }/init.rs (97%) rename gix-commitgraph/src/{graph => }/verify.rs (95%) diff --git a/gix-commitgraph/src/graph/access.rs b/gix-commitgraph/src/access.rs similarity index 86% rename from gix-commitgraph/src/graph/access.rs rename to gix-commitgraph/src/access.rs index 5aa71eb72cd..ce3688f780e 100644 --- a/gix-commitgraph/src/graph/access.rs +++ b/gix-commitgraph/src/access.rs @@ -1,7 +1,4 @@ -use crate::{ - file::{self, Commit, File}, - graph::{self, Graph}, -}; +use crate::{file, file::Commit, File, Graph, Position}; /// Access impl Graph { @@ -9,7 +6,7 @@ impl Graph { /// /// # Panics /// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()]. - pub fn commit_at(&self, pos: graph::Position) -> Commit<'_> { + pub fn commit_at(&self, pos: Position) -> Commit<'_> { let r = self.lookup_by_pos(pos); r.file.commit_at(r.pos) } @@ -24,7 +21,7 @@ impl Graph { /// /// # Panics /// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()]. - pub fn id_at(&self, pos: graph::Position) -> &gix_hash::oid { + pub fn id_at(&self, pos: Position) -> &gix_hash::oid { let r = self.lookup_by_pos(pos); r.file.id_at(r.pos) } @@ -40,7 +37,7 @@ impl Graph { } /// Translate the given `id` to its position in the file. - pub fn lookup(&self, id: impl AsRef) -> Option { + pub fn lookup(&self, id: impl AsRef) -> Option { Some(self.lookup_by_id(id.as_ref())?.graph_pos) } @@ -59,7 +56,7 @@ impl Graph { return Some(LookupByIdResult { file, file_pos: lex_pos, - graph_pos: graph::Position(current_file_start + lex_pos.0), + graph_pos: Position(current_file_start + lex_pos.0), }); } current_file_start += file.num_commits(); @@ -67,7 +64,7 @@ impl Graph { None } - fn lookup_by_pos(&self, pos: graph::Position) -> LookupByPositionResult<'_> { + fn lookup_by_pos(&self, pos: Position) -> LookupByPositionResult<'_> { let mut remaining = pos.0; for (file_index, file) in self.files.iter().enumerate() { match remaining.checked_sub(file.num_commits()) { @@ -88,7 +85,7 @@ impl Graph { #[derive(Clone)] struct LookupByIdResult<'a> { pub file: &'a File, - pub graph_pos: graph::Position, + pub graph_pos: Position, pub file_pos: file::Position, } diff --git a/gix-commitgraph/src/file/access.rs b/gix-commitgraph/src/file/access.rs index a6602d5485e..44fb6db9891 100644 --- a/gix-commitgraph/src/file/access.rs +++ b/gix-commitgraph/src/file/access.rs @@ -4,7 +4,10 @@ use std::{ path::Path, }; -use crate::file::{self, commit::Commit, File, COMMIT_DATA_ENTRY_SIZE_SANS_HASH}; +use crate::{ + file::{self, commit::Commit, COMMIT_DATA_ENTRY_SIZE_SANS_HASH}, + File, +}; /// Access impl File { diff --git a/gix-commitgraph/src/file/commit.rs b/gix-commitgraph/src/file/commit.rs index 7c237563287..7f4219b6e5c 100644 --- a/gix-commitgraph/src/file/commit.rs +++ b/gix-commitgraph/src/file/commit.rs @@ -6,8 +6,8 @@ use std::{ }; use crate::{ - file::{self, File, EXTENDED_EDGES_MASK, LAST_EXTENDED_EDGE_MASK, NO_PARENT}, - graph, + file::{self, EXTENDED_EDGES_MASK, LAST_EXTENDED_EDGE_MASK, NO_PARENT}, + File, Position, }; /// The error used in the [`file::commit`][self] module. @@ -25,6 +25,7 @@ pub enum Error { } /// A commit as stored in a [`File`]. +#[derive(Copy, Clone)] pub struct Commit<'a> { file: &'a File, pos: file::Position, @@ -72,11 +73,11 @@ impl<'a> Commit<'a> { } /// Returns an iterator over the parent positions for lookup in the owning [Graph][crate::Graph]. - pub fn iter_parents(&'a self) -> impl Iterator> + 'a { + pub fn iter_parents(self) -> Parents<'a> { // I didn't find a combinator approach that a) was as strict as ParentIterator, b) supported // fuse-after-first-error behavior, and b) was significantly shorter or more understandable // than ParentIterator. So here we are. - ParentIterator { + Parents { commit_data: self, state: ParentIteratorState::First, } @@ -88,7 +89,7 @@ impl<'a> Commit<'a> { } /// Returns the first parent of this commit. - pub fn parent1(&self) -> Result, Error> { + pub fn parent1(&self) -> Result, Error> { self.iter_parents().next().transpose() } @@ -127,13 +128,13 @@ impl<'a> PartialEq for Commit<'a> { } /// An iterator over parents of a [`Commit`]. -pub struct ParentIterator<'a> { - commit_data: &'a Commit<'a>, +pub struct Parents<'a> { + commit_data: Commit<'a>, state: ParentIteratorState<'a>, } -impl<'a> Iterator for ParentIterator<'a> { - type Item = Result; +impl<'a> Iterator for Parents<'a> { + type Item = Result; fn next(&mut self) -> Option { let state = std::mem::replace(&mut self.state, ParentIteratorState::Exhausted); @@ -221,7 +222,7 @@ enum ParentIteratorState<'a> { #[derive(Clone, Copy, Debug)] enum ParentEdge { None, - GraphPosition(graph::Position), + GraphPosition(Position), ExtraEdgeIndex(u32), } @@ -233,22 +234,22 @@ impl ParentEdge { if raw & EXTENDED_EDGES_MASK != 0 { ParentEdge::ExtraEdgeIndex(raw & !EXTENDED_EDGES_MASK) } else { - ParentEdge::GraphPosition(graph::Position(raw)) + ParentEdge::GraphPosition(Position(raw)) } } } enum ExtraEdge { - Internal(graph::Position), - Last(graph::Position), + Internal(Position), + Last(Position), } impl ExtraEdge { pub fn from_raw(raw: u32) -> Self { if raw & LAST_EXTENDED_EDGE_MASK != 0 { - Self::Last(graph::Position(raw & !LAST_EXTENDED_EDGE_MASK)) + Self::Last(Position(raw & !LAST_EXTENDED_EDGE_MASK)) } else { - Self::Internal(graph::Position(raw)) + Self::Internal(Position(raw)) } } } diff --git a/gix-commitgraph/src/file/init.rs b/gix-commitgraph/src/file/init.rs index e55894168a3..1d8f52e3f7b 100644 --- a/gix-commitgraph/src/file/init.rs +++ b/gix-commitgraph/src/file/init.rs @@ -6,9 +6,12 @@ use std::{ use bstr::ByteSlice; use memmap2::Mmap; -use crate::file::{ - ChunkId, File, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH, - EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE, +use crate::{ + file::{ + ChunkId, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH, + EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE, + }, + File, }; /// The error used in [`File::at()`]. diff --git a/gix-commitgraph/src/file/mod.rs b/gix-commitgraph/src/file/mod.rs index f9b9821a859..528cd69fb95 100644 --- a/gix-commitgraph/src/file/mod.rs +++ b/gix-commitgraph/src/file/mod.rs @@ -1,12 +1,6 @@ //! Operations on a single commit-graph file. -use std::{ - fmt::{Display, Formatter}, - ops::Range, - path::PathBuf, -}; - -use memmap2::Mmap; +use std::fmt::{Display, Formatter}; pub use self::{commit::Commit, init::Error}; @@ -16,7 +10,7 @@ mod init; pub mod verify; const COMMIT_DATA_ENTRY_SIZE_SANS_HASH: usize = 16; -const FAN_LEN: usize = 256; +pub(crate) const FAN_LEN: usize = 256; const HEADER_LEN: usize = 8; const SIGNATURE: &[u8] = b"CGPH"; @@ -34,31 +28,14 @@ const NO_PARENT: u32 = 0x7000_0000; const EXTENDED_EDGES_MASK: u32 = 0x8000_0000; const LAST_EXTENDED_EDGE_MASK: u32 = 0x8000_0000; -/// A single commit-graph file. -/// -/// All operations on a `File` are local to that graph file. Since a commit graph can span multiple -/// files, all interesting graph operations belong on [`Graph`][crate::Graph]. -pub struct File { - base_graph_count: u8, - base_graphs_list_offset: Option, - commit_data_offset: usize, - data: Mmap, - extra_edges_list_range: Option>, - fan: [u32; FAN_LEN], - oid_lookup_offset: usize, - path: PathBuf, - hash_len: usize, - object_hash: gix_hash::Kind, -} - /// The position of a given commit within a graph file, starting at 0. /// -/// Commits within a graph file are sorted in lexicographical order by OID; a commit's lexigraphical position +/// Commits within a graph file are sorted in lexicographical order by OID; a commit's lexicographical position /// is its position in this ordering. If a commit graph spans multiple files, each file's commits -/// start at lexigraphical position 0, so it is unique across a single file but is not unique across -/// the whole commit graph. Each commit also has a graph position ([`graph::Position`][crate::graph::Position]), -/// which is unique /// across the whole commit graph. In order to avoid accidentally mixing lexigraphical positions with graph -/// positions, distinct types are used for each. +/// start at lexicographical position 0, so it is unique across a single file but is not unique across +/// the whole commit graph. Each commit also has a graph position ([`Position`][crate::Position]), +/// which is unique across the whole commit graph. +/// In order to avoid accidentally mixing lexicographical positions with graph positions, distinct types are used for each. #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct Position(pub u32); diff --git a/gix-commitgraph/src/file/verify.rs b/gix-commitgraph/src/file/verify.rs index 9a7e251a6f4..4f4f7682973 100644 --- a/gix-commitgraph/src/file/verify.rs +++ b/gix-commitgraph/src/file/verify.rs @@ -5,10 +5,7 @@ use std::{ path::Path, }; -use crate::{ - file::{self, File}, - GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX, -}; +use crate::{file, File, GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX}; /// The error used in [`File::traverse()`]. #[derive(thiserror::Error, Debug)] diff --git a/gix-commitgraph/src/graph/mod.rs b/gix-commitgraph/src/graph/mod.rs deleted file mode 100644 index bf03214c349..00000000000 --- a/gix-commitgraph/src/graph/mod.rs +++ /dev/null @@ -1,27 +0,0 @@ -//! Operations on a complete commit graph. -mod access; -mod init; -pub mod verify; - -use std::fmt; - -use crate::file::File; - -/// A complete commit graph. -/// -/// The data in the commit graph may come from a monolithic `objects/info/commit-graph` file, or it -/// may come from one or more `objects/info/commit-graphs/graph-*.graph` files. These files are -/// generated via `git commit-graph write ...` commands. -pub struct Graph { - files: Vec, -} - -/// A generalized position for use in [`Graph`]. -#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash)] -pub struct Position(pub u32); - -impl fmt::Display for Position { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} diff --git a/gix-commitgraph/src/graph/init.rs b/gix-commitgraph/src/init.rs similarity index 97% rename from gix-commitgraph/src/graph/init.rs rename to gix-commitgraph/src/init.rs index a0e4166b010..f964aade029 100644 --- a/gix-commitgraph/src/graph/init.rs +++ b/gix-commitgraph/src/init.rs @@ -4,12 +4,9 @@ use std::{ path::{Path, PathBuf}, }; -use crate::{ - file::{self, File}, - Graph, MAX_COMMITS, -}; +use crate::{file, File, Graph, MAX_COMMITS}; -/// The error used in the [`graph`][crate::graph] module. +/// The error returned by initializations functions like [`Graph::at()`]. #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { diff --git a/gix-commitgraph/src/lib.rs b/gix-commitgraph/src/lib.rs index 8e9db3bda26..2d0cfbb6281 100644 --- a/gix-commitgraph/src/lib.rs +++ b/gix-commitgraph/src/lib.rs @@ -5,7 +5,7 @@ //! traversing the git history by usual means. //! //! As generating the full commit graph from scratch can take some time, git may write new commits -//! to separate [files][file::File] instead of overwriting the original file. +//! to separate [files][File] instead of overwriting the original file. //! Eventually, git will merge these files together as the number of files grows. //! ## Feature Flags #![cfg_attr( @@ -15,10 +15,37 @@ #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] #![deny(missing_docs, rust_2018_idioms, unsafe_code)] -pub mod file; -pub mod graph; +/// A single commit-graph file. +/// +/// All operations on a `File` are local to that graph file. Since a commit graph can span multiple +/// files, all interesting graph operations belong on [`Graph`][crate::Graph]. +pub struct File { + base_graph_count: u8, + base_graphs_list_offset: Option, + commit_data_offset: usize, + data: memmap2::Mmap, + extra_edges_list_range: Option>, + fan: [u32; file::FAN_LEN], + oid_lookup_offset: usize, + path: std::path::PathBuf, + hash_len: usize, + object_hash: gix_hash::Kind, +} + +/// A complete commit graph. +/// +/// The data in the commit graph may come from a monolithic `objects/info/commit-graph` file, or it +/// may come from one or more `objects/info/commit-graphs/graph-*.graph` files. These files are +/// generated via `git commit-graph write ...` commands. +pub struct Graph { + files: Vec, +} -pub use graph::Graph; +mod access; +pub mod file; +/// +pub mod init; +pub mod verify; /// The number of generations that are considered 'infinite' commit history. pub const GENERATION_NUMBER_INFINITY: u32 = 0xffff_ffff; @@ -31,3 +58,13 @@ pub const GENERATION_NUMBER_MAX: u32 = 0x3fff_ffff; /// The maximum number of commits that can be stored in a commit graph. pub const MAX_COMMITS: u32 = (1 << 30) + (1 << 29) + (1 << 28) - 1; + +/// A generalized position for use in [`Graph`]. +#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash)] +pub struct Position(pub u32); + +impl std::fmt::Display for Position { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} diff --git a/gix-commitgraph/src/graph/verify.rs b/gix-commitgraph/src/verify.rs similarity index 95% rename from gix-commitgraph/src/graph/verify.rs rename to gix-commitgraph/src/verify.rs index 81785f0ddac..2d863e648ea 100644 --- a/gix-commitgraph/src/graph/verify.rs +++ b/gix-commitgraph/src/verify.rs @@ -8,7 +8,7 @@ use std::{ use crate::{ file::{self, commit}, - graph, Graph, GENERATION_NUMBER_MAX, + Graph, Position, GENERATION_NUMBER_MAX, }; /// The error used in [`verify_integrity()`][Graph::verify_integrity]. @@ -46,8 +46,8 @@ pub enum Error { )] ParentOutOfRange { id: gix_hash::ObjectId, - max_valid_pos: graph::Position, - parent_pos: graph::Position, + max_valid_pos: Position, + parent_pos: Position, }, #[error("{0}")] Processor(#[source] E), @@ -97,7 +97,7 @@ impl Graph { // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without // a separate loop, e.g. self.iter_sorted_ids(). - let mut file_start_pos = graph::Position(0); + let mut file_start_pos = Position(0); for (file_index, file) in self.files.iter().enumerate() { if usize::from(file.base_graph_count()) != file_index { return Err(Error::BaseGraphCount { @@ -127,7 +127,7 @@ impl Graph { } } - let next_file_start_pos = graph::Position(file_start_pos.0 + file.num_commits()); + let next_file_start_pos = Position(file_start_pos.0 + file.num_commits()); let file_stats = file .traverse(|commit| { let mut max_parent_generation = 0u32; @@ -137,7 +137,7 @@ impl Graph { return Err(Error::ParentOutOfRange { parent_pos, id: commit.id().into(), - max_valid_pos: graph::Position(next_file_start_pos.0 - 1), + max_valid_pos: Position(next_file_start_pos.0 - 1), }); } let parent = self.commit_at(parent_pos); diff --git a/gix-commitgraph/tests/commitgraph.rs b/gix-commitgraph/tests/commitgraph.rs index 1d676c2fcca..f098e1c65ca 100644 --- a/gix-commitgraph/tests/commitgraph.rs +++ b/gix-commitgraph/tests/commitgraph.rs @@ -7,7 +7,7 @@ use std::{ process::Command, }; -use gix_commitgraph::{graph::Position as GraphPosition, Graph}; +use gix_commitgraph::{Graph, Position as GraphPosition}; type Result = std::result::Result<(), Box>; @@ -29,7 +29,6 @@ pub fn check_common(cg: &Graph, expected: &HashMap = ref_info .parent_ids() - .into_iter() .map(|id| { expected .values() @@ -81,7 +80,7 @@ impl RefInfo { self.pos } - pub fn parent_ids(&self) -> impl IntoIterator { + pub fn parent_ids(&self) -> impl Iterator { self.parent_ids.iter().map(|x| x.as_ref()) } From 2d71d20180c3426b40491c1e0bc12361627b3d68 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 17 May 2023 15:57:40 +0200 Subject: [PATCH 4/9] adjust to changes in `gix-commitgraph` --- gitoxide-core/src/commitgraph/verify.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gitoxide-core/src/commitgraph/verify.rs b/gitoxide-core/src/commitgraph/verify.rs index da682ccd54c..d16a9c8b0e1 100644 --- a/gitoxide-core/src/commitgraph/verify.rs +++ b/gitoxide-core/src/commitgraph/verify.rs @@ -1,7 +1,7 @@ use std::{io, path::Path}; use anyhow::{Context as AnyhowContext, Result}; -use gix_commitgraph::{graph::verify::Outcome, Graph}; +use gix_commitgraph::Graph; use crate::OutputFormat; @@ -31,7 +31,7 @@ pub fn graph_or_file( mut out, output_statistics, }: Context, -) -> Result +) -> Result where W1: io::Write, W2: io::Write, @@ -57,7 +57,7 @@ where Ok(stats) } -fn print_human_output(out: &mut impl io::Write, stats: &Outcome) -> io::Result<()> { +fn print_human_output(out: &mut impl io::Write, stats: &gix_commitgraph::verify::Outcome) -> io::Result<()> { writeln!(out, "number of commits with the given number of parents")?; let mut parent_counts: Vec<_> = stats.parent_counts.iter().map(|(a, b)| (*a, *b)).collect(); parent_counts.sort_by_key(|e| e.0); From fc423e470de50491a725d4802066d26c05bd2b2a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 17 May 2023 16:33:01 +0200 Subject: [PATCH 5/9] make clear that we do handle shallow repos, refactor tests --- Cargo.lock | 14 +- Cargo.toml | 1 - gix-revision/Cargo.toml | 5 +- gix-revision/tests/Cargo.toml | 29 -- gix-revision/tests/describe/mod.rs | 305 +++++++++++------- .../make_repo_with_branches.tar.xz | 4 +- .../tests/fixtures/make_repo_with_branches.sh | 3 +- 7 files changed, 192 insertions(+), 169 deletions(-) delete mode 100644 gix-revision/tests/Cargo.toml diff --git a/Cargo.lock b/Cargo.lock index 761c65495cb..dd499bce9ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2169,22 +2169,12 @@ dependencies = [ "gix-hash 0.11.1", "gix-hashtable 0.2.0", "gix-object 0.29.2", + "gix-odb", + "gix-testtools", "serde", "thiserror", ] -[[package]] -name = "gix-revision-tests" -version = "0.0.0" -dependencies = [ - "bstr", - "gix", - "gix-hash 0.11.1", - "gix-object 0.29.2", - "gix-revision", - "gix-testtools", -] - [[package]] name = "gix-sec" version = "0.8.0" diff --git a/Cargo.toml b/Cargo.toml index 90ffd9731bc..1e9d503f878 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -253,7 +253,6 @@ members = [ "cargo-smart-release", "tests/tools", - "gix-revision/tests", "gix-diff/tests", "gix-pack/tests", "gix-index/tests", diff --git a/gix-revision/Cargo.toml b/gix-revision/Cargo.toml index 3070ea02cdd..e95b5326a10 100644 --- a/gix-revision/Cargo.toml +++ b/gix-revision/Cargo.toml @@ -8,7 +8,6 @@ authors = ["Sebastian Thiel "] edition = "2021" include = ["src/**/*", "CHANGELOG.md", "README.md"] rust-version = "1.64" -autotests = false [lib] doctest = false @@ -28,6 +27,10 @@ thiserror = "1.0.26" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } document-features = { version = "0.2.1", optional = true } +[dev-dependencies] +gix-odb = { path = "../gix-odb" } +gix-testtools = { path = "../tests/tools" } + [package.metadata.docs.rs] all-features = true features = ["document-features"] diff --git a/gix-revision/tests/Cargo.toml b/gix-revision/tests/Cargo.toml deleted file mode 100644 index b9df58e67db..00000000000 --- a/gix-revision/tests/Cargo.toml +++ /dev/null @@ -1,29 +0,0 @@ -[package] -name = "gix-revision-tests" -version = "0.0.0" -publish = false -repository = "https://github.com/Byron/gitoxide" -license = "MIT/Apache-2.0" -description = "Please use `gix-` instead ('git' -> 'gix')" -authors = ["Sebastian Thiel "] -edition = "2021" -include = ["src/**/*", "CHANGELOG.md", "README.md"] -rust-version = "1.64" - -[[test]] -name = "revision" -doctest = false -path = "revision.rs" - -[features] -## Data structures implement `serde::Serialize` and `serde::Deserialize`. -serde = [ "gix-revision/serde", "gix-hash/serde", "gix-object/serde" ] - -[dev-dependencies] -gix-revision = { path = "..", default-features = false } -gix-hash = { path = "../../gix-hash" } -gix-object = { path = "../../gix-object" } -gix-testtools = { path = "../../tests/tools" } -gix = { path = "../../gix", default-features = false } - -bstr = { version = "1.3.0", default-features = false, features = ["std"]} diff --git a/gix-revision/tests/describe/mod.rs b/gix-revision/tests/describe/mod.rs index c9fa2a9e839..d41293f559e 100644 --- a/gix-revision/tests/describe/mod.rs +++ b/gix-revision/tests/describe/mod.rs @@ -1,83 +1,100 @@ use std::borrow::Cow; +use std::path::PathBuf; -use gix::{ - odb::{Find, FindExt}, - Repository, -}; use gix_object::bstr::ByteSlice; +use gix_odb::Find; use gix_revision::describe; +use gix_revision::describe::{Error, Outcome}; use crate::hex_to_id; mod format; +fn run_test( + transform_odb: impl FnOnce(gix_odb::Handle) -> gix_odb::Handle, + options: impl Fn(gix_hash::ObjectId) -> gix_revision::describe::Options<'static>, + run_assertions: impl Fn( + Result>, Error>, + gix_hash::ObjectId, + ) -> crate::Result, +) -> crate::Result { + let store = odb_at("."); + let store = transform_odb(store); + let commit_id = hex_to_id("01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"); + run_assertions( + gix_revision::describe( + &commit_id, + |id, buf| { + store + .try_find(id, buf) + .map(|r| r.and_then(|d| d.try_into_commit_iter())) + }, + options(commit_id), + ), + commit_id, + ) +} + #[test] fn option_none_if_no_tag_found() -> crate::Result { - let repo = repo(); - let commit = repo.head_commit()?; - let res = gix_revision::describe( - &commit.id, - |id, buf| repo.objects.find_commit_iter(id, buf).map(Some), - Default::default(), - )?; - assert!(res.is_none(), "cannot find anything if there's no candidate"); - Ok(()) + run_test( + std::convert::identity, + |_| Default::default(), + |res, _id| { + assert!(res?.is_none(), "cannot find anything if there's no candidate"); + Ok(()) + }, + ) } #[test] fn fallback_if_configured_in_options_but_no_candidate_or_names() -> crate::Result { - let repo = repo(); - let commit = repo.head_commit()?; - let res = gix_revision::describe( - &commit.id, - |id, buf| repo.objects.find_commit_iter(id, buf).map(Some), - describe::Options { + run_test( + std::convert::identity, + |_| describe::Options { fallback_to_oid: true, ..Default::default() }, - )? - .expect("fallback activated"); - assert!(res.name.is_none(), "no name can be found"); - assert_eq!(res.depth, 0, "just a default, not relevant as there is no name"); - assert_eq!( - res.commits_seen, 0, - "a traversal is isn't performed as name map is empty, and that's the whole point" - ); - assert_eq!(res.into_format(7).to_string(), "01ec18a"); - Ok(()) + |res, _id| { + let res = res?.expect("fallback active"); + assert!(res.name.is_none(), "no name can be found"); + assert_eq!(res.depth, 0, "just a default, not relevant as there is no name"); + assert_eq!( + res.commits_seen, 0, + "a traversal is isn't performed as name map is empty, and that's the whole point" + ); + assert_eq!(res.into_format(7).to_string(), "01ec18a"); + Ok(()) + }, + ) } #[test] fn fallback_if_configured_in_options_and_max_candidates_zero() -> crate::Result { - let repo = repo(); - let commit = repo.head_commit()?; - let res = gix_revision::describe( - &commit.id, - |id, buf| repo.objects.find_commit_iter(id, buf).map(Some), - describe::Options { + run_test( + std::convert::identity, + |_| describe::Options { fallback_to_oid: true, max_candidates: 0, ..Default::default() }, - )? - .expect("fallback activated"); - assert!(res.name.is_none(), "no name can be found"); - assert_eq!(res.depth, 0, "just a default, not relevant as there is no name"); - assert_eq!(res.commits_seen, 0, "we don't do any traversal"); - assert_eq!(res.into_format(7).to_string(), "01ec18a"); - Ok(()) + |res, _id| { + let res = res?.expect("fallback active"); + assert!(res.name.is_none(), "no name can be found"); + assert_eq!(res.depth, 0, "just a default, not relevant as there is no name"); + assert_eq!(res.commits_seen, 0, "we don't do any traversal"); + assert_eq!(res.into_format(7).to_string(), "01ec18a"); + Ok(()) + }, + ) } #[test] fn not_enough_candidates() -> crate::Result { - let repo = repo(); - let commit = repo.head_commit()?; - let name = Cow::Borrowed(b"at-c5".as_bstr()); - let res = gix_revision::describe( - &commit.id, - |id, buf| repo.objects.find_commit_iter(id, buf).map(Some), - describe::Options { + run_test( + std::convert::identity, + |_| describe::Options { name_by_oid: vec![ (hex_to_id("efd9a841189668f1bab5b8ebade9cd0a1b139a37"), name.clone()), ( @@ -90,50 +107,48 @@ fn not_enough_candidates() -> crate::Result { max_candidates: 1, ..Default::default() }, - )? - .expect("candidate found"); - - assert_eq!(res.name, Some(name), "it finds the youngest/most-recent name"); - assert_eq!(res.id, commit.id); - assert_eq!(res.commits_seen, 6, "it has to traverse commits"); - assert_eq!( - res.depth, 3, - "it calculates the final number of commits even though it aborted early" - ); - - Ok(()) + |res, id| { + let res = res?.expect("candidate found"); + assert_eq!(res.name, Some(name.clone()), "it finds the youngest/most-recent name"); + assert_eq!(res.id, id); + assert_eq!(res.commits_seen, 6, "it has to traverse commits"); + assert_eq!( + res.depth, 3, + "it calculates the final number of commits even though it aborted early" + ); + Ok(()) + }, + ) } #[test] -fn typical_usecases() { - let repo = repo(); - let commit = repo.head_commit().unwrap(); +fn typical_usecases() -> crate::Result { let name = Cow::Borrowed(b"main".as_bstr()); - let res = gix_revision::describe( - &commit.id, - |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "shouldn't be called")), - describe::Options { - name_by_oid: vec![(commit.id, name.clone())].into_iter().collect(), + run_test( + std::convert::identity, + |id| describe::Options { + name_by_oid: vec![(id, name.clone())].into_iter().collect(), max_candidates: 0, ..Default::default() }, - ) - .unwrap() - .expect("found a candidate"); - - assert_eq!( - res.name, - Some(name), - "this is an exact match, and it's found despite max-candidates being 0 (one lookup is always performed)" - ); - assert_eq!(res.id, commit.id); - assert_eq!(res.depth, 0); + |res, id| { + let res = res?.expect("candidate found"); + assert_eq!( + res.name, + Some(name.clone()), + "this is an exact match, and it's found despite max-candidates being 0 (one lookup is always performed)" + ); + assert_eq!(res.id, id); + assert_eq!(res.depth, 0); + assert_eq!(res.commits_seen, 0); + Ok(()) + }, + )?; let name = Cow::Borrowed(b"at-c5".as_bstr()); - let res = gix_revision::describe( - &commit.id, - |id, buf| repo.objects.find_commit_iter(id, buf).map(Some), - describe::Options { + run_test( + std::convert::identity, + |_| describe::Options { name_by_oid: vec![ (hex_to_id("efd9a841189668f1bab5b8ebade9cd0a1b139a37"), name.clone()), ( @@ -145,56 +160,100 @@ fn typical_usecases() { .collect(), ..Default::default() }, - ) - .unwrap() - .expect("found a candidate"); - - assert_eq!( - res.name, - Some(name.clone()), - "a match to a tag 1 commit away with 2 commits on the other side of the merge/head" - ); - assert_eq!(res.id, commit.id); - assert_eq!(res.depth, 3); - assert_eq!(res.commits_seen, 6); + |res, id| { + let res = res?.expect("candidate found"); + assert_eq!( + res.name, + Some(name.clone()), + "a match to a tag 1 commit away with 2 commits on the other side of the merge/head" + ); + assert_eq!(res.id, id); + assert_eq!(res.depth, 3); + assert_eq!(res.commits_seen, 6); + Ok(()) + }, + )?; - let res = gix_revision::describe( - &commit.id, - |id, buf| repo.objects.find_commit_iter(id, buf).map(Some), - describe::Options { - name_by_oid: res.name_by_oid, + run_test( + std::convert::identity, + |_| describe::Options { + name_by_oid: vec![ + (hex_to_id("efd9a841189668f1bab5b8ebade9cd0a1b139a37"), name.clone()), + ( + hex_to_id("9152eeee2328073cf23dcf8e90c949170b711659"), + b"at-b1c1".as_bstr().into(), + ), + ] + .into_iter() + .collect(), first_parent: true, ..Default::default() }, + |res, id| { + let res = res?.expect("candidate found"); + assert_eq!(res.name, Some(name.clone()),); + assert_eq!(res.id, id); + assert_eq!(res.depth, 1); + assert_eq!(res.commits_seen, 2); + assert_eq!(res.into_format(7).to_string(), "at-c5-1-g01ec18a"); + Ok(()) + }, ) - .unwrap() - .expect("found a candidate"); - - assert_eq!(res.name, Some(name),); - assert_eq!(res.id, commit.id); - assert_eq!(res.depth, 1); +} - let shallow_repo = gix::open(repo.work_dir().expect("non-bare").join("shallow-clone")).unwrap(); +#[test] +fn shallow_yields_no_result_if_provided_refs_are_in_truncated_part_of_history() -> crate::Result { + run_test( + |_| odb_at("shallow-1-clone"), + |_| describe::Options { + name_by_oid: vec![( + hex_to_id("efd9a841189668f1bab5b8ebade9cd0a1b139a37"), + Cow::Borrowed(b"at-c5".as_bstr()), + )] + .into_iter() + .collect(), + first_parent: true, + ..Default::default() + }, + |res, _id| { + let res = res?; + assert!( + res.is_none(), + "no candidate found on truncated history, and it doesn't crash" + ); + Ok(()) + }, + ) +} - let res = gix_revision::describe( - &commit.id, - |id, buf| { - shallow_repo - .objects - .try_find(id, buf) - .map(|r| r.and_then(|d| d.try_into_commit_iter())) - }, - describe::Options { - name_by_oid: res.name_by_oid, +#[test] +fn shallow_yields_result_if_refs_are_available() -> crate::Result { + let name = Cow::Borrowed(b"at-c5".as_bstr()); + run_test( + |_| odb_at("shallow-2-clone"), + |_| describe::Options { + name_by_oid: vec![(hex_to_id("efd9a841189668f1bab5b8ebade9cd0a1b139a37"), name.clone())] + .into_iter() + .collect(), first_parent: true, ..Default::default() }, + |res, id| { + let res = res?.expect("found candidate"); + assert_eq!(res.name, Some(name.clone()),); + assert_eq!(res.id, id); + assert_eq!(res.depth, 1); + assert_eq!(res.commits_seen, 2); + assert_eq!(res.into_format(7).to_string(), "at-c5-1-g01ec18a"); + Ok(()) + }, ) - .unwrap(); - assert!(res.is_none(), "no candidate found on truncated history"); } -fn repo() -> Repository { - let dir = gix_testtools::scripted_fixture_read_only_standalone("make_repo_with_branches.sh").unwrap(); - gix::open(dir).unwrap() +fn odb_at(name: &str) -> gix_odb::Handle { + gix_odb::at(fixture_path().join(name).join(".git/objects")).unwrap() +} + +fn fixture_path() -> PathBuf { + gix_testtools::scripted_fixture_read_only("make_repo_with_branches.sh").unwrap() } diff --git a/gix-revision/tests/fixtures/generated-archives/make_repo_with_branches.tar.xz b/gix-revision/tests/fixtures/generated-archives/make_repo_with_branches.tar.xz index 3b481b81018..73881d1632d 100644 --- a/gix-revision/tests/fixtures/generated-archives/make_repo_with_branches.tar.xz +++ b/gix-revision/tests/fixtures/generated-archives/make_repo_with_branches.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7cbbe5397b29025aa128d607b2484e91c971355b45e2b723d4a98094e0cb96d6 -size 12504 +oid sha256:51c255848776de0f426ffb8858ce44f0b09ab3ac2075628b594f520cf86a55ba +size 13124 diff --git a/gix-revision/tests/fixtures/make_repo_with_branches.sh b/gix-revision/tests/fixtures/make_repo_with_branches.sh index e001de7cd24..ca8d5ddef9f 100644 --- a/gix-revision/tests/fixtures/make_repo_with_branches.sh +++ b/gix-revision/tests/fixtures/make_repo_with_branches.sh @@ -21,4 +21,5 @@ git commit -q --allow-empty -m c5 git tag at-c5 git merge branch1 -m m1b1 -git clone --depth 1 file://$PWD shallow-clone +git clone --depth 1 file://$PWD shallow-1-clone +git clone --depth 2 file://$PWD shallow-2-clone From dde8c3aca545ba20cd5752f02283b98647fd3970 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 17 May 2023 17:09:15 +0200 Subject: [PATCH 6/9] feat: A PriorityQueue that is useful for graph traversal. --- gix-revision/src/describe.rs | 28 ++++++++--------- gix-revision/src/lib.rs | 8 +++++ gix-revision/src/queue.rs | 60 ++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 14 deletions(-) create mode 100644 gix-revision/src/queue.rs diff --git a/gix-revision/src/describe.rs b/gix-revision/src/describe.rs index 55cc3deef77..1fd33bb97a0 100644 --- a/gix-revision/src/describe.rs +++ b/gix-revision/src/describe.rs @@ -144,7 +144,7 @@ where } pub(crate) mod function { - use std::{borrow::Cow, cmp::Ordering, collections::VecDeque, iter::FromIterator}; + use std::{borrow::Cow, cmp::Ordering}; use bstr::BStr; use gix_hash::oid; @@ -152,7 +152,8 @@ pub(crate) mod function { use gix_object::CommitRefIter; use super::{Error, Outcome}; - use crate::describe::{Flags, Options, MAX_CANDIDATES}; + use crate::describe::{CommitTime, Flags, Options, MAX_CANDIDATES}; + use crate::PriorityQueue; /// Given a `commit` id, traverse the commit graph and collect candidate names from the `name_by_oid` mapping to produce /// an `Outcome`, which converted [`into_format()`][Outcome::into_format()] will produce a typical `git describe` string. @@ -201,14 +202,14 @@ pub(crate) mod function { let mut buf = Vec::new(); let mut parent_buf = Vec::new(); - let mut queue = VecDeque::from_iter(Some((commit.to_owned(), u32::MAX))); + let mut queue = PriorityQueue::from_iter(Some((u32::MAX, commit.to_owned()))); let mut candidates = Vec::new(); let mut commits_seen = 0; let mut gave_up_on_commit = None; let mut seen = HashMap::::default(); seen.insert(commit.to_owned(), 0u32); - while let Some((commit, _commit_time)) = queue.pop_front() { + while let Some(commit) = queue.pop() { commits_seen += 1; if let Some(name) = name_by_oid.get(&commit) { if candidates.len() < max_candidates { @@ -290,7 +291,7 @@ pub(crate) mod function { }); if let Some(commit_id) = gave_up_on_commit { - queue.push_front((commit_id, u32::MAX)); + queue.insert(u32::MAX, commit_id); commits_seen -= 1; } @@ -318,7 +319,7 @@ pub(crate) mod function { find: &mut Find, buf: &mut Vec, parent_buf: &mut Vec, - queue: &mut VecDeque<(gix_hash::ObjectId, u32)>, + queue: &mut PriorityQueue, seen: &mut HashMap, commit: &gix_hash::oid, commit_flags: Flags, @@ -356,10 +357,7 @@ pub(crate) mod function { .unwrap_or_default(); entry.insert(commit_flags); - match queue.binary_search_by(|c| c.1.cmp(&parent_commit_date).reverse()) { - Ok(_) => queue.push_back((parent_id, parent_commit_date)), - Err(pos) => queue.insert(pos, (parent_id, parent_commit_date)), - }; + queue.insert(parent_commit_date, parent_id); } hash_map::Entry::Occupied(mut entry) => { *entry.get_mut() |= commit_flags; @@ -378,7 +376,7 @@ pub(crate) mod function { #[allow(clippy::too_many_arguments)] fn finish_depth_computation<'name, Find, E>( - mut queue: VecDeque<(gix_hash::ObjectId, u32)>, + mut queue: PriorityQueue, mut find: Find, best_candidate: &mut Candidate<'name>, mut seen: HashMap, @@ -391,13 +389,13 @@ pub(crate) mod function { E: std::error::Error + Send + Sync + 'static, { let mut commits_seen = 0; - while let Some((commit, _commit_time)) = queue.pop_front() { + while let Some(commit) = queue.pop() { commits_seen += 1; let flags = seen[&commit]; if (flags & best_candidate.identity_bit) == best_candidate.identity_bit { if queue - .iter() - .all(|(id, _)| (seen[id] & best_candidate.identity_bit) == best_candidate.identity_bit) + .iter_random() + .all(|id| (seen[id] & best_candidate.identity_bit) == best_candidate.identity_bit) { break; } @@ -429,3 +427,5 @@ pub(crate) mod function { order: usize, } } + +type CommitTime = u32; diff --git a/gix-revision/src/lib.rs b/gix-revision/src/lib.rs index ef273b5aeaf..9c0df149ae0 100644 --- a/gix-revision/src/lib.rs +++ b/gix-revision/src/lib.rs @@ -17,3 +17,11 @@ pub mod spec; mod types; pub use types::Spec; + +/// A utility type implementing a queue which can be used to automatically sort data by its time in ascending order. +/// +/// Note that the performance of this queue is very relevant to overall algorithm performance of many graph-walking algorithms, +/// and as it stands our implementation is about 6% slower in practice, probably also depending on the size of the stored data. +#[derive(Default)] +pub struct PriorityQueue(std::collections::BinaryHeap>); +mod queue; diff --git a/gix-revision/src/queue.rs b/gix-revision/src/queue.rs new file mode 100644 index 00000000000..2fc88c34187 --- /dev/null +++ b/gix-revision/src/queue.rs @@ -0,0 +1,60 @@ +use crate::PriorityQueue; +use std::cmp::Ordering; +use std::collections::BinaryHeap; + +pub(crate) struct Item { + key: K, + value: T, +} + +impl PartialEq for Item { + fn eq(&self, other: &Self) -> bool { + Ord::cmp(self, other).is_eq() + } +} + +impl Eq for Item {} + +impl PartialOrd for Item { + fn partial_cmp(&self, other: &Self) -> Option { + Ord::cmp(self, other).into() + } +} + +impl Ord for Item { + fn cmp(&self, other: &Self) -> Ordering { + self.key.cmp(&other.key) + } +} + +impl PriorityQueue { + /// Insert `value` so that it is ordered according to `key`. + pub fn insert(&mut self, key: K, value: T) { + self.0.push(Item { key, value }); + } + + /// Pop the highest-priority item off the queue. + pub fn pop(&mut self) -> Option { + self.0.pop().map(|t| t.value) + } + + /// Iterate all items ordered from highest to lowest priority. + pub fn iter_random(&self) -> impl Iterator { + self.0.iter().map(|t| &t.value) + } + + /// Return true if the queue is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } +} + +impl FromIterator<(K, T)> for PriorityQueue { + fn from_iter>(iter: I) -> Self { + let mut q = PriorityQueue(BinaryHeap::new()); + for (k, v) in iter { + q.insert(k, v); + } + q + } +} From 59ce4c606f8ccd9b6a16da2025e6746984d32fd6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 17 May 2023 17:54:51 +0200 Subject: [PATCH 7/9] feat: A Graph for quick access to commits and for associating state with them. This data structure should be used whenever stateful traversal is required, usually by associating information with each commit to remember what was seen and what wasn't. --- Cargo.lock | 2 + gix-revision/Cargo.toml | 2 + gix-revision/src/describe.rs | 11 +- gix-revision/src/graph.rs | 271 +++++++++++++++++++++++++++++++++ gix-revision/src/lib.rs | 20 +++ gix-revision/tests/revision.rs | 1 - 6 files changed, 302 insertions(+), 5 deletions(-) create mode 100644 gix-revision/src/graph.rs diff --git a/Cargo.lock b/Cargo.lock index dd499bce9ca..d0fae1b16c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2165,6 +2165,7 @@ version = "0.13.0" dependencies = [ "bstr", "document-features", + "gix-commitgraph", "gix-date 0.5.0", "gix-hash 0.11.1", "gix-hashtable 0.2.0", @@ -2172,6 +2173,7 @@ dependencies = [ "gix-odb", "gix-testtools", "serde", + "smallvec", "thiserror", ] diff --git a/gix-revision/Cargo.toml b/gix-revision/Cargo.toml index e95b5326a10..21516155a2c 100644 --- a/gix-revision/Cargo.toml +++ b/gix-revision/Cargo.toml @@ -21,10 +21,12 @@ gix-hash = { version = "^0.11.1", path = "../gix-hash" } gix-object = { version = "^0.29.2", path = "../gix-object" } gix-date = { version = "^0.5.0", path = "../gix-date" } gix-hashtable = { version = "^0.2.0", path = "../gix-hashtable" } +gix-commitgraph = { version = "0.14.0", path = "../gix-commitgraph" } bstr = { version = "1.3.0", default-features = false, features = ["std"]} thiserror = "1.0.26" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } +smallvec = "1.10.0" document-features = { version = "0.2.1", optional = true } [dev-dependencies] diff --git a/gix-revision/src/describe.rs b/gix-revision/src/describe.rs index 1fd33bb97a0..0adebb9b6e5 100644 --- a/gix-revision/src/describe.rs +++ b/gix-revision/src/describe.rs @@ -211,7 +211,7 @@ pub(crate) mod function { while let Some(commit) = queue.pop() { commits_seen += 1; - if let Some(name) = name_by_oid.get(&commit) { + let flags = if let Some(name) = name_by_oid.get(&commit) { if candidates.len() < max_candidates { let identity_bit = 1 << candidates.len(); candidates.push(Candidate { @@ -220,14 +220,17 @@ pub(crate) mod function { identity_bit, order: candidates.len(), }); - *seen.get_mut(&commit).expect("inserted") |= identity_bit; + let flags = seen.get_mut(&commit).expect("inserted"); + *flags |= identity_bit; + *flags } else { gave_up_on_commit = Some(commit); break; } - } + } else { + seen[&commit] + }; - let flags = seen[&commit]; for candidate in candidates .iter_mut() .filter(|c| (flags & c.identity_bit) != c.identity_bit) diff --git a/gix-revision/src/graph.rs b/gix-revision/src/graph.rs new file mode 100644 index 00000000000..9b02bc2868c --- /dev/null +++ b/gix-revision/src/graph.rs @@ -0,0 +1,271 @@ +use crate::Graph; +use gix_hash::oid; +use smallvec::SmallVec; +use std::ops::Index; + +impl<'find, T> Graph<'find, T> { + /// Create a new instance with `find` to retrieve commits and optionally `cache` to accelerate commit access. + pub fn new(mut find: Find, cache: impl Into>) -> Self + where + Find: + for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result>, E> + 'find, + E: std::error::Error + Send + Sync + 'static, + { + Graph { + find: Box::new(move |id, buf| { + find(id, buf).map_err(|err| Box::new(err) as Box) + }), + cache: cache.into(), + set: gix_hashtable::HashMap::default(), + buf: Vec::new(), + parent_buf: Vec::new(), + } + } + + /// Returns true if `id` has data associated with it, meaning that we processed it already. + pub fn contains(&self, id: &gix_hash::oid) -> bool { + self.set.contains_key(id.as_ref()) + } + + /// Returns the data associated with `id` if available. + pub fn get(&self, id: &gix_hash::oid) -> Option<&T> { + self.set.get(id) + } + + /// Returns the data associated with `id` if available as mutable reference. + pub fn get_mut(&mut self, id: &gix_hash::oid) -> Option<&mut T> { + self.set.get_mut(id) + } + + /// Insert `id` into the graph and associate it with `value`, returning the previous value associated with it if it existed. + pub fn insert(&mut self, id: gix_hash::ObjectId, value: T) -> Option { + self.set.insert(id, value) + } + + /// Remove all data from the graph to start over. + pub fn clear(&mut self) { + self.set.clear(); + } + + /// Try to lookup `id` and return a handle to it for accessing its data, but don't fail if the commit doesn't exist. + /// + /// It's possible that commits don't exist if the repository is shallow. + pub fn try_lookup(&mut self, id: &gix_hash::oid) -> Result>, lookup::Error> { + try_lookup(id, &mut self.find, self.cache.as_ref(), &mut self.buf) + } + + /// Lookup `id` and return a handle to it, or fail if it doesn't exist. + pub fn lookup(&mut self, id: &gix_hash::oid) -> Result, lookup::existing::Error> { + self.try_lookup(id)?.ok_or(lookup::existing::Error::Missing) + } + + /// Insert the parents of commit named `id` to the graph and associate new parents with data + /// by calling `new_parent_data(parent_id, committer_timestamp)`, or update existing parents + /// data with `update_existing(parent_id, &mut existing_data)`. + /// If `first_parent` is `true`, only the first parent of commits will be looked at. + pub fn insert_parents( + &mut self, + id: &gix_hash::oid, + mut new_parent_data: impl FnMut(gix_hash::ObjectId, u64) -> T, + mut update_existing: impl FnMut(gix_hash::ObjectId, &mut T), + first_parent: bool, + ) -> Result<(), insert_parents::Error> { + let commit = self.lookup(id)?; + let parents: SmallVec<[_; 2]> = commit + .iter_parents() + .take(if first_parent { 1 } else { usize::MAX }) + .collect(); + for parent_id in parents { + let parent_id = parent_id?; + match self.set.entry(parent_id) { + gix_hashtable::hash_map::Entry::Vacant(entry) => { + let parent = match try_lookup(&parent_id, &mut self.find, self.cache.as_ref(), &mut self.parent_buf) + .map_err(|err| insert_parents::Error::Lookup(lookup::existing::Error::Find(err)))? + { + Some(p) => p, + None => continue, // skip missing objects, this is due to shallow clones for instance. + }; + + let parent_commit_date = parent.committer_timestamp().unwrap_or_default(); + entry.insert(new_parent_data(parent_id, parent_commit_date)); + } + gix_hashtable::hash_map::Entry::Occupied(mut entry) => { + update_existing(parent_id, entry.get_mut()); + } + } + if first_parent { + break; + } + } + Ok(()) + } +} + +fn try_lookup<'graph>( + id: &gix_hash::oid, + find: &mut Box>, + cache: Option<&'graph gix_commitgraph::Graph>, + buf: &'graph mut Vec, +) -> Result>, lookup::Error> { + if let Some(cache) = cache { + if let Some(pos) = cache.lookup(id) { + return Ok(Some(Commit { + backing: Either::Right((cache, pos)), + })); + } + } + #[allow(clippy::manual_map)] + Ok(match find(id, buf)? { + Some(_) => Some(Commit { + backing: Either::Left(buf), + }), + None => None, + }) +} + +impl<'a, 'find, T> Index<&'a gix_hash::oid> for Graph<'find, T> { + type Output = T; + + fn index(&self, index: &'a oid) -> &Self::Output { + &self.set[index] + } +} + +/// +pub mod lookup { + /// The error returned by [`try_lookup()`][crate::Graph::try_lookup()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("There was an error looking up a commit")] + Find(#[from] Box), + } + + /// + pub mod existing { + /// The error returned by [`lookup()`][crate::Graph::lookup()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Find(#[from] super::Error), + #[error("Commit could not be found")] + Missing, + } + } +} + +/// +pub mod insert_parents { + use crate::graph::commit::iter_parents; + use crate::graph::lookup; + + /// The error returned by [`insert_parents()`][crate::Graph::insert_parents()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Lookup(#[from] lookup::existing::Error), + #[error("A commit could not be decoded during traversal")] + Decode(#[from] gix_object::decode::Error), + #[error(transparent)] + Parent(#[from] iter_parents::Error), + } +} + +enum Either { + Left(T), + Right(U), +} + +/// A commit that provides access to graph-related information. +pub struct Commit<'graph> { + backing: Either<&'graph [u8], (&'graph gix_commitgraph::Graph, gix_commitgraph::Position)>, +} + +/// +pub mod commit { + use super::Commit; + use crate::graph::Either; + + impl<'graph> Commit<'graph> { + /// Return an iterator over the parents of this commit. + pub fn iter_parents(&self) -> Parents<'graph> { + let backing = match &self.backing { + Either::Left(buf) => Either::Left(gix_object::CommitRefIter::from_bytes(buf)), + Either::Right((cache, pos)) => Either::Right((*cache, cache.commit_at(*pos).iter_parents())), + }; + Parents { backing } + } + + /// Returns the timestamp at which this commit was created. + /// + /// This is the single-most important date for determining recency of commits. + /// Note that this can only fail if the commit is backed by the object database *and* parsing fails. + pub fn committer_timestamp(&self) -> Result { + Ok(match &self.backing { + Either::Left(buf) => { + gix_object::CommitRefIter::from_bytes(buf) + .committer()? + .time + .seconds_since_unix_epoch as u64 + } + Either::Right((cache, pos)) => cache.commit_at(*pos).committer_timestamp(), + }) + } + } + + /// An iterator over the parents of a commit. + pub struct Parents<'graph> { + backing: Either< + gix_object::CommitRefIter<'graph>, + ( + &'graph gix_commitgraph::Graph, + gix_commitgraph::file::commit::Parents<'graph>, + ), + >, + } + + impl<'graph> Iterator for Parents<'graph> { + type Item = Result; + + fn next(&mut self) -> Option { + match &mut self.backing { + Either::Left(it) => { + for token in it { + match token { + Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, + Ok(gix_object::commit::ref_iter::Token::Parent { id }) => return Some(Ok(id)), + Ok(_unused_token) => break, + Err(err) => return Some(Err(err.into())), + } + } + None + } + Either::Right((cache, it)) => it + .next() + .map(|r| r.map(|pos| cache.id_at(pos).to_owned()).map_err(Into::into)), + } + } + } + + /// + pub mod iter_parents { + /// The error returned by the [`Parents`][super::Parents] iterator. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("An error occurred when parsing commit parents")] + DecodeCommit(#[from] gix_object::decode::Error), + #[error("An error occurred when parsing parents from the commit graph")] + DecodeCommitGraph(#[from] gix_commitgraph::file::commit::Error), + } + } +} + +pub(crate) type FindFn<'find> = dyn for<'a> FnMut( + &gix_hash::oid, + &'a mut Vec, + ) + -> Result>, Box> + + 'find; diff --git a/gix-revision/src/lib.rs b/gix-revision/src/lib.rs index 9c0df149ae0..20eea37146a 100644 --- a/gix-revision/src/lib.rs +++ b/gix-revision/src/lib.rs @@ -16,8 +16,28 @@ pub use describe::function::describe; pub mod spec; mod types; +use crate::graph::FindFn; pub use types::Spec; +/// A graph of commits which additionally allows to associate data with commits. +/// +/// It starts empty, but each access may fill it with commit information. +/// Note that the traversal can be accelerated if a [commit-graph][gix_commitgraph::Graph] is also made available. +pub struct Graph<'find, T> { + /// A way to resolve a commit from the object database. + find: Box>, + /// A way to speedup commit access, essentially a multi-file commit database. + cache: Option, + /// The set of cached commits that we have seen once, along with data associated with them. + set: gix_hashtable::HashMap, + /// A buffer for writing commit data into. + buf: Vec, + /// Another buffer we typically use to store parents. + parent_buf: Vec, +} +/// +pub mod graph; + /// A utility type implementing a queue which can be used to automatically sort data by its time in ascending order. /// /// Note that the performance of this queue is very relevant to overall algorithm performance of many graph-walking algorithms, diff --git a/gix-revision/tests/revision.rs b/gix-revision/tests/revision.rs index 235bc08eb8c..71d63cb4057 100644 --- a/gix-revision/tests/revision.rs +++ b/gix-revision/tests/revision.rs @@ -1,6 +1,5 @@ mod describe; mod spec; - pub type Result = std::result::Result>; fn hex_to_id(hex: &str) -> gix_hash::ObjectId { From ed258da9015d2d68734aeac485dd009760fc4da4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 18 May 2023 13:58:50 +0200 Subject: [PATCH 8/9] feat!: `describe` usees commitgraph. With it it can leverage the commitgraph data structure would would be more prominent on server-side applications, presumably. --- .../tests/fixtures/create_fixtures.sh | 27 --- gix-revision/src/describe.rs | 156 +++++------------- gix-revision/tests/describe/mod.rs | 25 +-- .../make_repo_with_branches.tar.xz | 4 +- .../tests/fixtures/make_repo_with_branches.sh | 3 + 5 files changed, 58 insertions(+), 157 deletions(-) delete mode 100755 gix-commitgraph/tests/fixtures/create_fixtures.sh diff --git a/gix-commitgraph/tests/fixtures/create_fixtures.sh b/gix-commitgraph/tests/fixtures/create_fixtures.sh deleted file mode 100755 index 0845e59b5c6..00000000000 --- a/gix-commitgraph/tests/fixtures/create_fixtures.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -parent_dir="$1" -mkdir "$parent_dir" - -script_dir=$(dirname "$(realpath -e "$0")") - -run() { - local target_dir=$parent_dir/$1 - local script=$script_dir/$1.sh - - local temp_dir=$(mktemp -d create_fixtures-XXXXXXXXXX) - trap "rm -rf $temp_dir" EXIT - (cd "$temp_dir" && "$script") - cp -dR "$temp_dir/.git/objects/" "$target_dir/" - rm -rf "$temp_dir" - trap - EXIT -} - -#run bloom -#run bloom_too_large -run octopus_merges -run single_commit -run single_parent -run split_chain -run two_parents diff --git a/gix-revision/src/describe.rs b/gix-revision/src/describe.rs index 0adebb9b6e5..b669577fdf6 100644 --- a/gix-revision/src/describe.rs +++ b/gix-revision/src/describe.rs @@ -94,7 +94,8 @@ impl<'a> Display for Format<'a> { } } -type Flags = u32; +/// A bit-field which keeps track of which commit is reachable by one of 32 candidates names. +pub type Flags = u32; const MAX_CANDIDATES: usize = std::mem::size_of::() * 8; /// The options required to call [`describe()`][function::describe()]. @@ -129,14 +130,11 @@ impl<'name> Default for Options<'name> { /// The error returned by the [`describe()`][function::describe()] function. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] -pub enum Error -where - E: std::error::Error + Send + Sync + 'static, -{ - #[error("Commit {} could not be found during graph traversal", .oid.to_hex())] - Find { +pub enum Error { + #[error("The parents of commit {} could not be added to graph during traversal", oid.to_hex())] + InsertParentsToGraph { #[source] - err: Option, + err: crate::graph::insert_parents::Error, oid: gix_hash::ObjectId, }, #[error("A commit could not be decoded during traversal")] @@ -148,32 +146,26 @@ pub(crate) mod function { use bstr::BStr; use gix_hash::oid; - use gix_hashtable::{hash_map, HashMap}; - use gix_object::CommitRefIter; use super::{Error, Outcome}; use crate::describe::{CommitTime, Flags, Options, MAX_CANDIDATES}; - use crate::PriorityQueue; + use crate::{Graph, PriorityQueue}; - /// Given a `commit` id, traverse the commit graph and collect candidate names from the `name_by_oid` mapping to produce + /// Given a `commit` id, traverse the commit `graph` and collect candidate names from the `name_by_oid` mapping to produce /// an `Outcome`, which converted [`into_format()`][Outcome::into_format()] will produce a typical `git describe` string. /// /// Note that the `name_by_oid` map is returned in the [`Outcome`], which can be forcefully returned even if there was no matching /// candidate by setting `fallback_to_oid` to true. - pub fn describe<'name, Find, E>( + pub fn describe<'name>( commit: &oid, - mut find: Find, + graph: &mut Graph<'_, Flags>, Options { name_by_oid, mut max_candidates, fallback_to_oid, first_parent, }: Options<'name>, - ) -> Result>, Error> - where - Find: for<'b> FnMut(&oid, &'b mut Vec) -> Result>, E>, - E: std::error::Error + Send + Sync + 'static, - { + ) -> Result>, Error> { max_candidates = max_candidates.min(MAX_CANDIDATES); if let Some(name) = name_by_oid.get(commit) { return Ok(Some(Outcome { @@ -199,15 +191,12 @@ pub(crate) mod function { }; } - let mut buf = Vec::new(); - let mut parent_buf = Vec::new(); - let mut queue = PriorityQueue::from_iter(Some((u32::MAX, commit.to_owned()))); let mut candidates = Vec::new(); let mut commits_seen = 0; let mut gave_up_on_commit = None; - let mut seen = HashMap::::default(); - seen.insert(commit.to_owned(), 0u32); + graph.clear(); + graph.insert(commit.to_owned(), 0u32); while let Some(commit) = queue.pop() { commits_seen += 1; @@ -220,7 +209,7 @@ pub(crate) mod function { identity_bit, order: candidates.len(), }); - let flags = seen.get_mut(&commit).expect("inserted"); + let flags = graph.get_mut(&commit).expect("inserted"); *flags |= identity_bit; *flags } else { @@ -228,7 +217,7 @@ pub(crate) mod function { break; } } else { - seen[&commit] + graph[&commit] }; for candidate in candidates @@ -261,16 +250,7 @@ pub(crate) mod function { } } - parents_by_date_onto_queue_and_track_names( - &mut find, - &mut buf, - &mut parent_buf, - &mut queue, - &mut seen, - &commit, - flags, - first_parent, - )?; + parents_by_date_onto_queue_and_track_names(graph, &mut queue, commit, flags, first_parent)?; } if candidates.is_empty() { @@ -300,11 +280,8 @@ pub(crate) mod function { commits_seen += finish_depth_computation( queue, - find, + graph, candidates.first_mut().expect("at least one candidate"), - seen, - buf, - parent_buf, first_parent, )?; @@ -317,88 +294,41 @@ pub(crate) mod function { })) } - #[allow(clippy::too_many_arguments)] - fn parents_by_date_onto_queue_and_track_names( - find: &mut Find, - buf: &mut Vec, - parent_buf: &mut Vec, + fn parents_by_date_onto_queue_and_track_names( + graph: &mut Graph<'_, Flags>, queue: &mut PriorityQueue, - seen: &mut HashMap, - commit: &gix_hash::oid, + commit: gix_hash::ObjectId, commit_flags: Flags, first_parent: bool, - ) -> Result<(), Error> - where - Find: for<'b> FnMut(&oid, &'b mut Vec) -> Result>, E>, - E: std::error::Error + Send + Sync + 'static, - { - let commit_iter = find(commit, buf) - .map_err(|err| Error::Find { - err: Some(err), - oid: commit.to_owned(), - })? - .ok_or_else(|| Error::Find { - err: None, - oid: commit.to_owned(), - })?; - for token in commit_iter { - match token { - Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, - Ok(gix_object::commit::ref_iter::Token::Parent { id: parent_id }) => match seen.entry(parent_id) { - hash_map::Entry::Vacant(entry) => { - let parent = match find(&parent_id, parent_buf).map_err(|err| Error::Find { - err: Some(err), - oid: commit.to_owned(), - })? { - Some(p) => p, - None => continue, // skip missing objects, they don't exist. - }; - - let parent_commit_date = parent - .committer() - .map(|committer| committer.time.seconds_since_unix_epoch) - .unwrap_or_default(); - - entry.insert(commit_flags); - queue.insert(parent_commit_date, parent_id); - } - hash_map::Entry::Occupied(mut entry) => { - *entry.get_mut() |= commit_flags; - } + ) -> Result<(), Error> { + graph + .insert_parents( + &commit, + |parent_id, parent_commit_date| { + queue.insert(parent_commit_date as u32, parent_id); + commit_flags }, - Ok(_unused_token) => break, - Err(err) => return Err(err.into()), - } - if first_parent { - break; - } - } - + |_parent_id, flags| *flags |= commit_flags, + first_parent, + ) + .map_err(|err| Error::InsertParentsToGraph { err, oid: commit })?; Ok(()) } - #[allow(clippy::too_many_arguments)] - fn finish_depth_computation<'name, Find, E>( + fn finish_depth_computation( mut queue: PriorityQueue, - mut find: Find, - best_candidate: &mut Candidate<'name>, - mut seen: HashMap, - mut buf: Vec, - mut parent_buf: Vec, + graph: &mut Graph<'_, Flags>, + best_candidate: &mut Candidate<'_>, first_parent: bool, - ) -> Result> - where - Find: for<'b> FnMut(&oid, &'b mut Vec) -> Result>, E>, - E: std::error::Error + Send + Sync + 'static, - { + ) -> Result { let mut commits_seen = 0; while let Some(commit) = queue.pop() { commits_seen += 1; - let flags = seen[&commit]; + let flags = graph[&commit]; if (flags & best_candidate.identity_bit) == best_candidate.identity_bit { if queue .iter_random() - .all(|id| (seen[id] & best_candidate.identity_bit) == best_candidate.identity_bit) + .all(|id| (graph[id] & best_candidate.identity_bit) == best_candidate.identity_bit) { break; } @@ -406,16 +336,7 @@ pub(crate) mod function { best_candidate.commits_in_its_future += 1; } - parents_by_date_onto_queue_and_track_names( - &mut find, - &mut buf, - &mut parent_buf, - &mut queue, - &mut seen, - &commit, - flags, - first_parent, - )?; + parents_by_date_onto_queue_and_track_names(graph, &mut queue, commit, flags, first_parent)?; } Ok(commits_seen) } @@ -431,4 +352,5 @@ pub(crate) mod function { } } +/// The timestamp for the creation date of a commit in seconds since unix epoch. type CommitTime = u32; diff --git a/gix-revision/tests/describe/mod.rs b/gix-revision/tests/describe/mod.rs index d41293f559e..38558f2bdaf 100644 --- a/gix-revision/tests/describe/mod.rs +++ b/gix-revision/tests/describe/mod.rs @@ -13,26 +13,29 @@ mod format; fn run_test( transform_odb: impl FnOnce(gix_odb::Handle) -> gix_odb::Handle, options: impl Fn(gix_hash::ObjectId) -> gix_revision::describe::Options<'static>, - run_assertions: impl Fn( - Result>, Error>, - gix_hash::ObjectId, - ) -> crate::Result, + run_assertions: impl Fn(Result>, Error>, gix_hash::ObjectId) -> crate::Result, ) -> crate::Result { let store = odb_at("."); let store = transform_odb(store); let commit_id = hex_to_id("01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"); - run_assertions( - gix_revision::describe( - &commit_id, + for use_commitgraph in [false, true] { + let cache = use_commitgraph + .then(|| gix_commitgraph::Graph::from_info_dir(store.store_ref().path().join("info")).ok()) + .flatten(); + let mut graph = gix_revision::Graph::new( |id, buf| { store .try_find(id, buf) .map(|r| r.and_then(|d| d.try_into_commit_iter())) }, - options(commit_id), - ), - commit_id, - ) + cache, + ); + run_assertions( + gix_revision::describe(&commit_id, &mut graph, options(commit_id)), + commit_id, + )?; + } + Ok(()) } #[test] diff --git a/gix-revision/tests/fixtures/generated-archives/make_repo_with_branches.tar.xz b/gix-revision/tests/fixtures/generated-archives/make_repo_with_branches.tar.xz index 73881d1632d..f07c0655d47 100644 --- a/gix-revision/tests/fixtures/generated-archives/make_repo_with_branches.tar.xz +++ b/gix-revision/tests/fixtures/generated-archives/make_repo_with_branches.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51c255848776de0f426ffb8858ce44f0b09ab3ac2075628b594f520cf86a55ba -size 13124 +oid sha256:7e3e6e069d37db14bb8138eaf7f9d7fade2db6c9985a065da1bbf0684678c961 +size 12452 diff --git a/gix-revision/tests/fixtures/make_repo_with_branches.sh b/gix-revision/tests/fixtures/make_repo_with_branches.sh index ca8d5ddef9f..f53f9bf7db6 100644 --- a/gix-revision/tests/fixtures/make_repo_with_branches.sh +++ b/gix-revision/tests/fixtures/make_repo_with_branches.sh @@ -21,5 +21,8 @@ git commit -q --allow-empty -m c5 git tag at-c5 git merge branch1 -m m1b1 +git commit-graph write --no-progress --reachable +git repack -adq + git clone --depth 1 file://$PWD shallow-1-clone git clone --depth 2 file://$PWD shallow-2-clone From 56f4d30960de4afc8c53136af45149cf880547c5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 18 May 2023 14:22:59 +0200 Subject: [PATCH 9/9] adapt to changes in `gix-revision` --- Cargo.lock | 2 +- crate-status.md | 3 ++- gitoxide-core/Cargo.toml | 3 +-- gitoxide-core/src/commitgraph/verify.rs | 8 ++++---- gix/Cargo.toml | 2 ++ gix/src/commit.rs | 19 +++++++++++-------- gix/src/lib.rs | 1 + 7 files changed, 22 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d0fae1b16c9..7c02092d225 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1264,7 +1264,6 @@ dependencies = [ "futures-io", "futures-lite", "gix", - "gix-commitgraph", "gix-pack", "gix-transport", "gix-url", @@ -1287,6 +1286,7 @@ dependencies = [ "document-features", "gix-actor 0.20.0", "gix-attributes 0.12.0", + "gix-commitgraph", "gix-config", "gix-credentials", "gix-date 0.5.0", diff --git a/crate-status.md b/crate-status.md index 8ddb4d868c9..09e34418bf9 100644 --- a/crate-status.md +++ b/crate-status.md @@ -469,6 +469,7 @@ Make it the best-performing implementation and the most convenient one. ### gix-revision * [x] `describe()` (similar to `git name-rev`) +* [x] primitives to help with graph traversal, along with commit-graph acceleration. * parse specifications * [x] parsing and navigation * [x] revision ranges @@ -638,7 +639,7 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/gix-lock/README. * **Id** * [x] short hashes with detection of ambiguity. * **Commit** - * [x] `describe()` like functionality + * [x] `git describe` like functionality, with optional commit-graph acceleration * [x] create new commit from tree * **Objects** * [x] lookup diff --git a/gitoxide-core/Cargo.toml b/gitoxide-core/Cargo.toml index 11aa3686573..f642a829173 100644 --- a/gitoxide-core/Cargo.toml +++ b/gitoxide-core/Cargo.toml @@ -33,7 +33,7 @@ async-client = ["gix/async-network-client-async-std", "gix-transport-configurati #! ### Other ## Data structures implement `serde::Serialize` and `serde::Deserialize`. -serde = ["gix-commitgraph/serde", "gix/serde", "serde_json", "dep:serde", "bytesize/serde"] +serde = ["gix/serde", "serde_json", "dep:serde", "bytesize/serde"] [dependencies] @@ -41,7 +41,6 @@ serde = ["gix-commitgraph/serde", "gix/serde", "serde_json", "dep:serde", "bytes gix = { version = "^0.44.1", path = "../gix", default-features = false } gix-pack-for-configuration-only = { package = "gix-pack", version = "^0.35.0", path = "../gix-pack", default-features = false, features = ["pack-cache-lru-dynamic", "pack-cache-lru-static"] } gix-transport-configuration-only = { package = "gix-transport", version = "^0.31.0", path = "../gix-transport", default-features = false } -gix-commitgraph = { version = "^0.14.0", path = "../gix-commitgraph" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } anyhow = "1.0.42" thiserror = "1.0.34" diff --git a/gitoxide-core/src/commitgraph/verify.rs b/gitoxide-core/src/commitgraph/verify.rs index d16a9c8b0e1..64e6836cdbd 100644 --- a/gitoxide-core/src/commitgraph/verify.rs +++ b/gitoxide-core/src/commitgraph/verify.rs @@ -1,7 +1,7 @@ use std::{io, path::Path}; use anyhow::{Context as AnyhowContext, Result}; -use gix_commitgraph::Graph; +use gix::commitgraph::Graph; use crate::OutputFormat; @@ -31,7 +31,7 @@ pub fn graph_or_file( mut out, output_statistics, }: Context, -) -> Result +) -> Result where W1: io::Write, W2: io::Write, @@ -39,7 +39,7 @@ where let g = Graph::at(path).with_context(|| "Could not open commit graph")?; #[allow(clippy::unnecessary_wraps, unknown_lints)] - fn noop_processor(_commit: &gix_commitgraph::file::Commit<'_>) -> std::result::Result<(), std::fmt::Error> { + fn noop_processor(_commit: &gix::commitgraph::file::Commit<'_>) -> std::result::Result<(), std::fmt::Error> { Ok(()) } let stats = g @@ -57,7 +57,7 @@ where Ok(stats) } -fn print_human_output(out: &mut impl io::Write, stats: &gix_commitgraph::verify::Outcome) -> io::Result<()> { +fn print_human_output(out: &mut impl io::Write, stats: &gix::commitgraph::verify::Outcome) -> io::Result<()> { writeln!(out, "number of commits with the given number of parents")?; let mut parent_counts: Vec<_> = stats.parent_counts.iter().map(|(a, b)| (*a, *b)).collect(); parent_counts.sort_by_key(|e| e.0); diff --git a/gix/Cargo.toml b/gix/Cargo.toml index 899f6d9aebb..37e11095a8d 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -68,6 +68,7 @@ serde = [ "dep:serde", "gix-ignore/serde", "gix-revision/serde", "gix-worktree/serde", + "gix-commitgraph/serde", "gix-credentials/serde"] ## Re-export the progress tree root which allows to obtain progress from various functions which take `impl gix::Progress`. @@ -148,6 +149,7 @@ gix-prompt = { version = "^0.5.0", path = "../gix-prompt" } gix-index = { version = "^0.16.1", path = "../gix-index" } gix-worktree = { version = "^0.17.1", path = "../gix-worktree" } gix-hashtable = { version = "^0.2.0", path = "../gix-hashtable" } +gix-commitgraph = { version = "^0.14.0", path = "../gix-commitgraph" } prodash = { version = "23.1", optional = true, default-features = false, features = ["progress-tree"] } once_cell = "1.14.0" diff --git a/gix/src/commit.rs b/gix/src/commit.rs index a58954a3665..74354a36209 100644 --- a/gix/src/commit.rs +++ b/gix/src/commit.rs @@ -52,7 +52,7 @@ pub mod describe { #[allow(missing_docs)] pub enum Error { #[error(transparent)] - Describe(#[from] gix_revision::describe::Error), + Describe(#[from] gix_revision::describe::Error), #[error("Could not produce an unambiguous shortened id for formatting.")] ShortId(#[from] crate::id::shorten::Error), #[error(transparent)] @@ -201,15 +201,18 @@ pub mod describe { /// to save ~40% of time. pub fn try_resolve(&self) -> Result>, Error> { // TODO: dirty suffix with respective dirty-detection - let outcome = gix_revision::describe( - &self.id, + let mut graph = gix_revision::Graph::new( |id, buf| { - Ok(self - .repo + self.repo .objects - .try_find(id, buf)? - .and_then(|d| d.try_into_commit_iter())) + .try_find(id, buf) + .map(|r| r.and_then(|d| d.try_into_commit_iter())) }, + gix_commitgraph::Graph::from_info_dir(self.repo.objects.store_ref().path().join("info")).ok(), + ); + let outcome = gix_revision::describe( + &self.id, + &mut graph, gix_revision::describe::Options { name_by_oid: self.select.names(self.repo)?, fallback_to_oid: self.id_as_fallback, @@ -218,7 +221,7 @@ pub mod describe { }, )?; - Ok(outcome.map(|outcome| crate::commit::describe::Resolution { + Ok(outcome.map(|outcome| Resolution { outcome, id: self.id.attach(self.repo), })) diff --git a/gix/src/lib.rs b/gix/src/lib.rs index eb5efcfdf74..445f2889636 100644 --- a/gix/src/lib.rs +++ b/gix/src/lib.rs @@ -68,6 +68,7 @@ // APIs/instances anyway. pub use gix_actor as actor; pub use gix_attributes as attrs; +pub use gix_commitgraph as commitgraph; pub use gix_credentials as credentials; pub use gix_date as date; pub use gix_features as features;