From 9ed78b17d5dd41a9525bd6a57b2dc321b806f265 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 14:03:17 +0200 Subject: [PATCH 1/8] Graph.rg: remove member It wasn't used. --- sizes/graph.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index 7e923f6..9187907 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -69,7 +69,7 @@ func ScanRepositoryUsingGraph( ctx, cancel := context.WithCancel(context.TODO()) defer cancel() - graph := NewGraph(rg, nameStyle) + graph := NewGraph(nameStyle) refIter, err := repo.NewReferenceIter(ctx) if err != nil { @@ -337,8 +337,6 @@ func ScanRepositoryUsingGraph( // Graph is an object graph that is being built up. type Graph struct { - rg RefGrouper - blobLock sync.Mutex blobSizes map[git.OID]BlobSize @@ -361,10 +359,8 @@ type Graph struct { } // NewGraph creates and returns a new `*Graph` instance. -func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { +func NewGraph(nameStyle NameStyle) *Graph { return &Graph{ - rg: rg, - blobSizes: make(map[git.OID]BlobSize), treeRecords: make(map[git.OID]*treeRecord), From 559b030c9aa7b8fbc8803863e20aae4a720cbb18 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 16:57:25 +0200 Subject: [PATCH 2/8] Collect references before starting the object traversal This provides a better separation of concerns, which will be taken advantage of shortly. --- sizes/graph.go | 79 +++++----------------------------------------- sizes/grouper.go | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 71 deletions(-) create mode 100644 sizes/grouper.go diff --git a/sizes/graph.go b/sizes/graph.go index 9187907..a56cbc2 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -11,50 +11,6 @@ import ( "github.com/github/git-sizer/meter" ) -// RefGroupSymbol is the string "identifier" that is used to refer to -// a refgroup, for example in the gitconfig. 
Nesting of refgroups is -// inferred from their names, using "." as separator between -// components. For example, if there are three refgroups with symbols -// "tags", "tags.releases", and "foo.bar", then "tags.releases" is -// considered to be nested within "tags", and "foo.bar" is considered -// to be nested within "foo", the latter being created automatically -// if it was not configured explicitly. -type RefGroupSymbol string - -// RefGroup is a group of references, for example "branches" or -// "tags". Reference groups might overlap. -type RefGroup struct { - // Symbol is the unique string by which this `RefGroup` is - // identified and configured. It consists of dot-separated - // components, which implicitly makes a nested tree-like - // structure. - Symbol RefGroupSymbol - - // Name is the name for this `ReferenceGroup` to be presented - // in user-readable output. - Name string -} - -// RefGrouper describes a type that can collate reference names into -// groups and decide which ones to walk. -type RefGrouper interface { - // Categorize tells whether `refname` should be walked at all, - // and if so, the symbols of the reference groups to which it - // belongs. - Categorize(refname string) (bool, []RefGroupSymbol) - - // Groups returns the list of `ReferenceGroup`s, in the order - // that they should be presented. The return value might - // depend on which references have been seen so far. - Groups() []RefGroup -} - -type refSeen struct { - git.Reference - walked bool - groups []RefGroupSymbol -} - // ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which // references to scan and how to group them. 
`nameStyle` specifies // whether the output should include full names, hashes only, or @@ -71,9 +27,9 @@ func ScanRepositoryUsingGraph( graph := NewGraph(nameStyle) - refIter, err := repo.NewReferenceIter(ctx) + refsSeen, err := CollectReferences(ctx, repo, rg) if err != nil { - return HistorySize{}, err + return HistorySize{}, fmt.Errorf("reading references: %w", err) } objIter, err := repo.NewObjectIter(context.TODO()) @@ -82,41 +38,22 @@ func ScanRepositoryUsingGraph( } errChan := make(chan error, 1) - var refsSeen []refSeen - // Feed the references that we want into the stdin of the object - // iterator: + // Feed the references that we want to walk into the stdin of the + // object iterator: go func() { defer objIter.Close() errChan <- func() error { - for { - ref, ok, err := refIter.Next() - if err != nil { - return err - } - if !ok { - return nil - } - - walk, groups := rg.Categorize(ref.Refname) - - refsSeen = append( - refsSeen, - refSeen{ - Reference: ref, - walked: walk, - groups: groups, - }, - ) - - if !walk { + for _, refSeen := range refsSeen { + if !refSeen.walked { continue } - if err := objIter.AddRoot(ref.OID); err != nil { + if err := objIter.AddRoot(refSeen.OID); err != nil { return err } } + return nil }() }() diff --git a/sizes/grouper.go b/sizes/grouper.go new file mode 100644 index 0000000..a5b8a26 --- /dev/null +++ b/sizes/grouper.go @@ -0,0 +1,82 @@ +package sizes + +import ( + "context" + + "github.com/github/git-sizer/git" +) + +// RefGroupSymbol is the string "identifier" that is used to refer to +// a refgroup, for example in the gitconfig. Nesting of refgroups is +// inferred from their names, using "." as separator between +// components. 
For example, if there are three refgroups with symbols +// "tags", "tags.releases", and "foo.bar", then "tags.releases" is +// considered to be nested within "tags", and "foo.bar" is considered +// to be nested within "foo", the latter being created automatically +// if it was not configured explicitly. +type RefGroupSymbol string + +// RefGroup is a group of references, for example "branches" or +// "tags". Reference groups might overlap. +type RefGroup struct { + // Symbol is the unique string by which this `RefGroup` is + // identified and configured. It consists of dot-separated + // components, which implicitly makes a nested tree-like + // structure. + Symbol RefGroupSymbol + + // Name is the name for this `ReferenceGroup` to be presented + // in user-readable output. + Name string +} + +// RefGrouper describes a type that can collate reference names into +// groups and decide which ones to walk. +type RefGrouper interface { + // Categorize tells whether `refname` should be walked at all, + // and if so, the symbols of the reference groups to which it + // belongs. + Categorize(refname string) (bool, []RefGroupSymbol) + + // Groups returns the list of `ReferenceGroup`s, in the order + // that they should be presented. The return value might + // depend on which references have been seen so far. 
+ Groups() []RefGroup +} + +type refSeen struct { + git.Reference + walked bool + groups []RefGroupSymbol +} + +func CollectReferences( + ctx context.Context, repo *git.Repository, rg RefGrouper, +) ([]refSeen, error) { + refIter, err := repo.NewReferenceIter(ctx) + if err != nil { + return nil, err + } + + var refsSeen []refSeen + for { + ref, ok, err := refIter.Next() + if err != nil { + return nil, err + } + if !ok { + return refsSeen, nil + } + + walk, groups := rg.Categorize(ref.Refname) + + refsSeen = append( + refsSeen, + refSeen{ + Reference: ref, + walked: walk, + groups: groups, + }, + ) + } +} From fdfa791791c392324ec0cde0e42d070f6c9b96c3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 17:57:31 +0200 Subject: [PATCH 3/8] ScanRepositoryUsingGraph(): take a context argument --- git-sizer.go | 10 +++++++--- git_sizer_test.go | 11 ++++++----- sizes/graph.go | 6 ++---- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index d1e075c..6c9e7a3 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -1,6 +1,7 @@ package main import ( + "context" "encoding/json" "errors" "fmt" @@ -93,14 +94,17 @@ var ReleaseVersion string var BuildVersion string func main() { - err := mainImplementation(os.Stdout, os.Stderr, os.Args[1:]) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) os.Exit(1) } } -func mainImplementation(stdout, stderr io.Writer, args []string) error { +func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []string) error { var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool @@ -288,7 +292,7 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - historySize, err := 
sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progressMeter) + historySize, err := sizes.ScanRepositoryUsingGraph(ctx, repo, rg, nameStyle, progressMeter) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git_sizer_test.go b/git_sizer_test.go index 6ab132f..b08985b 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -2,6 +2,7 @@ package main_test import ( "bytes" + "context" "encoding/json" "fmt" "io" @@ -563,7 +564,7 @@ func TestBomb(t *testing.T) { newGitBomb(t, repo, 10, 10, "boom!\n") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, ) require.NoError(t, err) @@ -636,7 +637,7 @@ func TestTaggedTags(t *testing.T) { require.NoError(t, cmd.Run(), "creating tag 3") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -658,7 +659,7 @@ func TestFromSubdir(t *testing.T) { require.NoError(t, cmd.Run(), "creating commit") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -711,7 +712,7 @@ func TestSubmodule(t *testing.T) { // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - mainRepo.Repository(t), + context.Background(), mainRepo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -724,7 +725,7 @@ func TestSubmodule(t *testing.T) { Path: filepath.Join(mainRepo.Path, "sub"), } h, err = sizes.ScanRepositoryUsingGraph( - submRepo2.Repository(t), + context.Background(), submRepo2.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning 
repository") diff --git a/sizes/graph.go b/sizes/graph.go index a56cbc2..1b908cc 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -19,12 +19,10 @@ import ( // // It returns the size data for the repository. func ScanRepositoryUsingGraph( + ctx context.Context, repo *git.Repository, rg RefGrouper, nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { - ctx, cancel := context.WithCancel(context.TODO()) - defer cancel() - graph := NewGraph(nameStyle) refsSeen, err := CollectReferences(ctx, repo, rg) @@ -32,7 +30,7 @@ func ScanRepositoryUsingGraph( return HistorySize{}, fmt.Errorf("reading references: %w", err) } - objIter, err := repo.NewObjectIter(context.TODO()) + objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err } From 1a2c0b51069b8eedecac2fccf532b7e6da11a1d3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 18:00:39 +0200 Subject: [PATCH 4/8] refSeen: make type and its members public and rename it to `RefRoot` --- sizes/graph.go | 12 ++++++------ sizes/grouper.go | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index 1b908cc..59a6365 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -25,7 +25,7 @@ func ScanRepositoryUsingGraph( ) (HistorySize, error) { graph := NewGraph(nameStyle) - refsSeen, err := CollectReferences(ctx, repo, rg) + refRoots, err := CollectReferences(ctx, repo, rg) if err != nil { return HistorySize{}, fmt.Errorf("reading references: %w", err) } @@ -42,12 +42,12 @@ func ScanRepositoryUsingGraph( defer objIter.Close() errChan <- func() error { - for _, refSeen := range refsSeen { - if !refSeen.walked { + for _, refRoot := range refRoots { + if !refRoot.Walk { continue } - if err := objIter.AddRoot(refSeen.OID); err != nil { + if err := objIter.AddRoot(refRoot.OID); err != nil { return err } } @@ -261,9 +261,9 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - for _, 
refSeen := range refsSeen { + for _, refRoot := range refRoots { progressMeter.Inc() - graph.RegisterReference(refSeen.Reference, refSeen.walked, refSeen.groups) + graph.RegisterReference(refRoot.Reference, refRoot.Walk, refRoot.Groups) } progressMeter.Done() diff --git a/sizes/grouper.go b/sizes/grouper.go index a5b8a26..3807b0e 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -44,21 +44,21 @@ type RefGrouper interface { Groups() []RefGroup } -type refSeen struct { +type RefRoot struct { git.Reference - walked bool - groups []RefGroupSymbol + Walk bool + Groups []RefGroupSymbol } func CollectReferences( ctx context.Context, repo *git.Repository, rg RefGrouper, -) ([]refSeen, error) { +) ([]RefRoot, error) { refIter, err := repo.NewReferenceIter(ctx) if err != nil { return nil, err } - var refsSeen []refSeen + var refsSeen []RefRoot for { ref, ok, err := refIter.Next() if err != nil { @@ -72,10 +72,10 @@ func CollectReferences( refsSeen = append( refsSeen, - refSeen{ + RefRoot{ Reference: ref, - walked: walk, - groups: groups, + Walk: walk, + Groups: groups, }, ) } From 757866b5adda4d0cff52d917d48eab0dc92275ae Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 18:13:34 +0200 Subject: [PATCH 5/8] ScanRepositoryUsingGraph(): take a list of `RefRoot`s as argument --- git-sizer.go | 9 +++- git_sizer_test.go | 110 ++++++++++++++++++++++++++++++---------------- sizes/graph.go | 7 +-- 3 files changed, 81 insertions(+), 45 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 6c9e7a3..0336d13 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -292,7 +292,14 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - historySize, err := sizes.ScanRepositoryUsingGraph(ctx, repo, rg, nameStyle, progressMeter) + refRoots, err := sizes.CollectReferences(ctx, repo, rg) + if err != nil { + return fmt.Errorf("determining which reference to scan: %w", err) 
+ } + + historySize, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, refRoots, nameStyle, progressMeter, + ) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git_sizer_test.go b/git_sizer_test.go index b08985b..54d90d5 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -558,14 +558,21 @@ func (rg refGrouper) Groups() []sizes.RefGroup { func TestBomb(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, true, "bomb") - t.Cleanup(func() { repo.Remove(t) }) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, true, "bomb") + t.Cleanup(func() { testRepo.Remove(t) }) + + newGitBomb(t, testRepo, 10, 10, "boom!\n") - newGitBomb(t, repo, 10, 10, "boom!\n") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), - refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, + ctx, repo, + refRoots, sizes.NameStyleFull, meter.NoProgressMeter, ) require.NoError(t, err) @@ -613,32 +620,39 @@ func TestBomb(t *testing.T) { func TestTaggedTags(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "tagged-tags") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "tagged-tags") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - cmd := repo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names.
- cmd = repo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = repo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = repo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 3") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), repo, + refRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -647,20 +661,27 @@ func TestTaggedTags(t *testing.T) { func TestFromSubdir(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "subdir") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "subdir") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - repo.AddFile(t, "subdir/file.txt", "Hello, world!\n") + testRepo.AddFile(t, "subdir/file.txt", "Hello, world!\n") - cmd := repo.GitCommand(t, "commit", "-m", "initial") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), -
refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), testRepo.Repository(t), + refRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -669,6 +690,8 @@ func TestFromSubdir(t *testing.T) { func TestSubmodule(t *testing.T) { t.Parallel() + ctx := context.Background() + tmp, err := ioutil.TempDir("", "submodule") require.NoError(t, err, "creating temporary directory") @@ -678,42 +701,47 @@ func TestSubmodule(t *testing.T) { timestamp := time.Unix(1112911993, 0) - submRepo := testutils.TestRepo{ + submTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "subm"), } - submRepo.Init(t, false) - submRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") - submRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") - submRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") + submTestRepo.Init(t, false) + submTestRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") + submTestRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") + submTestRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") - cmd := submRepo.GitCommand(t, "commit", "-m", "subm initial") + cmd := submTestRepo.GitCommand(t, "commit", "-m", "subm initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating subm commit") - mainRepo := testutils.TestRepo{ + mainTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "main"), } - mainRepo.Init(t, false) + mainTestRepo.Init(t, false) - mainRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") + mainTestRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") - cmd = mainRepo.GitCommand(t, "commit", "-m", "main initial") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "main initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = mainRepo.GitCommand(t, "-c",
"protocol.file.allow=always", "submodule", "add", submRepo.Path, "sub") - cmd.Dir = mainRepo.Path + cmd = mainTestRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submTestRepo.Path, "sub") + cmd.Dir = mainTestRepo.Path require.NoError(t, cmd.Run(), "adding submodule") - cmd = mainRepo.GitCommand(t, "commit", "-m", "add submodule") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "add submodule") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "committing submodule to main") + mainRepo := mainTestRepo.Repository(t) + + mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) + require.NoError(t, err) + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), mainRepo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), mainTestRepo.Repository(t), + mainRefRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -721,12 +749,18 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(1), h.MaxExpandedSubmoduleCount, "max expanded submodule count") // Analyze the submodule: - submRepo2 := testutils.TestRepo{ - Path: filepath.Join(mainRepo.Path, "sub"), + submTestRepo2 := testutils.TestRepo{ + Path: filepath.Join(mainTestRepo.Path, "sub"), } + + submRepo2 := submTestRepo2.Repository(t) + + submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) + require.NoError(t, err) + h, err = sizes.ScanRepositoryUsingGraph( - context.Background(), submRepo2.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), submRepo2, + submRefRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/sizes/graph.go
b/sizes/graph.go index 59a6365..e9033ef 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -20,16 +20,11 @@ import ( // It returns the size data for the repository. func ScanRepositoryUsingGraph( ctx context.Context, - repo *git.Repository, rg RefGrouper, nameStyle NameStyle, + repo *git.Repository, refRoots []RefRoot, nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { graph := NewGraph(nameStyle) - refRoots, err := CollectReferences(ctx, repo, rg) - if err != nil { - return HistorySize{}, fmt.Errorf("reading references: %w", err) - } - objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err From 897baa1a96585fbc44238d0a536c92bf8a11f3ec Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 18:28:54 +0200 Subject: [PATCH 6/8] RefRoot: add some methods We want to add another type of root, so start the virtualization process. --- sizes/graph.go | 6 +++--- sizes/grouper.go | 17 +++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index e9033ef..660f682 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -38,11 +38,11 @@ func ScanRepositoryUsingGraph( errChan <- func() error { for _, refRoot := range refRoots { - if !refRoot.Walk { + if !refRoot.Walk() { continue } - if err := objIter.AddRoot(refRoot.OID); err != nil { + if err := objIter.AddRoot(refRoot.OID()); err != nil { return err } } @@ -258,7 +258,7 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing references: %d") for _, refRoot := range refRoots { progressMeter.Inc() - graph.RegisterReference(refRoot.Reference, refRoot.Walk, refRoot.Groups) + graph.RegisterReference(refRoot.Reference(), refRoot.Walk(), refRoot.Groups()) } progressMeter.Done() diff --git a/sizes/grouper.go b/sizes/grouper.go index 3807b0e..32d63ca 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -45,11 +45,16 @@ type RefGrouper interface { } type RefRoot struct { - git.Reference - Walk bool - 
Groups []RefGroupSymbol + ref git.Reference + walk bool + groups []RefGroupSymbol } +func (rr RefRoot) OID() git.OID { return rr.ref.OID } +func (rr RefRoot) Reference() git.Reference { return rr.ref } +func (rr RefRoot) Walk() bool { return rr.walk } +func (rr RefRoot) Groups() []RefGroupSymbol { return rr.groups } + func CollectReferences( ctx context.Context, repo *git.Repository, rg RefGrouper, ) ([]RefRoot, error) { @@ -73,9 +78,9 @@ func CollectReferences( refsSeen = append( refsSeen, RefRoot{ - Reference: ref, - Walk: walk, - Groups: groups, + ref: ref, + walk: walk, + groups: groups, }, ) } From 9e8b14fe3012f05c163ffdf79a32bcb2b48ea422 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 20:14:59 +0200 Subject: [PATCH 7/8] Allow arbitrary reachability roots to be fed in Instead of only traversing objects starting at references, allow the user to specify explicit Git objects via the command line. In that case, the traversal includes objects reachable from those objects. --- git-sizer.go | 50 ++++++-- git/obj_resolver.go | 20 +++ git/ref_filter.go | 16 ++- git_sizer_test.go | 178 ++++++++++++++++++-------- internal/refopts/ref_group_builder.go | 9 +- sizes/explicit_root.go | 19 +++ sizes/graph.go | 41 ++++-- sizes/grouper.go | 1 + sizes/path_resolver.go | 60 +++++---- 9 files changed, 290 insertions(+), 104 deletions(-) create mode 100644 git/obj_resolver.go create mode 100644 sizes/explicit_root.go diff --git a/git-sizer.go b/git-sizer.go index 0336d13..7cfd6ff 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -20,7 +20,9 @@ import ( "github.com/github/git-sizer/sizes" ) -const usage = `usage: git-sizer [OPTS] +const usage = `usage: git-sizer [OPTS] [ROOT...] + + Scan objects in your Git repository and emit statistics about them. --threshold THRESHOLD minimum level of concern (i.e., number of stars) that should be reported. Default: @@ -46,12 +48,29 @@ const usage = `usage: git-sizer [OPTS] be set via gitconfig: 'sizer.progress'. 
--version only report the git-sizer version number + Object selection: + + git-sizer traverses through your Git history to find objects to + process. By default, it processes all objects that are reachable from + any reference. You can tell it to process only some of your + references; see "Reference selection" below. + + If explicit ROOTs are specified on the command line, each one should + be a string that 'git rev-parse' can convert into a single Git object + ID, like 'main', 'main~:src', or an abbreviated SHA-1. See + git-rev-parse(1) for details. In that case, git-sizer also treats + those objects as starting points for its traversal, and also includes + the Git objects that are reachable from those roots in the analysis. + + As a special case, if one or more ROOTs are specified on the command + line but _no_ reference selection options, then _only_ the specified + ROOTs are traversed, and no references. + Reference selection: - By default, git-sizer processes all Git objects that are reachable - from any reference. The following options can be used to limit which - references to process. The last rule matching a reference determines - whether that reference is processed. + The following options can be used to limit which references to + process. The last rule matching a reference determines whether that + reference is processed. 
--[no-]branches process [don't process] branches --[no-]tags process [don't process] tags @@ -220,10 +239,6 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return nil } - if len(flags.Args()) != 0 { - return errors.New("excess arguments") - } - if repoErr != nil { return fmt.Errorf("couldn't open Git repository: %w", repoErr) } @@ -277,7 +292,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st progress = v } - rg, err := rgb.Finish() + rg, err := rgb.Finish(len(flags.Args()) == 0) if err != nil { return err } @@ -297,8 +312,21 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return fmt.Errorf("determining which reference to scan: %w", err) } + roots := make([]sizes.Root, 0, len(refRoots)+len(flags.Args())) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + for _, arg := range flags.Args() { + oid, err := repo.ResolveObject(arg) + if err != nil { + return fmt.Errorf("resolving command-line argument %q: %w", arg, err) + } + roots = append(roots, sizes.NewExplicitRoot(arg, oid)) + } + historySize, err := sizes.ScanRepositoryUsingGraph( - ctx, repo, refRoots, nameStyle, progressMeter, + ctx, repo, roots, nameStyle, progressMeter, ) if err != nil { return fmt.Errorf("error scanning repository: %w", err) diff --git a/git/obj_resolver.go b/git/obj_resolver.go new file mode 100644 index 0000000..418e293 --- /dev/null +++ b/git/obj_resolver.go @@ -0,0 +1,20 @@ +package git + +import ( + "bytes" + "fmt" +) + +func (repo *Repository) ResolveObject(name string) (OID, error) { + cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) + output, err := cmd.Output() + if err != nil { + return NullOID, fmt.Errorf("resolving object %q: %w", name, err) + } + oidString := string(bytes.TrimSpace(output)) + oid, err := NewOID(oidString) + if err != nil { + return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) 
+ } + return oid, nil +} diff --git a/git/ref_filter.go b/git/ref_filter.go index 8eb8a9b..46aff66 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -83,15 +83,23 @@ func (_ allReferencesFilter) Filter(_ string) bool { var AllReferencesFilter allReferencesFilter +type noReferencesFilter struct{} + +func (_ noReferencesFilter) Filter(_ string) bool { + return false +} + +var NoReferencesFilter noReferencesFilter + // PrefixFilter returns a `ReferenceFilter` that matches references // whose names start with the specified `prefix`, which must match at // a component boundary. For example, // -// * Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not -// "refs/foobar". +// - Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not +// "refs/foobar". // -// * Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or -// "refs/foobar". +// - Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or +// "refs/foobar". func PrefixFilter(prefix string) ReferenceFilter { if prefix == "" { return AllReferencesFilter diff --git a/git_sizer_test.go b/git_sizer_test.go index 54d90d5..16d58c9 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -567,54 +567,112 @@ func TestBomb(t *testing.T) { repo := testRepo.Repository(t) - refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) - require.NoError(t, err) + t.Run("full", func(t *testing.T) { + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) - h, err := sizes.ScanRepositoryUsingGraph( - ctx, repo, - refRoots, sizes.NameStyleFull, meter.NoProgressMeter, - ) - require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") + 
assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") + assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.BestPath(), "max commit size commit") + assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.BestPath(), "max parent count commit") + + assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, 
"refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) + + t.Run("partial", func(t *testing.T) { + name := "master:d0/d0" + oid, err := repo.ResolveObject(name) + require.NoError(t, err) + roots := []sizes.Root{sizes.NewExplicitRoot(name, oid)} - assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") - assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") - assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") - assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.Path(), "max commit size commit") - assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") - assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") - assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.Path(), "max parent count commit") - - assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") - assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") - assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") - assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") - assert.Equal(t, 
"refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.Path(), "max tree entries tree") - - assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") - assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") - assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") - assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.Path(), "max blob size blob") - - assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") - assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") - - assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") - - assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.Path(), "max path depth tree") - assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.Path(), "max path length tree") - - assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.Path(), "max expanded tree count tree") - assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.Path(), "max expanded blob count tree") - assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.Path(), "max expanded blob size tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") - assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") - assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + 
h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(0), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(0), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(0), h.MaxCommitSize, "max commit size") + assert.Nil(t, h.MaxCommitSizeCommit) + assert.Equal(t, counts.Count32(0), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Nil(t, h.MaxParentCountCommit, "max parent count commit") + + assert.Equal(t, counts.Count32(8), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2330), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(80), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(0), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(8), h.MaxPathDepth, "max path depth") + assert.Equal(t, "master:d0/d0", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(23), h.MaxPathLength, "max path length") + assert.Equal(t, "master:d0/d0", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 8)-1)/(10-1)), 
h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(pow(10, 8)), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 8)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) } func TestTaggedTags(t *testing.T) { @@ -650,9 +708,14 @@ func TestTaggedTags(t *testing.T) { refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( context.Background(), repo, - refRoots, sizes.NameStyleNone, meter.NoProgressMeter, + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -679,9 +742,14 @@ func TestFromSubdir(t *testing.T) { refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( context.Background(), testRepo.Repository(t), - refRoots, sizes.NameStyleNone, meter.NoProgressMeter, + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, 
err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -738,10 +806,15 @@ func TestSubmodule(t *testing.T) { mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) require.NoError(t, err) + mainRoots := make([]sizes.Root, 0, len(mainRefRoots)) + for _, refRoot := range mainRefRoots { + mainRoots = append(mainRoots, refRoot) + } + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( context.Background(), mainTestRepo.Repository(t), - mainRefRoots, sizes.NameStyleNone, meter.NoProgressMeter, + mainRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -758,9 +831,14 @@ func TestSubmodule(t *testing.T) { submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) require.NoError(t, err) + submRoots2 := make([]sizes.Root, 0, len(submRefRoots2)) + for _, refRoot := range submRefRoots2 { + submRoots2 = append(submRoots2, refRoot) + } + h, err = sizes.ScanRepositoryUsingGraph( context.Background(), submRepo2, - submRefRoots2, sizes.NameStyleNone, meter.NoProgressMeter, + submRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 3c3179e..48f1190 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -254,9 +254,14 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { // Finish collects the information gained from processing the options // and returns a `sizes.RefGrouper`. 
-func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { +func (rgb *RefGroupBuilder) Finish(defaultAll bool) (sizes.RefGrouper, error) { if rgb.topLevelGroup.filter == nil { - rgb.topLevelGroup.filter = git.AllReferencesFilter + // User didn't specify any reference options. + if defaultAll { + rgb.topLevelGroup.filter = git.AllReferencesFilter + } else { + rgb.topLevelGroup.filter = git.NoReferencesFilter + } } refGrouper := refGrouper{ diff --git a/sizes/explicit_root.go b/sizes/explicit_root.go new file mode 100644 index 0000000..09348db --- /dev/null +++ b/sizes/explicit_root.go @@ -0,0 +1,19 @@ +package sizes + +import "github.com/github/git-sizer/git" + +type ExplicitRoot struct { + name string + oid git.OID +} + +func NewExplicitRoot(name string, oid git.OID) ExplicitRoot { + return ExplicitRoot{ + name: name, + oid: oid, + } +} + +func (er ExplicitRoot) Name() string { return er.name } +func (er ExplicitRoot) OID() git.OID { return er.oid } +func (er ExplicitRoot) Walk() bool { return true } diff --git a/sizes/graph.go b/sizes/graph.go index 660f682..0fb1c8a 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -11,6 +11,18 @@ import ( "github.com/github/git-sizer/meter" ) +type Root interface { + Name() string + OID() git.OID + Walk() bool +} + +type ReferenceRoot interface { + Root + Reference() git.Reference + Groups() []RefGroupSymbol +} + // ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which // references to scan and how to group them. `nameStyle` specifies // whether the output should include full names, hashes only, or @@ -20,7 +32,9 @@ import ( // It returns the size data for the repository. 
func ScanRepositoryUsingGraph( ctx context.Context, - repo *git.Repository, refRoots []RefRoot, nameStyle NameStyle, + repo *git.Repository, + roots []Root, + nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { graph := NewGraph(nameStyle) @@ -37,12 +51,12 @@ func ScanRepositoryUsingGraph( defer objIter.Close() errChan <- func() error { - for _, refRoot := range refRoots { - if !refRoot.Walk() { + for _, root := range roots { + if !root.Walk() { continue } - if err := objIter.AddRoot(refRoot.OID()); err != nil { + if err := objIter.AddRoot(root.OID()); err != nil { return err } } @@ -256,9 +270,15 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - for _, refRoot := range refRoots { + for _, root := range roots { progressMeter.Inc() - graph.RegisterReference(refRoot.Reference(), refRoot.Walk(), refRoot.Groups()) + if refRoot, ok := root.(ReferenceRoot); ok { + graph.RegisterReference(refRoot.Reference(), refRoot.Groups()) + } + + if root.Walk() { + graph.pathResolver.RecordName(root.Name(), root.OID()) + } } progressMeter.Done() @@ -310,17 +330,18 @@ func NewGraph(nameStyle NameStyle) *Graph { } // RegisterReference records the specified reference in `g`. -func (g *Graph) RegisterReference(ref git.Reference, walked bool, groups []RefGroupSymbol) { +func (g *Graph) RegisterReference(ref git.Reference, groups []RefGroupSymbol) { g.historyLock.Lock() g.historySize.recordReference(g, ref) for _, group := range groups { g.historySize.recordReferenceGroup(g, group) } g.historyLock.Unlock() +} - if walked { - g.pathResolver.RecordReference(ref) - } +// Register a name that can be used for the specified OID. +func (g *Graph) RegisterName(name string, oid git.OID) { + g.pathResolver.RecordName(name, oid) } // HistorySize returns the size data that have been collected. 
diff --git a/sizes/grouper.go b/sizes/grouper.go index 32d63ca..fdaa927 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -50,6 +50,7 @@ type RefRoot struct { groups []RefGroupSymbol } +func (rr RefRoot) Name() string { return rr.ref.Refname } func (rr RefRoot) OID() git.OID { return rr.ref.OID } func (rr RefRoot) Reference() git.Reference { return rr.ref } func (rr RefRoot) Walk() bool { return rr.walk } diff --git a/sizes/path_resolver.go b/sizes/path_resolver.go index 2a3bb1c..275d19a 100644 --- a/sizes/path_resolver.go +++ b/sizes/path_resolver.go @@ -12,15 +12,15 @@ import ( // `rev-parse` input, including commit and/or file path) by which // specified objects are reachable. It is used as follows: // -// * Request an object's path using `RequestPath()`. The returned -// `Path` object is a placeholder for the object's path. +// - Request an object's path using `RequestPath()`. The returned +// `Path` object is a placeholder for the object's path. // -// * Tell the `PathResolver` about objects that might be along the -// object's reachability path, *in depth-first* order (i.e., -// referents before referers) by calling `RecordTree()`, -// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. +// - Tell the `PathResolver` about objects that might be along the +// object's reachability path, *in depth-first* order (i.e., +// referents before referrers) by calling `RecordTreeEntry()`, +// `RecordCommit()`, `RecordTag()`, and `RecordName()`. // -// * Read the path out of the `Path` object using `Path.Path()`. +// - Read the path out of the `Path` object using `Path.Path()`. // // Multiple objects can be processed at once.
// @@ -34,7 +34,7 @@ import ( type PathResolver interface { RequestPath(oid git.OID, objectType string) *Path ForgetPath(p *Path) - RecordReference(ref git.Reference) + RecordName(name string, oid git.OID) RecordTreeEntry(oid git.OID, name string, childOID git.OID) RecordCommit(oid, tree git.OID) RecordTag(oid git.OID, tag *git.Tag) @@ -60,7 +60,7 @@ func (n NullPathResolver) RequestPath(oid git.OID, objectType string) *Path { func (_ NullPathResolver) ForgetPath(p *Path) {} -func (_ NullPathResolver) RecordReference(ref git.Reference) {} +func (_ NullPathResolver) RecordName(name string, oid git.OID) {} func (_ NullPathResolver) RecordTreeEntry(oid git.OID, name string, childOID git.OID) {} @@ -77,19 +77,19 @@ type InOrderPathResolver struct { // (e.g., the biggest blob, or a tree containing the biggest blob, or // a commit whose tree contains the biggest blob). Valid states: // -// * `parent == nil && relativePath == ""`—we have not yet found -// anything that refers to this object. +// - `parent == nil && relativePath == ""`—we have not yet found +// anything that refers to this object. // -// * `parent != nil && relativePath == ""`—this object is a tree, and -// we have found a commit that refers to it. +// - `parent != nil && relativePath == ""`—this object is a tree, and +// we have found a commit that refers to it. // -// * `parent == nil && relativePath != ""`—we have found a reference -// that points directly at this object; `relativePath` is the full -// name of the reference. +// - `parent == nil && relativePath != ""`—we have found a reference +// that points directly at this object; `relativePath` is the full +// name of the reference. // -// * `parent != nil && relativePath != ""`—this object is a blob or -// tree, and we have found another tree that refers to it; -// `relativePath` is the corresponding tree entry name. 
+// - `parent != nil && relativePath != ""`—this object is a blob or +// tree, and we have found another tree that refers to it; +// `relativePath` is the corresponding tree entry name. type Path struct { // The OID of the object whose path we seek. This member is always // set. @@ -122,7 +122,8 @@ type Path struct { func (p *Path) TreePrefix() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return p.parent.TreePrefix() @@ -130,7 +131,9 @@ func (p *Path) TreePrefix() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath + "/" } - } else { + case p.relativePath != "": + return p.relativePath + "/" + default: return "???" } case "commit", "tag": @@ -153,7 +156,8 @@ func (p *Path) TreePrefix() string { func (p *Path) Path() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) @@ -161,7 +165,9 @@ func (p *Path) Path() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath } - } else { + case p.relativePath != "": + return p.relativePath + default: return "" } case "commit", "tag": @@ -274,18 +280,18 @@ func (pr *InOrderPathResolver) forgetPathLocked(p *Path) { } } -func (pr *InOrderPathResolver) RecordReference(ref git.Reference) { +func (pr *InOrderPathResolver) RecordName(name string, oid git.OID) { pr.lock.Lock() defer pr.lock.Unlock() - p, ok := pr.soughtPaths[ref.OID] + p, ok := pr.soughtPaths[oid] if !ok { // Nobody is looking for the path to the referent. 
return } - p.relativePath = ref.Refname - delete(pr.soughtPaths, ref.OID) + p.relativePath = name + delete(pr.soughtPaths, oid) } // Record that the tree with OID `oid` has an entry with the specified From 5d339ec292a3cc126f802efa98de90ea6a804626 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 19 Aug 2023 15:25:51 +0200 Subject: [PATCH 8/8] There's no reason to make this context cancelable --- git-sizer.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 7cfd6ff..0888d78 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -113,8 +113,7 @@ var ReleaseVersion string var BuildVersion string func main() { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() + ctx := context.Background() err := mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil {