From 9d71973b7c0ce12a4a05dce16ba0535863921316 Mon Sep 17 00:00:00 2001 From: elhmn Date: Mon, 25 Sep 2023 13:05:54 +0200 Subject: [PATCH 1/4] Don't search/list every single repo reference when explicit references are provided --- git-sizer.go | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 0888d78..149db80 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -306,22 +306,27 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - refRoots, err := sizes.CollectReferences(ctx, repo, rg) - if err != nil { - return fmt.Errorf("determining which reference to scan: %w", err) - } - - roots := make([]sizes.Root, 0, len(refRoots)+len(flags.Args())) - for _, refRoot := range refRoots { - roots = append(roots, refRoot) - } - - for _, arg := range flags.Args() { - oid, err := repo.ResolveObject(arg) + var roots []sizes.Root + // If arguments are provided, use them as explicit roots. 
+ if len(flags.Args()) > 0 { + roots = make([]sizes.Root, 0, len(flags.Args())) + for _, arg := range flags.Args() { + oid, err := repo.ResolveObject(arg) + if err != nil { + return fmt.Errorf("resolving command-line argument %q: %w", arg, err) + } + roots = append(roots, sizes.NewExplicitRoot(arg, oid)) + } + } else { + refs, err := sizes.CollectReferences(ctx, repo, rg) if err != nil { - return fmt.Errorf("resolving command-line argument %q: %w", arg, err) + return fmt.Errorf("determining which reference to scan: %w", err) + } + + roots = make([]sizes.Root, 0, len(refs)) + for _, ref := range refs { + roots = append(roots, ref) } - roots = append(roots, sizes.NewExplicitRoot(arg, oid)) } historySize, err := sizes.ScanRepositoryUsingGraph( From 721e353ef294d7935685b95854cfa89e6232e7c3 Mon Sep 17 00:00:00 2001 From: elhmn Date: Fri, 29 Sep 2023 13:59:49 +0200 Subject: [PATCH 2/4] Still run `git for-each-ref` when explicit roots and reference filters are specified When explicit roots and reference filters are both provided, we want to make sure that the optimisation doesn't kick in, as we still want to iterate over all references in order to apply the filters. Here is an example with git-sizer being run with explicit roots and reference filters ``` git-sizer main~20^{tree} main~10^{tree} --tags --branches ``` --- git-sizer.go | 19 +++++++++++++------ git/ref_filter.go | 5 +++++ internal/refopts/ref_group.go | 4 ++++ internal/refopts/ref_group_builder.go | 4 ++++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 149db80..8668a9b 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -307,26 +307,33 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st } var roots []sizes.Root + var explicitRoots []sizes.Root // If arguments are provided, use them as explicit roots. 
if len(flags.Args()) > 0 { - roots = make([]sizes.Root, 0, len(flags.Args())) + explicitRoots = make([]sizes.Root, 0, len(flags.Args())) for _, arg := range flags.Args() { oid, err := repo.ResolveObject(arg) if err != nil { return fmt.Errorf("resolving command-line argument %q: %w", arg, err) } - roots = append(roots, sizes.NewExplicitRoot(arg, oid)) + explicitRoots = append(explicitRoots, sizes.NewExplicitRoot(arg, oid)) } + } + + // If no reference filters and no explicit roots were provided + if git.IsNoReferencesFilter(rgb.GetTopLevelGroup().GetFilter()) { + roots = explicitRoots } else { - refs, err := sizes.CollectReferences(ctx, repo, rg) + refRoots, err := sizes.CollectReferences(ctx, repo, rg) if err != nil { return fmt.Errorf("determining which reference to scan: %w", err) } - roots = make([]sizes.Root, 0, len(refs)) - for _, ref := range refs { - roots = append(roots, ref) + roots = make([]sizes.Root, 0, len(refRoots)+len(explicitRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) } + roots = append(roots, explicitRoots...) 
+ } historySize, err := sizes.ScanRepositoryUsingGraph( diff --git a/git/ref_filter.go b/git/ref_filter.go index 46aff66..9b7bf72 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -140,3 +140,8 @@ type regexpFilter struct { func (f regexpFilter) Filter(refname string) bool { return f.re.MatchString(refname) } + +func IsNoReferencesFilter(val interface{}) bool { + _, ok := val.(noReferencesFilter) + return ok +} diff --git a/internal/refopts/ref_group.go b/internal/refopts/ref_group.go index b86b333..e3f7fd5 100644 --- a/internal/refopts/ref_group.go +++ b/internal/refopts/ref_group.go @@ -30,6 +30,10 @@ type refGroup struct { otherRefGroup *sizes.RefGroup } +func (rg *refGroup) GetFilter() git.ReferenceFilter { + return rg.filter +} + func (rg *refGroup) collectSymbols(refname string) (bool, []sizes.RefGroupSymbol) { walk := false var symbols []sizes.RefGroupSymbol diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 48f1190..3ea2b3e 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -22,6 +22,10 @@ type RefGroupBuilder struct { groups map[sizes.RefGroupSymbol]*refGroup } +func (rgb *RefGroupBuilder) GetTopLevelGroup() *refGroup { + return rgb.topLevelGroup +} + // NewRefGroupBuilder creates and returns a `RefGroupBuilder` // instance. func NewRefGroupBuilder(configger Configger) (*RefGroupBuilder, error) { From 0315cbab50567b6474748d381bc7b4c071d59b53 Mon Sep 17 00:00:00 2001 From: elhmn Date: Fri, 29 Sep 2023 18:23:41 +0200 Subject: [PATCH 3/4] Don't print the reference count when explicit roots are provided We printed out the reference count, even when explicit roots were provided. 
``` git-sizer master~50^{tree} 'HEAD@{1.week.ago}' -v [TRUNCATED_OUTPUT] | * References | | | | * Count | 187 | | | * Ignored | 187 | | | | | | [TRUNCATED_OUTPUT] ``` This is not very useful, as the reference count depends on the list of references extracted by running `git for-each-ref`. This commit changes the output to not print the reference count when explicit roots and no reference filters are provided to git-sizer --- git-sizer.go | 8 +++++-- sizes/output.go | 63 ++++++++++++++++++++++++++++--------------------- 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 8668a9b..d9aadbd 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -343,6 +343,10 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return fmt.Errorf("error scanning repository: %w", err) } + formatOptions := sizes.FormatOptions{ + WithoutReferenceCount: git.IsNoReferencesFilter(rgb.GetTopLevelGroup().GetFilter()), + } + if jsonOutput { var j []byte var err error @@ -350,7 +354,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st case 1: j, err = json.MarshalIndent(historySize, "", " ") case 2: - j, err = historySize.JSON(rg.Groups(), threshold, nameStyle) + j, err = historySize.JSON(rg.Groups(), threshold, nameStyle, formatOptions) default: return fmt.Errorf("JSON version must be 1 or 2") } @@ -360,7 +364,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st fmt.Fprintf(stdout, "%s\n", j) } else { if _, err := io.WriteString( - stdout, historySize.TableString(rg.Groups(), threshold, nameStyle), + stdout, historySize.TableString(rg.Groups(), threshold, nameStyle, formatOptions), ); err != nil { return fmt.Errorf("writing output: %w", err) } diff --git a/sizes/output.go b/sizes/output.go index 933cc05..42e58d9 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -86,14 +86,14 @@ type section struct { contents []tableContents } -func newSection(name string, 
contents ...tableContents) *section { - return &section{ +func newSection(name string, contents ...tableContents) section { + return section{ name: name, contents: contents, } } -func (s *section) Emit(t *table) { +func (s section) Emit(t *table) { for _, c := range s.contents { subTable := t.subTable(s.name) c.Emit(subTable) @@ -101,7 +101,7 @@ func (s *section) Emit(t *table) { } } -func (s *section) CollectItems(items map[string]*item) { +func (s section) CollectItems(items map[string]*item) { for _, c := range s.contents { c.CollectItems(items) } @@ -141,7 +141,7 @@ func newItem( } } -func (i *item) Emit(t *table) { +func (i item) Emit(t *table) { levelOfConcern, interesting := i.levelOfConcern(t.threshold) if !interesting { return @@ -154,7 +154,7 @@ func (i *item) Emit(t *table) { ) } -func (i *item) Footnote(nameStyle NameStyle) string { +func (i item) Footnote(nameStyle NameStyle) string { if i.path == nil || i.path.OID == git.NullOID { return "" } @@ -173,7 +173,7 @@ func (i *item) Footnote(nameStyle NameStyle) string { // If this item's alert level is at least as high as the threshold, // return the string that should be used as its "level of concern" and // `true`; otherwise, return `"", false`. -func (i *item) levelOfConcern(threshold Threshold) (string, bool) { +func (i item) levelOfConcern(threshold Threshold) (string, bool) { value, overflow := i.value.ToUint64() if overflow { return "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", true @@ -188,11 +188,11 @@ func (i *item) levelOfConcern(threshold Threshold) (string, bool) { return stars[:int(alert)], true } -func (i *item) CollectItems(items map[string]*item) { - items[i.symbol] = i +func (i item) CollectItems(items map[string]*item) { + items[i.symbol] = &i } -func (i *item) MarshalJSON() ([]byte, error) { +func (i item) MarshalJSON() ([]byte, error) { // How we want to emit an item as JSON. 
value, _ := i.value.ToUint64() @@ -224,7 +224,7 @@ func (i *item) MarshalJSON() ([]byte, error) { // Indented returns an `item` that is just like `i`, but indented by // `depth` more levels. -func (i *item) Indented(depth int) tableContents { +func (i item) Indented(depth int) tableContents { return &indentedItem{ tableContents: i, depth: depth, @@ -236,7 +236,7 @@ type indentedItem struct { depth int } -func (i *indentedItem) Emit(t *table) { +func (i indentedItem) Emit(t *table) { subTable := t.indented("", i.depth) i.tableContents.Emit(subTable) t.addSection(subTable) @@ -373,8 +373,9 @@ type table struct { func (s *HistorySize) TableString( refGroups []RefGroup, threshold Threshold, nameStyle NameStyle, + opts FormatOptions, ) string { - contents := s.contents(refGroups) + contents := s.contents(refGroups, opts) t := table{ threshold: threshold, nameStyle: nameStyle, @@ -454,17 +455,20 @@ func (t *table) formatRow( ) } +type FormatOptions struct { + WithoutReferenceCount bool +} + func (s *HistorySize) JSON( - refGroups []RefGroup, threshold Threshold, nameStyle NameStyle, -) ([]byte, error) { - contents := s.contents(refGroups) + refGroups []RefGroup, threshold Threshold, nameStyle NameStyle, opts FormatOptions) ([]byte, error) { + contents := s.contents(refGroups, opts) items := make(map[string]*item) contents.CollectItems(items) j, err := json.MarshalIndent(items, "", " ") return j, err } -func (s *HistorySize) contents(refGroups []RefGroup) tableContents { +func (s *HistorySize) contents(refGroups []RefGroup, opts FormatOptions) tableContents { S := newSection I := newItem metric := counts.Metric @@ -489,6 +493,20 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents { rgis = append(rgis, rgi.Indented(indent)) } + var refCountSection section + if !opts.WithoutReferenceCount { + refCountSection = S( + "References", + I("referenceCount", "Count", + "The total number of references", + nil, s.ReferenceCount, metric, "", 25e3), + S( + "", + 
+ rgis..., + ), + ) + } + return S( "", S( @@ -533,16 +551,7 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents { nil, s.UniqueTagCount, metric, "", 25e3), ), - S( - "References", - I("referenceCount", "Count", - "The total number of references", - nil, s.ReferenceCount, metric, "", 25e3), - S( - "", - rgis..., - ), - ), + refCountSection, ), S("Biggest objects", From d407c86a6bb8be08efb4aaa75838e77cc75de9bc Mon Sep 17 00:00:00 2001 From: elhmn Date: Thu, 5 Oct 2023 12:59:42 +0200 Subject: [PATCH 4/4] Don't print reference count for json v1 The reference count is still displayed when the -j flag is passed to git-sizer. ``` git-sizer master~50^{tree} 'HEAD@{1.week.ago}' -v -j [TRUNCATED_OUTPUT] "reference_count": 15, "reference_groups": { "ignored": 15 }, [TRUNCATED_OUTPUT] ``` We don't want to print the reference count for json v1 when explicit roots and no reference filters are provided to git-sizer --- git-sizer.go | 2 +- sizes/output.go | 13 +++++++++++++ sizes/sizes.go | 4 ++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index d9aadbd..339baee 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -352,7 +352,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st var err error switch jsonVersion { case 1: - j, err = json.MarshalIndent(historySize, "", " ") + j, err = json.MarshalIndent(historySize.JsonV1Format(&formatOptions), "", " ") case 2: j, err = historySize.JSON(rg.Groups(), threshold, nameStyle, formatOptions) default: diff --git a/sizes/output.go b/sizes/output.go index 42e58d9..ee8fe27 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -41,6 +41,19 @@ func (s TagSize) String() string { return fmt.Sprintf("tag_depth=%d", s.TagDepth) } +func (s *HistorySize) JsonV1Format(opts *FormatOptions) *HistorySize { + if opts == nil { + return s + } + + if opts.WithoutReferenceCount { + s.ReferenceCount = 0 + s.ReferenceGroups = nil + } + + return s +} + func (s *HistorySize) String() string { 
return fmt.Sprintf( "unique_commit_count=%d, unique_commit_count = %d, max_commit_size = %d, "+ diff --git a/sizes/sizes.go b/sizes/sizes.go index b3de0bc..41f0339 100644 --- a/sizes/sizes.go +++ b/sizes/sizes.go @@ -158,11 +158,11 @@ type HistorySize struct { // The number of references analyzed. Note that we don't eliminate // duplicates if the user passes the same reference more than // once. - ReferenceCount counts.Count32 `json:"reference_count"` + ReferenceCount counts.Count32 `json:"reference_count,omitempty"` // ReferenceGroups keeps track of how many references in each // reference group were scanned. - ReferenceGroups map[RefGroupSymbol]*counts.Count32 `json:"reference_groups"` + ReferenceGroups map[RefGroupSymbol]*counts.Count32 `json:"reference_groups,omitempty"` // The maximum TreeSize in the analyzed history (where each // attribute is maximized separately).