
Commit ab48eb9

Add system.paging.faults metrics on Windows (#40468)
#### Description
Added the `system.paging.faults` metric on Windows.

#### Testing
Besides the updated tests, the change was validated with a local run of the collector to confirm the new metric is emitted.

#### Documentation
N/A, since the documentation did not state that this metric is not generated on Windows.
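For a local validation run like the one described under Testing, a minimal collector configuration along these lines is enough; `system.paging.faults` is enabled by default in the paging scraper, so no per-metric settings are required. This snippet is an illustrative sketch, not part of the commit:

receivers:
  hostmetrics:
    collection_interval: 30s
    scrapers:
      paging:

exporters:
  debug:
    verbosity: detailed

service:
  pipelines:
    metrics:
      receivers: [hostmetrics]
      exporters: [debug]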
1 parent 4b283be commit ab48eb9

6 files changed: +204 -50 lines changed
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: receiver/hostmetrics
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Add `system.paging.faults` metrics on Windows
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [40468]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [user]

receiver/hostmetricsreceiver/hostmetrics_receiver_test.go

Lines changed: 6 additions & 5 deletions
@@ -52,6 +52,7 @@ var allMetrics = []string{
 	"system.network.errors",
 	"system.network.io",
 	"system.network.packets",
+	"system.paging.faults",
 	"system.paging.operations",
 	"system.paging.usage",
 }
@@ -64,11 +65,11 @@ var resourceMetrics = []string{
 }
 
 var systemSpecificMetrics = map[string][]string{
-	"linux":   {"system.disk.merged", "system.disk.weighted_io_time", "system.filesystem.inodes.usage", "system.paging.faults", "system.processes.created", "system.processes.count"},
-	"darwin":  {"system.filesystem.inodes.usage", "system.paging.faults", "system.processes.count"},
-	"freebsd": {"system.filesystem.inodes.usage", "system.paging.faults", "system.processes.count"},
-	"openbsd": {"system.filesystem.inodes.usage", "system.paging.faults", "system.processes.created", "system.processes.count"},
-	"solaris": {"system.filesystem.inodes.usage", "system.paging.faults"},
+	"linux":   {"system.disk.merged", "system.disk.weighted_io_time", "system.filesystem.inodes.usage", "system.processes.created", "system.processes.count"},
+	"darwin":  {"system.filesystem.inodes.usage", "system.processes.count"},
+	"freebsd": {"system.filesystem.inodes.usage", "system.processes.count"},
+	"openbsd": {"system.filesystem.inodes.usage", "system.processes.created", "system.processes.count"},
+	"solaris": {"system.filesystem.inodes.usage"},
 }
 
 func TestGatherMetrics_EndToEnd(t *testing.T) {

receiver/hostmetricsreceiver/internal/scraper/loadscraper/load_scraper_windows.go

Lines changed: 3 additions & 1 deletion
@@ -98,8 +98,10 @@ func newSampler(logger *zap.Logger) (*sampler, error) {
 }
 
 func (sw *sampler) startSamplingTicker() {
+	// Store the sampling frequency in a local variable to avoid race conditions during tests.
+	frequency := samplingFrequency
 	go func() {
-		ticker := time.NewTicker(samplingFrequency)
+		ticker := time.NewTicker(frequency)
 		defer ticker.Stop()
 
 		sw.sampleLoad()

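The fix above reads the package-level `samplingFrequency` once, before the goroutine is started, so a test that overrides the variable afterwards cannot race with the goroutine's read. A standalone sketch of the same pattern (the names below are illustrative, not taken from the collector):

package main

import (
	"fmt"
	"time"
)

// samplingInterval stands in for a package-level setting that tests may override.
var samplingInterval = 10 * time.Millisecond

func startTicker(done <-chan struct{}) {
	// Read the shared variable once, before launching the goroutine, so a later
	// write (e.g. from a test) cannot race with the read used to build the ticker.
	interval := samplingInterval
	go func() {
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				fmt.Println("sample")
			case <-done:
				return
			}
		}
	}()
}

func main() {
	done := make(chan struct{})
	startTicker(done)
	time.Sleep(50 * time.Millisecond)
	close(done)
}
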
receiver/hostmetricsreceiver/internal/scraper/pagingscraper/paging_scraper_test.go

Lines changed: 28 additions & 16 deletions
@@ -77,30 +77,42 @@ func TestScrape(t *testing.T) {
 			require.NoError(t, err)
 			metrics := md.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics()
 
-			// Windows does not currently support the faults metric
 			expectedMetrics := 4
-			if runtime.GOOS == "windows" {
-				expectedMetrics = 3
-			}
-
 			assert.Equal(t, expectedMetrics, md.MetricCount())
 
-			startIndex := 0
-			if runtime.GOOS != "windows" {
-				assertPageFaultsMetricValid(t, metrics.At(startIndex), test.expectedStartTime)
-				startIndex++
+			var pagingUsageMetricIdx, pagingUtilizationMetricIdx, pagingOperationsMetricIdx, pagingFaultsMetricIdx int
+			for i := 0; i < metrics.Len(); i++ {
+				metric := metrics.At(i)
+				switch metric.Name() {
+				case "system.paging.faults":
+					pagingFaultsMetricIdx = i
+				case "system.paging.operations":
+					pagingOperationsMetricIdx = i
+				case "system.paging.usage":
+					pagingUsageMetricIdx = i
+				case "system.paging.utilization":
+					pagingUtilizationMetricIdx = i
+				default:
+					assert.Fail(t, "Unexpected metric found", metric.Name())
+				}
 			}
 
-			assertPagingOperationsMetricValid(t, []pmetric.Metric{metrics.At(startIndex)},
+			// This test historically ensured that some metrics had the same timestamp, keeping this legacy behavior.
+
+			assertPageFaultsMetricValid(t, metrics.At(pagingFaultsMetricIdx), test.expectedStartTime)
+
+			assertPagingOperationsMetricValid(t, []pmetric.Metric{metrics.At(pagingOperationsMetricIdx)},
 				test.expectedStartTime, false)
 
-			internal.AssertSameTimeStampForMetrics(t, metrics, 0, metrics.Len()-2)
-			startIndex++
+			internal.AssertSameTimeStampForMetrics(t, metrics, pagingUsageMetricIdx, pagingUsageMetricIdx+2)
+
+			assertPagingUsageMetricValid(t, metrics.At(pagingUsageMetricIdx))
+			if runtime.GOOS != "windows" {
+				// On Windows, page faults do not have the same timestamp as paging operations
+				internal.AssertSameTimeStampForMetrics(t, metrics, pagingFaultsMetricIdx, pagingFaultsMetricIdx+2)
+			}
 
-			assertPagingUsageMetricValid(t, metrics.At(startIndex))
-			internal.AssertSameTimeStampForMetrics(t, metrics, startIndex, metrics.Len())
-			startIndex++
-			assertPagingUtilizationMetricValid(t, metrics.At(startIndex))
+			assertPagingUtilizationMetricValid(t, metrics.At(pagingUtilizationMetricIdx))
 		})
 	}
 }

receiver/hostmetricsreceiver/internal/scraper/pagingscraper/paging_scraper_windows.go

Lines changed: 53 additions & 3 deletions
@@ -28,8 +28,12 @@ const (
 
 	memory = "Memory"
 
+	// The counters below are per second rates, but, instead of reading the calculated rates,
+	// we read the raw values and post them as cumulative metrics.
 	pageReadsPerSec  = "Page Reads/sec"
 	pageWritesPerSec = "Page Writes/sec"
+	pageFaultsPerSec = "Page Faults/sec" // All page faults, including minor and major, aka soft and hard faults
+	pageMajPerSec    = "Pages/sec"       // Only major, aka hard, page faults.
 )
 
 // scraper for Paging Metrics
@@ -38,9 +42,11 @@ type pagingScraper struct {
 	config *Config
 	mb     *metadata.MetricsBuilder
 
-	pageReadsPerfCounter  winperfcounters.PerfCounterWatcher
-	pageWritesPerfCounter winperfcounters.PerfCounterWatcher
-	skipScrape            bool
+	pageReadsPerfCounter     winperfcounters.PerfCounterWatcher
+	pageWritesPerfCounter    winperfcounters.PerfCounterWatcher
+	pageFaultsPerfCounter    winperfcounters.PerfCounterWatcher
+	pageMajFaultsPerfCounter winperfcounters.PerfCounterWatcher
+	skipScrape               bool
 
 	// for mocking
 	bootTime func(context.Context) (uint64, error)
@@ -79,6 +85,18 @@ func (s *pagingScraper) start(ctx context.Context, _ component.Host) error {
 		s.skipScrape = true
 	}
 
+	s.pageFaultsPerfCounter, err = s.perfCounterFactory(memory, "", pageFaultsPerSec)
+	if err != nil {
+		s.settings.Logger.Error("Failed to create performance counter for page faults / sec", zap.Error(err))
+		s.skipScrape = true
+	}
+
+	s.pageMajFaultsPerfCounter, err = s.perfCounterFactory(memory, "", pageMajPerSec)
+	if err != nil {
+		s.settings.Logger.Error("Failed to create performance counter for major, aka hard, page faults / sec", zap.Error(err))
+		s.skipScrape = true
+	}
+
 	return nil
 }
 
@@ -99,6 +117,8 @@ func (s *pagingScraper) scrape(context.Context) (pmetric.Metrics, error) {
 		errors.AddPartial(pagingMetricsLen, err)
 	}
 
+	s.scrapePagingFaultsMetric(&errors)
+
 	return s.mb.Emit(), errors.Combine()
 }
 
@@ -152,3 +172,33 @@ func (s *pagingScraper) scrapePagingOperationsMetric() error {
 
 	return nil
 }
+
+func (s *pagingScraper) scrapePagingFaultsMetric(errors *scrapererror.ScrapeErrors) {
+	now := pcommon.NewTimestampFromTime(time.Now())
+
+	var pageMajFaultsPerSecValue int64
+	pageMajFaultsHasValue, err := s.pageMajFaultsPerfCounter.ScrapeRawValue(&pageMajFaultsPerSecValue)
+	if err != nil {
+		// Count is 2 since without major page faults none of the paging metrics will be recorded
+		errors.AddPartial(2, err)
+		return
+	}
+	if !pageMajFaultsHasValue {
+		s.settings.Logger.Debug(
+			"Skipping paging faults metrics as no value was scraped for 'Pages/sec' performance counter")
+		return
+	}
+	s.mb.RecordSystemPagingFaultsDataPoint(now, pageMajFaultsPerSecValue, metadata.AttributeTypeMajor)
+
+	var pageFaultsPerSecValue int64
+	pageFaultsHasValue, err := s.pageFaultsPerfCounter.ScrapeRawValue(&pageFaultsPerSecValue)
+	if err != nil {
+		errors.AddPartial(1, err)
+	}
+	if pageFaultsHasValue {
+		s.mb.RecordSystemPagingFaultsDataPoint(now, pageFaultsPerSecValue-pageMajFaultsPerSecValue, metadata.AttributeTypeMinor)
+	} else {
+		s.settings.Logger.Debug(
+			"Skipping minor paging faults metric as no value was scraped for 'Page Faults/sec' performance counter")
+	}
+}
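Because the scraper publishes the raw counter values as cumulative sums (major faults directly, and minor faults as total minus major), a per-second rate can still be recovered downstream from consecutive data points. A rough sketch with made-up numbers for two scrapes 30 seconds apart, not code from this commit:

package main

import (
	"fmt"
	"time"
)

// cumulativeSample is a simplified stand-in for one scrape of the raw counters.
type cumulativeSample struct {
	totalFaults int64 // raw value behind "Page Faults/sec" (major + minor)
	majorFaults int64 // raw value behind "Pages/sec" (major only)
	at          time.Time
}

func main() {
	t0 := time.Now()
	prev := cumulativeSample{totalFaults: 10_000, majorFaults: 1_200, at: t0}
	curr := cumulativeSample{totalFaults: 10_900, majorFaults: 1_500, at: t0.Add(30 * time.Second)}

	// The scraper records major faults as-is and minor faults as total - major.
	minorPrev := prev.totalFaults - prev.majorFaults
	minorCurr := curr.totalFaults - curr.majorFaults

	// A per-second rate is the delta of the cumulative sum over the elapsed time.
	elapsed := curr.at.Sub(prev.at).Seconds()
	fmt.Printf("major faults/sec: %.1f\n", float64(curr.majorFaults-prev.majorFaults)/elapsed)
	fmt.Printf("minor faults/sec: %.1f\n", float64(minorCurr-minorPrev)/elapsed)
}
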

receiver/hostmetricsreceiver/internal/scraper/pagingscraper/paging_scraper_windows_test.go

Lines changed: 87 additions & 25 deletions
@@ -29,6 +29,8 @@ func TestScrape_Errors(t *testing.T) {
 		getPageFileStats       func() ([]*pageFileStats, error)
 		pageReadsScrapeErr     error
 		pageWritesScrapeErr    error
+		pageMajFaultsScrapeErr error
+		pageFaultsScrapeErr    error
 		expectedErr            string
 		expectedErrCount       int
 		expectedUsedValue      int64
@@ -71,11 +73,32 @@ func TestScrape_Errors(t *testing.T) {
 			expectedErrCount: pagingMetricsLen,
 		},
 		{
-			name:               "multipleErrors",
-			getPageFileStats:   func() ([]*pageFileStats, error) { return nil, errors.New("err1") },
-			pageReadsScrapeErr: errors.New("err2"),
-			expectedErr:        "failed to read page file stats: err1; err2",
-			expectedErrCount:   pagingUsageMetricsLen + pagingMetricsLen,
+			name:                   "pageMajFaultsScrapeError",
+			pageMajFaultsScrapeErr: errors.New("err3"),
+			expectedErr:            "err3",
+			expectedErrCount:       2, // If major faults failed to be scraped, the code can't report minor faults either
+		},
+		{
+			name:                "pageFaultsScrapeError",
+			pageFaultsScrapeErr: errors.New("err4"),
+			expectedErr:         "err4",
+			expectedErrCount:    1,
+		},
+		{
+			name:                   "multipleErrors-majorFaultErr",
+			getPageFileStats:       func() ([]*pageFileStats, error) { return nil, errors.New("err1") },
+			pageReadsScrapeErr:     errors.New("err2"),
+			pageMajFaultsScrapeErr: errors.New("err3"),
+			expectedErr:            "failed to read page file stats: err1; err2; err3",
+			expectedErrCount:       4, // If major faults failed to be scraped, the code can't report minor faults either
+		},
+		{
+			name:                "multipleErrors-minorFaultErr",
+			getPageFileStats:    func() ([]*pageFileStats, error) { return nil, errors.New("err1") },
+			pageReadsScrapeErr:  errors.New("err2"),
+			pageFaultsScrapeErr: errors.New("err3"),
+			expectedErr:         "failed to read page file stats: err1; err2; err3",
+			expectedErrCount:    3,
 		},
 	}
 
@@ -98,19 +121,46 @@ func TestScrape_Errors(t *testing.T) {
 				assert.Zero(t, pageSize%4096) // page size on Windows should always be a multiple of 4KB
 			}
 
+			const (
+				defaultPageReadsPerSec  int64 = 1000
+				defaultPageWritesPerSec int64 = 500
+				defaultPageFaultsPerSec int64 = 300
+				defaultPageMajPerSec    int64 = 200
+			)
 			scraper.perfCounterFactory = func(_, _, counter string) (winperfcounters.PerfCounterWatcher, error) {
-				if counter == pageReadsPerSec && test.pageReadsScrapeErr != nil {
-					return &testmocks.PerfCounterWatcherMock{
-						ScrapeErr: test.pageReadsScrapeErr,
-					}, nil
-				}
-				if counter == pageWritesPerSec && test.pageWritesScrapeErr != nil {
-					return &testmocks.PerfCounterWatcherMock{
-						ScrapeErr: test.pageWritesScrapeErr,
-					}, nil
+				perfCounterMock := &testmocks.PerfCounterWatcherMock{}
+				switch counter {
+				case pageReadsPerSec:
+					perfCounterMock.Val = defaultPageReadsPerSec
+					if test.pageReadsScrapeErr != nil {
+						return &testmocks.PerfCounterWatcherMock{
+							ScrapeErr: test.pageReadsScrapeErr,
+						}, nil
+					}
+				case pageWritesPerSec:
+					perfCounterMock.Val = defaultPageWritesPerSec
+					if test.pageWritesScrapeErr != nil {
+						return &testmocks.PerfCounterWatcherMock{
+							ScrapeErr: test.pageWritesScrapeErr,
+						}, nil
+					}
+				case pageFaultsPerSec:
+					perfCounterMock.Val = defaultPageFaultsPerSec
+					if test.pageFaultsScrapeErr != nil {
+						return &testmocks.PerfCounterWatcherMock{
+							ScrapeErr: test.pageFaultsScrapeErr,
+						}, nil
+					}
+				case pageMajPerSec:
+					perfCounterMock.Val = defaultPageMajPerSec
+					if test.pageMajFaultsScrapeErr != nil {
+						return &testmocks.PerfCounterWatcherMock{
+							ScrapeErr: test.pageMajFaultsScrapeErr,
+						}, nil
+					}
 				}
 
-				return &testmocks.PerfCounterWatcherMock{}, nil
+				return perfCounterMock, nil
 			}
 
 			err := scraper.start(context.Background(), componenttest.NewNopHost())
@@ -134,13 +184,25 @@ func TestScrape_Errors(t *testing.T) {
 			assert.NoError(t, err)
 
 			metrics := md.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics()
-			pagingUsageMetric := metrics.At(1)
-			assert.Equal(t, test.expectedUsedValue, pagingUsageMetric.Sum().DataPoints().At(0).IntValue())
-			assert.Equal(t, test.expectedFreeValue, pagingUsageMetric.Sum().DataPoints().At(1).IntValue())
-
-			pagingUtilizationMetric := metrics.At(2)
-			assert.Equal(t, test.expectedUtilizationUsedValue, pagingUtilizationMetric.Gauge().DataPoints().At(0).DoubleValue())
-			assert.Equal(t, test.expectedUtilizationFreeValue, pagingUtilizationMetric.Gauge().DataPoints().At(1).DoubleValue())
+			for i := 0; i < metrics.Len(); i++ {
+				metric := metrics.At(i)
+				switch metric.Name() {
+				case metadata.MetricsInfo.SystemPagingFaults.Name:
+					assert.Equal(t, defaultPageMajPerSec, metric.Sum().DataPoints().At(0).IntValue())
+					assert.Equal(t, defaultPageFaultsPerSec-defaultPageMajPerSec, metric.Sum().DataPoints().At(1).IntValue())
+				case metadata.MetricsInfo.SystemPagingOperations.Name:
+					assert.Equal(t, defaultPageReadsPerSec, metric.Sum().DataPoints().At(0).IntValue())
+					assert.Equal(t, defaultPageWritesPerSec, metric.Sum().DataPoints().At(1).IntValue())
+				case metadata.MetricsInfo.SystemPagingUsage.Name:
+					assert.Equal(t, test.expectedUsedValue, metric.Sum().DataPoints().At(0).IntValue())
+					assert.Equal(t, test.expectedFreeValue, metric.Sum().DataPoints().At(1).IntValue())
+				case metadata.MetricsInfo.SystemPagingUtilization.Name:
+					assert.Equal(t, test.expectedUtilizationUsedValue, metric.Gauge().DataPoints().At(0).DoubleValue())
+					assert.Equal(t, test.expectedUtilizationFreeValue, metric.Gauge().DataPoints().At(1).DoubleValue())
+				default:
+					assert.Fail(t, "Unexpected metric found", metric.Name())
+				}
+			}
 		})
 	}
 }
@@ -159,10 +221,10 @@ func TestPagingScrapeWithRealData(t *testing.T) {
 	require.NotNil(t, metrics, "Metrics cannot be nil")
 
 	// Expected metric names for paging scraper.
-	// Note: the `system.paging.faults` is enabled by default, but is not being collected on Windows.
 	expectedMetrics := map[string]bool{
-		"system.paging.operations": false,
-		"system.paging.usage":      false,
+		metadata.MetricsInfo.SystemPagingOperations.Name: false,
+		metadata.MetricsInfo.SystemPagingUsage.Name:      false,
+		metadata.MetricsInfo.SystemPagingFaults.Name:     false,
 	}
 
 	internal.AssertExpectedMetrics(t, expectedMetrics, metrics)
