Skip to content

Commit ee47256

Browse files
authored
Add a converter to disable excessive internal metrics (#4276)
Version 0.93.0 adds a significant number of internal metrics upstream. We want to disable them at the prometheus receiver level until this is handled upstream.
1 parent ec0f6a2 commit ee47256

15 files changed

+769
-0
lines changed
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
// Copyright Splunk, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package configconverter
16+
17+
import (
18+
"context"
19+
"fmt"
20+
"strings"
21+
22+
"github.com/prometheus/common/model"
23+
"github.com/prometheus/prometheus/model/relabel"
24+
"go.opentelemetry.io/collector/confmap"
25+
)
26+
27+
const (
28+
// promJobNamePrefix is the name prefix of the prometheus jobs that scrape internal otel-collector metrics.
// Only scrape configs whose job_name starts with this prefix are modified by the converter.
29+
promJobNamePrefix = "otel-"
30+
31+
// Regex patterns of the internal metrics to drop. Each one is used as the "regex" of a
// metric relabel rule whose source label is the metric name.
32+
rpcMetricPattern = "otelcol_rpc_.*"
33+
httpMetricPattern = "otelcol_http_.*"
34+
batchMetricPattern = "otelcol_processor_batch_.*"
35+
)
36+
37+
// promScrapeConfigsKeys are the possible "::"-delimited config keys under which the scrape_configs of a
// prometheus receiver scraping internal collector metrics may be found. Every key is looked up; keys that
// are absent from the config are skipped.
38+
var promScrapeConfigsKeys = []string{
39+
"receivers::prometheus/internal::config::scrape_configs",
40+
"receivers::prometheus/agent::config::scrape_configs",
41+
"receivers::prometheus/k8s_cluster_receiver::config::scrape_configs",
42+
"receivers::prometheus/collector::config::scrape_configs",
43+
}
44+
45+
// DisableExcessiveInternalMetrics is a MapConverter that updates the config of the prometheus receiver scraping
46+
// internal collector metrics so that excessive internal metrics matching the following patterns are dropped:
47+
// - "otelcol_rpc_.*"
48+
// - "otelcol_http_.*"
49+
// - "otelcol_processor_batch_.*"
//
// Only scrape configs whose job_name starts with "otel-" are touched. A pattern is left alone when an existing
// metric_relabel_configs entry on the metric name already uses that exact regex, whatever its action.
50+
type DisableExcessiveInternalMetrics struct{}
51+
52+
func (DisableExcessiveInternalMetrics) Convert(_ context.Context, cfgMap *confmap.Conf) error {
53+
if cfgMap == nil {
54+
return fmt.Errorf("cannot DisableExcessiveInternalMetrics on nil *confmap.Conf")
55+
}
56+
57+
for _, promScrapeConfigsKey := range promScrapeConfigsKeys {
58+
scrapeConfMap := cfgMap.Get(promScrapeConfigsKey)
59+
if scrapeConfMap == nil {
60+
continue
61+
}
62+
63+
scrapeConfigs, ok := scrapeConfMap.([]any)
64+
if !ok {
65+
continue // Ignore invalid scrape_configs, as they will be caught by the config validation.
66+
}
67+
68+
for _, scrapeConfig := range scrapeConfigs {
69+
sc, ok := scrapeConfig.(map[string]any)
70+
if !ok {
71+
continue // Ignore Ignore invalid metric_relabel_configs, as they will be caught by the config validation.
72+
}
73+
jobName, ok := sc["job_name"]
74+
if !ok || !strings.HasPrefix(jobName.(string), promJobNamePrefix) {
75+
continue
76+
}
77+
78+
metricRelabelConfigs := sc["metric_relabel_configs"]
79+
if metricRelabelConfigs == nil {
80+
metricRelabelConfigs = make([]any, 0, 3)
81+
}
82+
mrcs, ok := metricRelabelConfigs.([]any)
83+
if !ok {
84+
continue // Ignore invalid metric_relabel_configs, as they will be caught by the config validation.
85+
}
86+
87+
foundRegexPatterns := make(map[string]bool)
88+
for _, metricRelabelConfig := range mrcs {
89+
mrc, ok := metricRelabelConfig.(map[string]any)
90+
if !ok {
91+
continue // Ignore invalid metric_relabel_config, as they will be caught by the config validation.
92+
}
93+
sourceLabels, ok := mrc["source_labels"].([]any)
94+
if !ok || len(sourceLabels) != 1 || sourceLabels[0] != model.MetricNameLabel {
95+
continue
96+
}
97+
regex, ok := mrc["regex"].(string)
98+
if !ok {
99+
continue
100+
}
101+
foundRegexPatterns[regex] = true
102+
}
103+
104+
for _, pattern := range []string{rpcMetricPattern, httpMetricPattern, batchMetricPattern} {
105+
if !foundRegexPatterns[pattern] {
106+
mrcs = append(mrcs, map[string]any{
107+
"source_labels": []any{model.MetricNameLabel},
108+
"regex": pattern,
109+
"action": string(relabel.Drop),
110+
})
111+
}
112+
}
113+
114+
sc["metric_relabel_configs"] = mrcs
115+
}
116+
117+
// Update the config with the new scrape_configs.
118+
if err := cfgMap.Merge(confmap.NewFromStringMap(map[string]any{promScrapeConfigsKey: scrapeConfigs})); err != nil {
119+
return err
120+
}
121+
}
122+
123+
return nil
124+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Copyright Splunk, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package configconverter
16+
17+
import (
18+
"context"
19+
"testing"
20+
21+
"github.com/stretchr/testify/assert"
22+
"github.com/stretchr/testify/require"
23+
"go.opentelemetry.io/collector/confmap/confmaptest"
24+
)
25+
26+
func TestDisableExcessiveInternalMetrics(t *testing.T) {
27+
tests := []struct {
28+
name string
29+
input string
30+
wantOutput string
31+
}{
32+
{
33+
name: "no_prom_receiver",
34+
input: "testdata/disable_excessive_internal_metrics/no_prom_receiver.yaml",
35+
wantOutput: "testdata/disable_excessive_internal_metrics/no_prom_receiver.yaml",
36+
},
37+
{
38+
name: "no_scrape_configs_prom_receiver",
39+
input: "testdata/disable_excessive_internal_metrics/no_scrape_configs.yaml",
40+
wantOutput: "testdata/disable_excessive_internal_metrics/no_scrape_configs.yaml",
41+
},
42+
{
43+
name: "different_job",
44+
input: "testdata/disable_excessive_internal_metrics/different_job.yaml",
45+
wantOutput: "testdata/disable_excessive_internal_metrics/different_job.yaml",
46+
},
47+
{
48+
name: "no_metric_relabel_configs_set",
49+
input: "testdata/disable_excessive_internal_metrics/no_metric_relabel_configs_set_input.yaml",
50+
wantOutput: "testdata/disable_excessive_internal_metrics/no_metric_relabel_configs_set_output.yaml",
51+
},
52+
{
53+
name: "metric_relabel_configs_with_other_actions",
54+
input: "testdata/disable_excessive_internal_metrics/metric_relabel_configs_with_other_actions_input.yaml",
55+
wantOutput: "testdata/disable_excessive_internal_metrics/metric_relabel_configs_with_other_actions_output.yaml",
56+
},
57+
{
58+
name: "metric_relabel_configs_with_batch_drop_action",
59+
input: "testdata/disable_excessive_internal_metrics/metric_relabel_configs_with_batch_drop_action_input.yaml",
60+
wantOutput: "testdata/disable_excessive_internal_metrics/metric_relabel_configs_with_batch_drop_action_output.yaml",
61+
},
62+
{
63+
name: "all_metric_relabel_configs_are_present",
64+
input: "testdata/disable_excessive_internal_metrics/all_metric_relabel_configs_are_present.yaml",
65+
wantOutput: "testdata/disable_excessive_internal_metrics/all_metric_relabel_configs_are_present.yaml",
66+
},
67+
}
68+
for _, tt := range tests {
69+
t.Run(tt.name, func(t *testing.T) {
70+
expectedCfgMap, err := confmaptest.LoadConf(tt.wantOutput)
71+
require.NoError(t, err)
72+
require.NotNil(t, expectedCfgMap)
73+
74+
cfgMap, err := confmaptest.LoadConf(tt.input)
75+
require.NoError(t, err)
76+
require.NotNil(t, cfgMap)
77+
78+
err = DisableExcessiveInternalMetrics{}.Convert(context.Background(), cfgMap)
79+
require.NoError(t, err)
80+
81+
assert.Equal(t, expectedCfgMap, cfgMap)
82+
})
83+
}
84+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
receivers:
2+
prometheus/agent:
3+
config:
4+
scrape_configs:
5+
- job_name: 'otel-agent'
6+
scrape_interval: 10s
7+
static_configs:
8+
- targets: ["${SPLUNK_LISTEN_INTERFACE}:8888"]
9+
metric_relabel_configs:
10+
- source_labels: [ __name__ ]
11+
regex: 'otelcol_rpc_.*'
12+
action: keep
13+
- source_labels: [ __name__ ]
14+
regex: 'otelcol_processor_batch_.*'
15+
action: drop
16+
- source_labels: [ __name__ ]
17+
regex: 'otelcol_http_.*'
18+
processors:
19+
memory_limiter:
20+
check_interval: 1s
21+
limit_mib: 4000
22+
spike_limit_mib: 800
23+
ballast_size_mib: 64
24+
exporters:
25+
debug:
26+
verbosity: normal
27+
sampling_initial: 2
28+
sampling_thereafter: 500
29+
service:
30+
pipelines:
31+
metrics:
32+
receivers:
33+
- prometheus/agent
34+
processors:
35+
- memory_limiter
36+
exporters:
37+
- logging
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
receivers:
2+
prometheus/internal:
3+
config:
4+
scrape_configs:
5+
- job_name: 'another-job'
6+
scrape_interval: 10s
7+
static_configs:
8+
- targets: ["${SPLUNK_LISTEN_INTERFACE}:8888"]
9+
metric_relabel_configs:
10+
- source_labels: [ __name__ ]
11+
regex: 'my-metric-.*'
12+
action: drop
13+
processors:
14+
memory_limiter:
15+
check_interval: 1s
16+
limit_mib: 4000
17+
spike_limit_mib: 800
18+
ballast_size_mib: 64
19+
exporters:
20+
debug:
21+
verbosity: normal
22+
sampling_initial: 2
23+
sampling_thereafter: 500
24+
service:
25+
pipelines:
26+
metrics:
27+
receivers:
28+
- prometheus/internal
29+
processors:
30+
- memory_limiter
31+
exporters:
32+
- logging
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
receivers:
2+
prometheus/internal:
3+
config:
4+
scrape_configs:
5+
- job_name: 'otel-collector'
6+
scrape_interval: 10s
7+
static_configs:
8+
- targets: ["${SPLUNK_LISTEN_INTERFACE}:8888"]
9+
metric_relabel_configs:
10+
- source_labels: [ __name__ ]
11+
regex: 'otelcol_processor_batch_.*'
12+
action: drop
13+
processors:
14+
memory_limiter:
15+
check_interval: 1s
16+
limit_mib: 4000
17+
spike_limit_mib: 800
18+
ballast_size_mib: 64
19+
exporters:
20+
debug:
21+
verbosity: normal
22+
sampling_initial: 2
23+
sampling_thereafter: 500
24+
service:
25+
pipelines:
26+
metrics:
27+
receivers:
28+
- prometheus/internal
29+
processors:
30+
- memory_limiter
31+
exporters:
32+
- logging
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
receivers:
2+
prometheus/internal:
3+
config:
4+
scrape_configs:
5+
- job_name: 'otel-collector'
6+
scrape_interval: 10s
7+
static_configs:
8+
- targets: ["${SPLUNK_LISTEN_INTERFACE}:8888"]
9+
metric_relabel_configs:
10+
- source_labels: [ __name__ ]
11+
regex: 'otelcol_processor_batch_.*'
12+
action: drop
13+
- source_labels: [ __name__ ]
14+
regex: 'otelcol_rpc_.*'
15+
action: drop
16+
- source_labels: [ __name__ ]
17+
regex: 'otelcol_http_.*'
18+
action: drop
19+
processors:
20+
memory_limiter:
21+
check_interval: 1s
22+
limit_mib: 4000
23+
spike_limit_mib: 800
24+
ballast_size_mib: 64
25+
exporters:
26+
debug:
27+
verbosity: normal
28+
sampling_initial: 2
29+
sampling_thereafter: 500
30+
service:
31+
pipelines:
32+
metrics:
33+
receivers:
34+
- prometheus/internal
35+
processors:
36+
- memory_limiter
37+
exporters:
38+
- logging
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
receivers:
2+
prometheus/internal:
3+
config:
4+
scrape_configs:
5+
- job_name: 'otel-collector'
6+
scrape_interval: 10s
7+
static_configs:
8+
- targets: ["${SPLUNK_LISTEN_INTERFACE}:8888"]
9+
metric_relabel_configs:
10+
- source_labels: [ __name__ ]
11+
regex: '.*grpc_io.*'
12+
action: drop
13+
- source_labels: [ __name__ ]
14+
target_label: 'label1'
15+
action: labeldrop
16+
- source_labels: [subsystem, server]
17+
separator: "@"
18+
regex: "(.*)@(.*)"
19+
replacement: "${2}/${1}"
20+
processors:
21+
memory_limiter:
22+
check_interval: 1s
23+
limit_mib: 4000
24+
spike_limit_mib: 800
25+
ballast_size_mib: 64
26+
exporters:
27+
debug:
28+
verbosity: normal
29+
sampling_initial: 2
30+
sampling_thereafter: 500
31+
service:
32+
pipelines:
33+
metrics:
34+
receivers:
35+
- prometheus/internal
36+
processors:
37+
- memory_limiter
38+
exporters:
39+
- logging

0 commit comments

Comments
 (0)