Skip to content

Commit 0c69f89

Browse files
committed
[exporter/signalfx] Retry property update without tags
Property and tags updates are done in the API same call. Sometimes there are might be too many tags associated with a dimension key/value pair so the backend rejects it. We want to retry the call without tags in that situation given that properties are more valuable and should not be lost because of excessive amount of tags coming with them.
1 parent 7500155 commit 0c69f89

File tree

3 files changed

+102
-16
lines changed

3 files changed

+102
-16
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
2+
change_type: enhancement
3+
4+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
5+
component: exporter/signalfx
6+
7+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
8+
note: Enabling retrying for dimension properties update without tags in case of 400 response error.
9+
10+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
11+
issues: [36044]
12+
13+
# (Optional) One or more lines of additional information to render under the primary note.
14+
# These lines will be padded with 2 spaces and then inserted directly into the document.
15+
# Use pipe (|) for multiline entries.
16+
subtext: |
17+
Property and tag updates are done using the same API call. After this change, the exporter will retry once to sync
18+
properties in case of 400 response error.
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: [user]

exporter/signalfxexporter/internal/dimensions/dimclient.go

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -237,30 +237,32 @@ func (dc *DimensionClient) handleDimensionUpdate(ctx context.Context, dimUpdate
237237

238238
req = req.WithContext(
239239
context.WithValue(req.Context(), RequestFailedCallbackKey, RequestFailedCallback(func(statusCode int, err error) {
240-
if statusCode >= 400 && statusCode < 500 && statusCode != 404 {
241-
dc.logger.Error(
242-
"Unable to update dimension, not retrying",
243-
zap.Error(err),
244-
zap.String("URL", sanitize.URL(req.URL)),
245-
zap.String("dimensionUpdate", dimUpdate.String()),
246-
zap.Int("statusCode", statusCode),
247-
)
248-
249-
// Don't retry if it is a 4xx error (except 404) since these
250-
// imply an input/auth error, which is not going to be remedied
251-
// by retrying.
252-
// 404 errors are special because they can occur due to races
253-
// within the dimension patch endpoint.
254-
return
240+
retry := false
241+
retryMsg := "not retrying"
242+
if statusCode == 400 && len(dimUpdate.Tags) > 0 {
243+
// It's possible that number of tags is too large. In this case,
244+
// we should retry the request without tags to update the dimension properties at least.
245+
dimUpdate.Tags = nil
246+
retry = true
247+
retryMsg = "retrying without tags"
248+
} else if statusCode == 404 || statusCode >= 500 {
249+
// Retry on 5xx server errors or 404s which can occur due to races within the dimension patch endpoint.
250+
retry = true
251+
retryMsg = "retrying"
255252
}
256253

257254
dc.logger.Error(
258-
"Unable to update dimension, retrying",
255+
"Unable to update dimension, "+retryMsg,
259256
zap.Error(err),
260257
zap.String("URL", sanitize.URL(req.URL)),
261258
zap.String("dimensionUpdate", dimUpdate.String()),
262259
zap.Int("statusCode", statusCode),
263260
)
261+
262+
if !retry {
263+
return
264+
}
265+
264266
// The retry is meant to provide some measure of robustness against
265267
// temporary API failures. If the API is down for significant
266268
// periods of time, dimension updates will probably eventually back

exporter/signalfxexporter/internal/dimensions/dimclient_test.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,63 @@ func TestDimensionClient(t *testing.T) {
273273
require.EqualValues(t, 2, server.requestCount.Load())
274274
})
275275

276+
t.Run("successful retry without tags on 400 response", func(t *testing.T) {
277+
server.reset()
278+
server.respCode = http.StatusBadRequest
279+
280+
require.NoError(t, client.acceptDimension(&DimensionUpdate{
281+
Name: "AWSUniqueID",
282+
Value: "abcd",
283+
Properties: map[string]*string{
284+
"c": newString("d"),
285+
},
286+
Tags: map[string]bool{
287+
"running": true,
288+
},
289+
}))
290+
server.handleRequest()
291+
require.Empty(t, server.acceptedDims)
292+
293+
// The next successful request should be sent without tags.
294+
server.respCode = http.StatusOK
295+
server.handleRequest()
296+
require.Equal(t, []dim{
297+
{
298+
Key: "AWSUniqueID",
299+
Value: "abcd",
300+
Properties: map[string]*string{
301+
"c": newString("d"),
302+
},
303+
},
304+
}, server.acceptedDims)
305+
require.EqualValues(t, 2, server.requestCount.Load())
306+
})
307+
308+
t.Run("retry without tags only once", func(t *testing.T) {
309+
server.reset()
310+
server.respCode = http.StatusBadRequest
311+
312+
require.NoError(t, client.acceptDimension(&DimensionUpdate{
313+
Name: "AWSUniqueID",
314+
Value: "abcd",
315+
Properties: map[string]*string{
316+
"c": newString("d"),
317+
},
318+
Tags: map[string]bool{
319+
"running": true,
320+
},
321+
}))
322+
server.handleRequest()
323+
require.Empty(t, server.acceptedDims)
324+
325+
// handle retry
326+
server.handleRequest()
327+
328+
// ensure no more retries
329+
time.Sleep(100 * time.Millisecond)
330+
require.EqualValues(t, 2, server.requestCount.Load())
331+
})
332+
276333
t.Run("send successive quick updates to same dim", func(t *testing.T) {
277334
server.reset()
278335

0 commit comments

Comments
 (0)