Skip to content

Commit 6b02280

Browse files
djaglowskisbylica-splunk
authored and committed
[pkg/ottl] Add ConvertTextToElementsXML Converter (open-telemetry#35364)
This adds a converter called `ConvertTextToElementsXML`. This serves as one of the granular transformations described in open-telemetry#35281, which will allow users to migrate any arbitrary XML document into a JSON-equivalent state. Also see open-telemetry#35328.
1 parent 99780e3 commit 6b02280

File tree

6 files changed

+305
-0
lines changed

6 files changed

+305
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: pkg/ottl
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Add ConvertTextToElementsXML Converter
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [35364]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: []

pkg/ottl/e2e/e2e_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,18 @@ func Test_e2e_converters(t *testing.T) {
353353
tCtx.GetLogRecord().Attributes().PutStr("test", `<Log><Message>This is a log message!</Message><id>1</id></Log>`)
354354
},
355355
},
356+
{
357+
statement: `set(body, ConvertTextToElementsXML("<a><b/>foo</a>"))`,
358+
want: func(tCtx ottllog.TransformContext) {
359+
tCtx.GetLogRecord().Body().SetStr("<a><b></b><value>foo</value></a>")
360+
},
361+
},
362+
{
363+
statement: `set(body, ConvertTextToElementsXML("<a><b/>foo</a><c><b/>bar</c>", "/a", "custom"))`,
364+
want: func(tCtx ottllog.TransformContext) {
365+
tCtx.GetLogRecord().Body().SetStr("<a><b></b><custom>foo</custom></a><c><b></b>bar</c>")
366+
},
367+
},
356368
{
357369
statement: `set(attributes["test"], Double(1.0))`,
358370
want: func(tCtx ottllog.TransformContext) {

pkg/ottl/ottlfuncs/README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,7 @@ Available Converters:
414414
- [Concat](#concat)
415415
- [ConvertCase](#convertcase)
416416
- [ConvertAttributesToElementsXML](#convertattributestoelementsxml)
417+
- [ConvertTextToElementsXML](#converttexttoelementsxml)
417418
- [Day](#day)
418419
- [Double](#double)
419420
- [Duration](#duration)
@@ -572,6 +573,36 @@ Convert only attributes within "Record" elements
572573

573574
- `ConvertAttributesToElementsXML(body, "/Log/Record")`
574575

576+
### ConvertTextToElementsXML
577+
578+
`ConvertTextToElementsXML(target, Optional[xpath], Optional[elementName])`
579+
580+
The `ConvertTextToElementsXML` Converter returns an edited version of an XML string where all text belongs to a dedicated element.
581+
582+
`target` is a Getter that returns a string. This string should be in XML format.
583+
If `target` is nil, is not a string, or cannot be parsed as XML, `ConvertTextToElementsXML` will return an error.
584+
585+
`xpath` (optional) is a string that specifies an [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/) expression that
586+
selects one or more elements. Content will only be converted within the result(s) of the xpath. The default is `/`.
587+
588+
`elementName` (optional) is a string that is used for any element tags that are created to wrap content.
589+
The default is `"value"`.
590+
591+
For example, `<a><b>foo</b>bar</a>` will be converted to `<a><b>foo</b><value>bar</value></a>`.
592+
593+
Examples:
594+
595+
Ensure all text content in a document is wrapped in a dedicated element
596+
597+
- `ConvertTextToElementsXML(body)`
598+
599+
Use a custom name for any new elements
600+
601+
- `ConvertTextToElementsXML(body, elementName = "custom")`
602+
603+
Convert only part of the document
604+
605+
- `ConvertTextToElementsXML(body, "/some/part", "value")`
575606

576607
### Day
577608

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"
5+
6+
import (
7+
"context"
8+
"fmt"
9+
10+
"github.com/antchfx/xmlquery"
11+
12+
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
13+
)
14+
15+
// ConvertTextToElementsXMLArguments holds the arguments accepted by the
// ConvertTextToElementsXML converter.
type ConvertTextToElementsXMLArguments[K any] struct {
16+
// Target supplies the string that is parsed as XML when the converter runs.
Target ottl.StringGetter[K]
17+
// XPath optionally selects the nodes to convert; when empty, "/" is used.
XPath ottl.Optional[string]
18+
// ElementName optionally names the wrapper elements; when empty, "value" is used.
ElementName ottl.Optional[string]
19+
}
20+
21+
func NewConvertTextToElementsXMLFactory[K any]() ottl.Factory[K] {
22+
return ottl.NewFactory("ConvertTextToElementsXML", &ConvertTextToElementsXMLArguments[K]{}, createConvertTextToElementsXMLFunction[K])
23+
}
24+
25+
func createConvertTextToElementsXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
26+
args, ok := oArgs.(*ConvertTextToElementsXMLArguments[K])
27+
28+
if !ok {
29+
return nil, fmt.Errorf("ConvertTextToElementsXML args must be of type *ConvertTextToElementsXMLAguments[K]")
30+
}
31+
32+
xPath := args.XPath.Get()
33+
if xPath == "" {
34+
xPath = "/"
35+
} else if err := validateXPath(xPath); err != nil {
36+
return nil, err
37+
}
38+
39+
elementName := args.ElementName.Get()
40+
if elementName == "" {
41+
elementName = "value"
42+
}
43+
44+
return convertTextToElementsXML(args.Target, xPath, elementName), nil
45+
}
46+
47+
// convertTextToElementsXML returns a string that is a result of wrapping any extraneous text nodes with a dedicated element.
48+
func convertTextToElementsXML[K any](target ottl.StringGetter[K], xPath string, elementName string) ottl.ExprFunc[K] {
49+
return func(ctx context.Context, tCtx K) (any, error) {
50+
var doc *xmlquery.Node
51+
if targetVal, err := target.Get(ctx, tCtx); err != nil {
52+
return nil, err
53+
} else if doc, err = parseNodesXML(targetVal); err != nil {
54+
return nil, err
55+
}
56+
for _, n := range xmlquery.Find(doc, xPath) {
57+
convertTextToElementsForNode(n, elementName)
58+
}
59+
return doc.OutputXML(false), nil
60+
}
61+
}
62+
63+
func convertTextToElementsForNode(parent *xmlquery.Node, elementName string) {
64+
switch parent.Type {
65+
case xmlquery.ElementNode: // ok
66+
case xmlquery.DocumentNode: // ok
67+
default:
68+
return
69+
}
70+
71+
if parent.FirstChild == nil {
72+
return
73+
}
74+
75+
// Convert any child nodes and count text and element nodes.
76+
var valueCount, elementCount int
77+
for child := parent.FirstChild; child != nil; child = child.NextSibling {
78+
if child.Type == xmlquery.ElementNode {
79+
convertTextToElementsForNode(child, elementName)
80+
elementCount++
81+
} else if child.Type == xmlquery.TextNode {
82+
valueCount++
83+
}
84+
}
85+
86+
// If there are no values to wrap, or if there is exactly one value OR one element, this node is all set.
87+
if valueCount == 0 || elementCount+valueCount <= 1 {
88+
return
89+
}
90+
91+
// At this point, we either have multiple values, or a mix of values and elements.
92+
// Either way, we need to wrap the values.
93+
for child := parent.FirstChild; child != nil; child = child.NextSibling {
94+
if child.Type != xmlquery.TextNode {
95+
continue
96+
}
97+
newTextNode := &xmlquery.Node{
98+
Type: xmlquery.TextNode,
99+
Data: child.Data,
100+
}
101+
// Change this node into an element
102+
child.Type = xmlquery.ElementNode
103+
child.Data = elementName
104+
child.FirstChild = newTextNode
105+
child.LastChild = newTextNode
106+
}
107+
}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"
5+
6+
import (
7+
"context"
8+
"testing"
9+
10+
"github.com/stretchr/testify/assert"
11+
12+
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
13+
)
14+
15+
func Test_ConvertTextToElementsXML(t *testing.T) {
16+
tests := []struct {
17+
name string
18+
document string
19+
xPath string
20+
elementName string
21+
want string
22+
}{
23+
{
24+
name: "nop",
25+
document: `<a><b/></a>`,
26+
want: `<a><b></b></a>`,
27+
},
28+
{
29+
name: "nop declaration",
30+
document: `<?xml version="1.0" encoding="UTF-8"?><a><b/></a>`,
31+
want: `<?xml version="1.0" encoding="UTF-8"?><a><b></b></a>`,
32+
},
33+
{
34+
name: "nop attributes",
35+
document: `<a foo="bar" hello="world"/>`,
36+
want: `<a foo="bar" hello="world"></a>`,
37+
},
38+
{
39+
name: "nop wrapped text",
40+
document: `<a>hello world</a>`,
41+
want: `<a>hello world</a>`,
42+
},
43+
{
44+
name: "simple hanging",
45+
document: `<a><b/>foo</a>`,
46+
want: `<a><b></b><value>foo</value></a>`,
47+
},
48+
{
49+
name: "simple hanging with tag name",
50+
elementName: "bar",
51+
document: `<a><b/>foo</a>`,
52+
want: `<a><b></b><bar>foo</bar></a>`,
53+
},
54+
{
55+
name: "multiple hanging same level",
56+
document: `<a>foo<b/>bar</a>`,
57+
want: `<a><value>foo</value><b></b><value>bar</value></a>`,
58+
},
59+
{
60+
name: "multiple hanging multiple levels",
61+
document: `<a>foo<b/>bar<c/>1<d>not</d>2<e><f/><f/></e></a>`,
62+
elementName: "v",
63+
want: `<a><v>foo</v><b></b><v>bar</v><c></c><v>1</v><d>not</d><v>2</v><e><f></f><f></f></e></a>`,
64+
},
65+
{
66+
name: "xpath select some",
67+
document: `<a><b><c/>foo</b><d><c/>bar</d><b><c/>baz</b></a>`,
68+
xPath: "/a/b",
69+
want: `<a><b><c></c><value>foo</value></b><d><c></c>bar</d><b><c></c><value>baz</value></b></a>`,
70+
},
71+
{
72+
name: "xpath with element name",
73+
document: `<a><b><c/>foo</b><d><c/>bar</d><b><c/>baz</b></a>`,
74+
xPath: "/a/b",
75+
elementName: "V",
76+
want: `<a><b><c></c><V>foo</V></b><d><c></c>bar</d><b><c></c><V>baz</V></b></a>`,
77+
},
78+
}
79+
factory := NewConvertTextToElementsXMLFactory[any]()
80+
for _, tt := range tests {
81+
t.Run(tt.name, func(t *testing.T) {
82+
args := &ConvertTextToElementsXMLArguments[any]{
83+
Target: ottl.StandardStringGetter[any]{
84+
Getter: func(_ context.Context, _ any) (any, error) {
85+
return tt.document, nil
86+
},
87+
},
88+
XPath: ottl.NewTestingOptional(tt.xPath),
89+
ElementName: ottl.NewTestingOptional(tt.elementName),
90+
}
91+
exprFunc, err := factory.CreateFunction(ottl.FunctionContext{}, args)
92+
assert.NoError(t, err)
93+
94+
result, err := exprFunc(context.Background(), nil)
95+
assert.NoError(t, err)
96+
assert.Equal(t, tt.want, result)
97+
})
98+
}
99+
}
100+
101+
func TestCreateConvertTextToElementsXMLFunc(t *testing.T) {
102+
factory := NewConvertTextToElementsXMLFactory[any]()
103+
fCtx := ottl.FunctionContext{}
104+
105+
// Invalid arg type
106+
exprFunc, err := factory.CreateFunction(fCtx, nil)
107+
assert.Error(t, err)
108+
assert.Nil(t, exprFunc)
109+
110+
// Invalid XPath should error on function creation
111+
exprFunc, err = factory.CreateFunction(
112+
fCtx, &ConvertTextToElementsXMLArguments[any]{
113+
XPath: ottl.NewTestingOptional("!"),
114+
})
115+
assert.Error(t, err)
116+
assert.Nil(t, exprFunc)
117+
118+
// Invalid XML should error on function execution
119+
exprFunc, err = factory.CreateFunction(
120+
fCtx, &ConvertTextToElementsXMLArguments[any]{
121+
Target: invalidXMLGetter(),
122+
})
123+
assert.NoError(t, err)
124+
assert.NotNil(t, exprFunc)
125+
_, err = exprFunc(context.Background(), nil)
126+
assert.Error(t, err)
127+
}

pkg/ottl/ottlfuncs/functions.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ func converters[K any]() []ottl.Factory[K] {
4141
NewConcatFactory[K](),
4242
NewConvertCaseFactory[K](),
4343
NewConvertAttributesToElementsXMLFactory[K](),
44+
NewConvertTextToElementsXMLFactory[K](),
4445
NewDayFactory[K](),
4546
NewDoubleFactory[K](),
4647
NewDurationFactory[K](),

0 commit comments

Comments
 (0)