From 06236d3bd87ec4fb4b3f12f5e2835a04dee2b639 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 29 May 2023 18:36:02 +0200 Subject: [PATCH 1/2] Fix spec compliance error for DOMDocument::getElementsByTagNameNS Spec link: https://dom.spec.whatwg.org/#concept-getelementsbytagnamens Spec says we should match any namespace when '*' is provided. This was however not the case: elements that didn't have a namespace were not returned. This patch fixes the error by modifying the namespace check. --- ext/dom/php_dom.c | 7 +- ...ementsByTagNameNS_match_any_namespace.phpt | 82 +++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 01a206c0985bd..468a6c3eea1c3 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1270,10 +1270,15 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l { xmlNodePtr ret = NULL; + /* Note: The spec says that ns == NULL should change the namespace to the empty string. + * This is already done at the PHP_FUNCTIONs that create this iterator. + * The null case here actually means we want to match any namespace instead. */ + bool ns_match_any = ns == NULL || (ns[0] == '*' && ns[1] == '\0'); + while (nodep != NULL && (*cur <= index || index == -1)) { if (nodep->type == XML_ELEMENT_NODE) { if (xmlStrEqual(nodep->name, (xmlChar *)local) || xmlStrEqual((xmlChar *)"*", (xmlChar *)local)) { - if (ns == NULL || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && (xmlStrEqual(nodep->ns->href, (xmlChar *)ns) || xmlStrEqual((xmlChar *)"*", (xmlChar *)ns)))) { + if (ns_match_any || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) { if (*cur == index) { ret = nodep; break; diff --git a/ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt b/ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt new file mode 100644 index 0000000000000..460832fcb749f --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt @@ -0,0 +1,82 @@ +--TEST-- +DOMDocument::getElementsByTagNameNS() match any namespace +--EXTENSIONS-- +dom +--FILE-- + + +Books of the other guy.. + + + + xinclude: book.xml not found + + + + This is another namespace + + + +EOD; +$dom = new DOMDocument; + +// load the XML string defined above +$dom->loadXML($xml); + +function test($namespace, $local) { + global $dom; + $namespace_str = $namespace !== NULL ? "'$namespace'" : "null"; + echo "-- getElementsByTagNameNS($namespace_str, '$local') --\n"; + foreach ($dom->getElementsByTagNameNS($namespace, $local) as $element) { + echo 'local name: ', $element->localName, ', prefix: ', $element->prefix, "\n"; + } +} + +// Should *also* include objects even without a namespace +test(null, '*'); +// Should *also* include objects even without a namespace +test('*', '*'); +// Should *only* include objects without a namespace +test('', '*'); +// Should *only* include objects with the specified namespace +test('http://www.w3.org/2001/XInclude', '*'); +// Should not give any output +test('', 'fallback'); +// Should not give any output, because the null namespace is the same as the empty namespace +test(null, 'fallback'); +// Should only output the include from the empty namespace +test(null, 'include'); + +?> +--EXPECT-- +-- getElementsByTagNameNS(null, '*') -- +local name: chapter, prefix: +local name: title, prefix: +local name: para, prefix: +local name: error, prefix: +local name: include, prefix: +-- getElementsByTagNameNS('*', '*') -- +local name: chapter, prefix: +local name: title, prefix: +local name: para, prefix: +local name: include, prefix: xi +local name: fallback, prefix: xi +local name: error, prefix: +local name: include, prefix: +-- getElementsByTagNameNS('', '*') -- +local name: chapter, prefix: +local name: title, prefix: +local name: para, prefix: +local name: error, prefix: +local name: include, prefix: +-- getElementsByTagNameNS('http://www.w3.org/2001/XInclude', '*') -- +local name: include, prefix: xi +local name: fallback, prefix: xi +-- getElementsByTagNameNS('', 'fallback') -- +-- getElementsByTagNameNS(null, 'fallback') -- +-- getElementsByTagNameNS(null, 'include') -- +local name: include, prefix: From f6c919b6a647ca3178d533789f17efc691d593b7 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 29 May 2023 23:20:31 +0200 Subject: [PATCH 2/2] Improve comment --- ext/dom/php_dom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 468a6c3eea1c3..1883767d2e48b 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1270,9 +1270,9 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l { xmlNodePtr ret = NULL; - /* Note: The spec says that ns == NULL should change the namespace to the empty string. - * This is already done at the PHP_FUNCTIONs that create this iterator. - * The null case here actually means we want to match any namespace instead. */ + /* Note: The spec says that ns == '' must be transformed to ns == NULL. In other words, they are equivalent. + * PHP however does not do this and internally uses the empty string everywhere when the user provides ns == NULL. + * This is because for PHP ns == NULL has another meaning: "match every namespace" instead of "match the empty namespace". */ bool ns_match_any = ns == NULL || (ns[0] == '*' && ns[1] == '\0'); while (nodep != NULL && (*cur <= index || index == -1)) {