diff --git a/javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md b/javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md new file mode 100644 index 000000000000..b10509c0e068 --- /dev/null +++ b/javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Removed `libxmljs` as an XML bomb sink. The underlying libxml2 library now includes [entity reference loop detection](https://github.com/GNOME/libxml2/blob/0c948334a8f5c66d50e9f8992e62998017dc4fc6/NEWS#L905-L908) that prevents XML bomb attacks. diff --git a/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll b/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll index a451182aa21a..c0a783c17644 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll @@ -49,9 +49,7 @@ module XML { override JS::Expr getSourceArgument() { result = this.getArgument(0) } override predicate resolvesEntities(EntityKind kind) { - // internal entities are always resolved - kind = InternalEntity() - or + not kind = InternalEntity() and // other entities are only resolved if the configuration option `noent` is set to `true` exists(JS::Expr noent | this.hasOptionArgument(1, "noent", noent) and @@ -126,8 +124,9 @@ module XML { override JS::Expr getSourceArgument() { result = this.getArgument(0) } override predicate resolvesEntities(EntityKind kind) { - // entities are resolved by default - any() + // SAX parsers in libxmljs also inherit libxml2's protection against XML bombs + kind = ExternalEntity(_) or + kind = ParameterEntity(true) } override DataFlow::Node getAResult() { @@ -149,8 +148,9 @@ module XML { override JS::Expr getSourceArgument() { result = this.getArgument(0) } override predicate resolvesEntities(EntityKind kind) { - // entities are resolved by default - any() + // SAX push parsers in libxmljs also inherit libxml2's protection against XML bombs + kind = ExternalEntity(_) or + kind = ParameterEntity(true) } override DataFlow::Node getAResult() { diff --git a/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected b/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected index 2b4d41804915..6a5c2adfb7a1 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected +++ b/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected @@ -5,10 +5,6 @@ | domparser.js:11:57:11:59 | src | domparser.js:2:13:2:36 | documen ... .search | domparser.js:11:57:11:59 | src | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | domparser.js:2:13:2:36 | documen ... .search | user-provided value | | expat.js:6:16:6:36 | req.par ... e-xml") | expat.js:6:16:6:36 | req.par ... e-xml") | expat.js:6:16:6:36 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | expat.js:6:16:6:36 | req.par ... e-xml") | user-provided value | | jquery.js:4:14:4:16 | src | jquery.js:2:13:2:36 | documen ... .search | jquery.js:4:14:4:16 | src | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | jquery.js:2:13:2:36 | documen ... .search | user-provided value | -| libxml.js:5:21:5:41 | req.par ... e-xml") | libxml.js:5:21:5:41 | req.par ... e-xml") | libxml.js:5:21:5:41 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.js:5:21:5:41 | req.par ... e-xml") | user-provided value | -| libxml.noent.js:5:21:5:41 | req.par ... e-xml") | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | user-provided value | -| libxml.sax.js:6:22:6:42 | req.par ... e-xml") | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | user-provided value | -| libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | user-provided value | edges | closure.js:2:7:2:36 | src | closure.js:3:24:3:26 | src | provenance | | | closure.js:2:13:2:36 | documen ... .search | closure.js:2:7:2:36 | src | provenance | | @@ -31,8 +27,4 @@ nodes | jquery.js:2:7:2:36 | src | semmle.label | src | | jquery.js:2:13:2:36 | documen ... .search | semmle.label | documen ... .search | | jquery.js:4:14:4:16 | src | semmle.label | src | -| libxml.js:5:21:5:41 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | -| libxml.noent.js:5:21:5:41 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | -| libxml.sax.js:6:22:6:42 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | -| libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | subpaths diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.js index 6af2da17ef5d..3f16e457dc38 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.js @@ -2,5 +2,5 @@ const express = require('express'); const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { - libxmljs.parseXml(req.param("some-xml")); // $ Alert - libxml expands internal general entities by default + libxmljs.parseXml(req.param("some-xml")); }); diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js index da133cc782e0..de633c04688a 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js @@ -2,5 +2,5 @@ const express = require('express'); const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { - libxmljs.parseXml(req.param("some-xml"), { noent: true }); // $ Alert - unguarded entity expansion + libxmljs.parseXml(req.param("some-xml"), { noent: true }); }); diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js index 6049a8297a98..dc7ec2ddec00 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js @@ -3,5 +3,5 @@ const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { const parser = new libxmljs.SaxParser(); - parser.parseString(req.param("some-xml")); // $ Alert - the SAX parser expands external entities by default + parser.parseString(req.param("some-xml")); }); diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js index 2fc4afc8ce47..15e63bf5d532 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js @@ -3,5 +3,5 @@ const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { const parser = new libxmljs.SaxPushParser(); - parser.push(req.param("some-xml")); // $ Alert - the SAX parser expands external entities by default + parser.push(req.param("some-xml")); }); diff --git a/python/ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md b/python/ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md new file mode 100644 index 000000000000..11ff0181a017 --- /dev/null +++ b/python/ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Removed `lxml` as an XML bomb sink. The underlying libxml2 library now includes [entity reference loop detection](https://github.com/lxml/lxml/blob/f33ac2c2f5f9c4c4c1fc47f363be96db308f2fa6/doc/FAQ.txt#L1077) that prevents XML bomb attacks. diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index c503d7d5cfbc..e043093ed49e 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -129,11 +129,6 @@ module Lxml { any(True t) ) or - kind.isXmlBomb() and - this.getKeywordParameter("huge_tree").getAValueReachingSink().asExpr() = any(True t) and - not this.getKeywordParameter("resolve_entities").getAValueReachingSink().asExpr() = - any(False t) - or kind.isDtdRetrieval() and this.getKeywordParameter("load_dtd").getAValueReachingSink().asExpr() = any(True t) and this.getKeywordParameter("no_network").getAValueReachingSink().asExpr() = any(False t) @@ -305,9 +300,8 @@ module Lxml { // note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O kind.isXxe() or - kind.isXmlBomb() and - this.getKeywordParameter("huge_tree").getAValueReachingSink().asExpr() = any(True t) - or + // libxml2 has built-in protection against XML bombs via entity reference loop detection, + // so lxml is not vulnerable to XML bomb attacks. kind.isDtdRetrieval() and this.getKeywordParameter("load_dtd").getAValueReachingSink().asExpr() = any(True t) and this.getKeywordParameter("no_network").getAValueReachingSink().asExpr() = any(False t) diff --git a/python/ql/test/library-tests/frameworks/lxml/parsing.py b/python/ql/test/library-tests/frameworks/lxml/parsing.py index 63cdc79b4c1d..bff508f24ab0 100644 --- a/python/ql/test/library-tests/frameworks/lxml/parsing.py +++ b/python/ql/test/library-tests/frameworks/lxml/parsing.py @@ -50,7 +50,7 @@ # Billion laughs vuln (also XXE) parser = lxml.etree.XMLParser(huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) # Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) @@ -63,5 +63,5 @@ # iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of # the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O -lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file +lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file diff --git a/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected index 8a7d7bc75e3d..e217064d1dfc 100644 --- a/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected +++ b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected @@ -1,14 +1,4 @@ edges -| test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:1:26:1:32 | ControlFlowNode for request | provenance | | -| test.py:1:26:1:32 | ControlFlowNode for request | test.py:19:19:19:25 | ControlFlowNode for request | provenance | | -| test.py:19:5:19:15 | ControlFlowNode for xml_content | test.py:30:34:30:44 | ControlFlowNode for xml_content | provenance | | -| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:5:19:15 | ControlFlowNode for xml_content | provenance | AdditionalTaintStep | nodes -| test.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember | -| test.py:1:26:1:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:19:5:19:15 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | subpaths #select -| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:30:34:30:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |