From d1b4172486960613b91421183d5f7e32d9ae76c2 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 1 Jul 2025 14:08:48 +0200 Subject: [PATCH 1/6] Shared: Factor out some helper predicates in alert filtering --- shared/util/codeql/util/AlertFiltering.qll | 23 ++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index 1bc366c0416d..f264c20aeda7 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -75,19 +75,30 @@ extensible predicate restrictAlertsToExactLocation( /** Module for applying alert location filtering. */ module AlertFilteringImpl { + pragma[nomagic] + private predicate restrictAlertsToEntireFile(string filePath) { restrictAlertsTo(filePath, 0, 0) } + + pragma[nomagic] + private predicate restrictAlertsToStartLine(string filePath, int line) { + exists(int startLineStart, int startLineEnd | + restrictAlertsTo(filePath, startLineStart, startLineEnd) and + line = [startLineStart .. startLineEnd] + ) + } + /** Applies alert filtering to the given location. */ bindingset[location] predicate filterByLocation(Location location) { not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) or - exists(string filePath, int startLineStart, int startLineEnd | - restrictAlertsTo(filePath, startLineStart, startLineEnd) - | - startLineStart = 0 and - startLineEnd = 0 and + exists(string filePath | + restrictAlertsToEntireFile(filePath) and location.hasLocationInfo(filePath, _, _, _, _) or - location.hasLocationInfo(filePath, [startLineStart .. startLineEnd], _, _, _) + exists(int line | + restrictAlertsToStartLine(filePath, line) and + location.hasLocationInfo(filePath, line, _, _, _) + ) ) or exists(string filePath, int startLine, int startColumn, int endLine, int endColumn | From 8b345518f4f2dc3cdcdcb9a9339a0b3ea2175536 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 1 Jul 2025 14:54:41 +0200 Subject: [PATCH 2/6] Shared: Add approximate version of getASelected{Source,Sink}Location --- shared/dataflow/codeql/dataflow/DataFlow.qll | 36 +++++++++++++++++++ .../codeql/dataflow/internal/DataFlowImpl.qll | 4 +++ .../dataflow/internal/DataFlowImplStage1.qll | 8 +++-- shared/util/codeql/util/AlertFiltering.qll | 36 +++++++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 3483287e3b39..93803af552bc 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -459,6 +459,15 @@ module Configs Lang> { */ default Location getASelectedSourceLocation(Node source) { result = source.getLocation() } + /** + * Like `getASelectedSourceLocation`, but only has to get a location _containing_ the + * actual location associated with `source`. + * + * This prunes fewer sources than `getASelectedSourceLocation` but leaves room for the possibility + * that a more precise location can be selected in the query. + */ + default Location getASelectedSourceLocationApprox(Node source) { none() } + /** * Gets a location that will be associated with the given `sink` in a * diff-informed query that uses this configuration (see @@ -469,6 +478,15 @@ module Configs Lang> { * report the sink at all, this predicate can be `none()`. */ default Location getASelectedSinkLocation(Node sink) { result = sink.getLocation() } + + /** + * Like `getASelectedSinkLocation`, but only has to get a location _containing_ the + * actual location associated with `sink`. + * + * This prunes fewer sinks than `getASelectedSinkLocation` but leaves room for the possibility + * that a more precise location can be selected in the query. + */ + default Location getASelectedSinkLocationApprox(Node sink) { none() } } /** An input configuration for data flow using flow state. */ @@ -608,6 +626,15 @@ module Configs Lang> { */ default Location getASelectedSourceLocation(Node source) { result = source.getLocation() } + /** + * Like `getASelectedSourceLocation`, but only has to get a location _containing_ the + * actual location associated with `source`. + * + * This prunes fewer sources than `getASelectedSourceLocation` but leaves room for the possibility + * that a more precise location can be selected in the query. + */ + default Location getASelectedSourceLocationApprox(Node source) { none() } + /** * Gets a location that will be associated with the given `sink` in a * diff-informed query that uses this configuration (see @@ -618,6 +645,15 @@ module Configs Lang> { * report the sink at all, this predicate can be `none()`. */ default Location getASelectedSinkLocation(Node sink) { result = sink.getLocation() } + + /** + * Like `getASelectedSinkLocation`, but only has to get a location _containing_ the + * actual location associated with `sink`. + * + * This prunes fewer sinks than `getASelectedSinkLocation` but leaves room for the possibility + * that a more precise location can be selected in the query. + */ + default Location getASelectedSinkLocationApprox(Node sink) { none() } } } diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index a7e0736432ac..b90512bdb237 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -145,7 +145,11 @@ module MakeImpl Lang> { Location getASelectedSourceLocation(Node source); + Location getASelectedSourceLocationApprox(Node source); + Location getASelectedSinkLocation(Node sink); + + Location getASelectedSinkLocationApprox(Node sink); } /** diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll index c7883df0de18..5e85cab00d9d 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll @@ -133,7 +133,9 @@ module MakeImplStage1 Lang> { private predicate isFilteredSource(Node source) { Config::isSource(source, _) and if Config::observeDiffInformedIncrementalMode() - then AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) + then + AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) or + AlertFiltering::filterByLocationApprox(Config::getASelectedSourceLocationApprox(source)) else any() } @@ -144,7 +146,9 @@ module MakeImplStage1 Lang> { Config::isSink(sink) ) and if Config::observeDiffInformedIncrementalMode() - then AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) + then + AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) or + AlertFiltering::filterByLocationApprox(Config::getASelectedSinkLocationApprox(sink)) else any() } diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index f264c20aeda7..97e6198a16cf 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -107,4 +107,40 @@ module AlertFilteringImpl { location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) ) } + + /** + * Holds if some subrange within `location` would be accepted by alert filtering. + * + * There does not need to exist a `Location` corresponding to that subrange. + */ + bindingset[location] + predicate filterByLocationApprox(Location location) { + not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) + or + exists(string filePath | + restrictAlertsToEntireFile(filePath) and + location.hasLocationInfo(filePath, _, _, _, _) + or + exists(int locStartLine, int locEndLine | + location.hasLocationInfo(filePath, locStartLine, _, locEndLine, _) + | + restrictAlertsToStartLine(filePath, [locStartLine .. locEndLine]) + ) + ) + or + // Check if an exact filter-location is fully contained in `location`. + // This is slow but only used for testing. + exists( + string filePath, int startLine, int startColumn, int endLine, int endColumn, + int filterStartLine, int filterStartColumn, int filterEndLine, int filterEndColumn + | + location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) and + restrictAlertsToExactLocation(filePath, filterStartLine, filterStartColumn, filterEndLine, + filterEndColumn) and + startLine <= filterStartLine and + (startLine != filterStartLine or startColumn <= filterStartColumn) and + endLine >= filterEndLine and + (endLine != filterEndLine or endColumn >= filterEndColumn) + ) + } } From d65da1f8a1bc92966c5ac1e6a67cc9a2d4f59233 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 1 Jul 2025 15:52:37 +0200 Subject: [PATCH 3/6] Ruby: enable for PolyReDos but document why it still doesnt work --- .../ruby/security/regexp/PolynomialReDoSQuery.qll | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll b/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll index 98a42fcf5e7c..0d955d1eb245 100644 --- a/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll +++ b/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll @@ -18,6 +18,18 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig { predicate isSink(DataFlow::Node sink) { sink instanceof Sink } predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer } + + // Diff-informedness is disabled because of RegExpTerms having incorrect locations when + // the regexp is parsed from a string arising from constant folding. + predicate observeDiffInformedIncrementalMode() { none() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + result = sink.(Sink).getHighlight().getLocation() + } + + Location getASelectedSinkLocationApprox(DataFlow::Node sink) { + result = sink.(Sink).getRegExp().getRootTerm().getLocation() + } } /** From a46b5f9529e7cffb7cd1ceacdbbc766c7630362c Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 1 Jul 2025 16:05:22 +0200 Subject: [PATCH 4/6] Python: enable diff-informedness for poly redos using approximate related locations --- .../security/dataflow/PolynomialReDoSQuery.qll | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll index 0e52764c1950..a6602b30b16d 100644 --- a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll +++ b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll @@ -18,21 +18,13 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig { predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer } - // Diff-informed incremental mode is currently disabled for this query due to - // API limitations. The query exposes sink.getABacktrackingTerm() as an alert - // location, but there is no way to express that information through - // getASelectedSinkLocation() because there is no @location in the CodeQL - // database that corresponds to a term inside a regular expression. As a - // result, this query could miss alerts in diff-informed incremental mode. - // - // To address this problem, we need to have a version of - // getASelectedSinkLocation() that uses hasLocationInfo() instead of - // returning Location objects. - predicate observeDiffInformedIncrementalMode() { none() } + predicate observeDiffInformedIncrementalMode() { any() } Location getASelectedSinkLocation(DataFlow::Node sink) { result = sink.(Sink).getHighlight().getLocation() - or + } + + Location getASelectedSinkLocationApprox(DataFlow::Node sink) { result = sink.(Sink).getABacktrackingTerm().getLocation() } } From 82d190f4bf298423b26bf9f2729e534d275638f4 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 1 Jul 2025 16:17:24 +0200 Subject: [PATCH 5/6] Java: use approximate related sink locations in polynomial redos --- .../security/regexp/PolynomialReDoSQuery.qll | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll b/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll index ba65e13dd611..f5d0190068b9 100644 --- a/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll +++ b/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll @@ -47,6 +47,24 @@ module PolynomialRedosConfig implements DataFlow::ConfigSig { node instanceof SimpleTypeSanitizer or node.asExpr().(MethodCall).getMethod() instanceof LengthRestrictedMethod } + + predicate observeDiffInformedIncrementalMode() { any() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + exists(SuperlinearBackTracking::PolynomialBackTrackingTerm regexp | + regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp() + | + result = sink.getLocation() + ) + } + + Location getASelectedSinkLocationApprox(DataFlow::Node sink) { + exists(SuperlinearBackTracking::PolynomialBackTrackingTerm regexp | + regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp() + | + result = regexp.getLocation() + ) + } } module PolynomialRedosFlow = TaintTracking::Global; From 4a2d7950760f783441057970f2b1f62b6c029336 Mon Sep 17 00:00:00 2001 From: Asger F Date: Wed, 2 Jul 2025 14:38:10 +0200 Subject: [PATCH 6/6] Shared: Make approximate location filtering the default behaviour --- .../security/regexp/PolynomialReDoSQuery.qll | 8 +---- .../dataflow/PolynomialReDoSQuery.qll | 4 +-- .../security/regexp/PolynomialReDoSQuery.qll | 4 +-- shared/dataflow/codeql/dataflow/DataFlow.qll | 36 ------------------- .../codeql/dataflow/internal/DataFlowImpl.qll | 4 --- .../dataflow/internal/DataFlowImplStage1.qll | 8 ++--- shared/util/codeql/util/AlertFiltering.qll | 26 -------------- 7 files changed, 5 insertions(+), 85 deletions(-) diff --git a/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll b/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll index f5d0190068b9..767ebc97437b 100644 --- a/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll +++ b/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll @@ -55,13 +55,7 @@ module PolynomialRedosConfig implements DataFlow::ConfigSig { regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp() | result = sink.getLocation() - ) - } - - Location getASelectedSinkLocationApprox(DataFlow::Node sink) { - exists(SuperlinearBackTracking::PolynomialBackTrackingTerm regexp | - regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp() - | + or result = regexp.getLocation() ) } diff --git a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll index a6602b30b16d..89aa4961e6ef 100644 --- a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll +++ b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll @@ -22,9 +22,7 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig { Location getASelectedSinkLocation(DataFlow::Node sink) { result = sink.(Sink).getHighlight().getLocation() - } - - Location getASelectedSinkLocationApprox(DataFlow::Node sink) { + or result = sink.(Sink).getABacktrackingTerm().getLocation() } } diff --git a/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll b/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll index 0d955d1eb245..81179717e01e 100644 --- a/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll +++ b/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll @@ -25,9 +25,7 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig { Location getASelectedSinkLocation(DataFlow::Node sink) { result = sink.(Sink).getHighlight().getLocation() - } - - Location getASelectedSinkLocationApprox(DataFlow::Node sink) { + or result = sink.(Sink).getRegExp().getRootTerm().getLocation() } } diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 93803af552bc..3483287e3b39 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -459,15 +459,6 @@ module Configs Lang> { */ default Location getASelectedSourceLocation(Node source) { result = source.getLocation() } - /** - * Like `getASelectedSourceLocation`, but only has to get a location _containing_ the - * actual location associated with `source`. - * - * This prunes fewer sources than `getASelectedSourceLocation` but leaves room for the possibility - * that a more precise location can be selected in the query. - */ - default Location getASelectedSourceLocationApprox(Node source) { none() } - /** * Gets a location that will be associated with the given `sink` in a * diff-informed query that uses this configuration (see @@ -478,15 +469,6 @@ module Configs Lang> { * report the sink at all, this predicate can be `none()`. */ default Location getASelectedSinkLocation(Node sink) { result = sink.getLocation() } - - /** - * Like `getASelectedSinkLocation`, but only has to get a location _containing_ the - * actual location associated with `sink`. - * - * This prunes fewer sinks than `getASelectedSinkLocation` but leaves room for the possibility - * that a more precise location can be selected in the query. - */ - default Location getASelectedSinkLocationApprox(Node sink) { none() } } /** An input configuration for data flow using flow state. */ @@ -626,15 +608,6 @@ module Configs Lang> { */ default Location getASelectedSourceLocation(Node source) { result = source.getLocation() } - /** - * Like `getASelectedSourceLocation`, but only has to get a location _containing_ the - * actual location associated with `source`. - * - * This prunes fewer sources than `getASelectedSourceLocation` but leaves room for the possibility - * that a more precise location can be selected in the query. - */ - default Location getASelectedSourceLocationApprox(Node source) { none() } - /** * Gets a location that will be associated with the given `sink` in a * diff-informed query that uses this configuration (see @@ -645,15 +618,6 @@ module Configs Lang> { * report the sink at all, this predicate can be `none()`. */ default Location getASelectedSinkLocation(Node sink) { result = sink.getLocation() } - - /** - * Like `getASelectedSinkLocation`, but only has to get a location _containing_ the - * actual location associated with `sink`. - * - * This prunes fewer sinks than `getASelectedSinkLocation` but leaves room for the possibility - * that a more precise location can be selected in the query. - */ - default Location getASelectedSinkLocationApprox(Node sink) { none() } } } diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index b90512bdb237..a7e0736432ac 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -145,11 +145,7 @@ module MakeImpl Lang> { Location getASelectedSourceLocation(Node source); - Location getASelectedSourceLocationApprox(Node source); - Location getASelectedSinkLocation(Node sink); - - Location getASelectedSinkLocationApprox(Node sink); } /** diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll index 5e85cab00d9d..c7883df0de18 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll @@ -133,9 +133,7 @@ module MakeImplStage1 Lang> { private predicate isFilteredSource(Node source) { Config::isSource(source, _) and if Config::observeDiffInformedIncrementalMode() - then - AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) or - AlertFiltering::filterByLocationApprox(Config::getASelectedSourceLocationApprox(source)) + then AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) else any() } @@ -146,9 +144,7 @@ module MakeImplStage1 Lang> { Config::isSink(sink) ) and if Config::observeDiffInformedIncrementalMode() - then - AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) or - AlertFiltering::filterByLocationApprox(Config::getASelectedSinkLocationApprox(sink)) + then AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) else any() } diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index 97e6198a16cf..091c2a89d00b 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -89,32 +89,6 @@ module AlertFilteringImpl { /** Applies alert filtering to the given location. */ bindingset[location] predicate filterByLocation(Location location) { - not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) - or - exists(string filePath | - restrictAlertsToEntireFile(filePath) and - location.hasLocationInfo(filePath, _, _, _, _) - or - exists(int line | - restrictAlertsToStartLine(filePath, line) and - location.hasLocationInfo(filePath, line, _, _, _) - ) - ) - or - exists(string filePath, int startLine, int startColumn, int endLine, int endColumn | - restrictAlertsToExactLocation(filePath, startLine, startColumn, endLine, endColumn) - | - location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) - ) - } - - /** - * Holds if some subrange within `location` would be accepted by alert filtering. - * - * There does not need to exist a `Location` corresponding to that subrange. - */ - bindingset[location] - predicate filterByLocationApprox(Location location) { not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) or exists(string filePath |