Skip to content

Commit 6282b35

Browse files
authored
[Markdown] Fix HTML comment parser. (#2121)
Fix performance and correctness of HTML comment parser. Fixes #2119. RegExp had catastrophic backtracking. Also didn't match the spec it linked to. (Which is still CM 30, not 31.2. They differ, we may want to upgrade support eventually.) Fixed a few other incorrect parsings. - The content of `<?...?>`, `<!a...>` and `<![CDATA[...]]>` can contain newlines. Changed `.` to `[^]`. - The `<![CDATA[` tag is case sensitive. Changed RegExp to be case sensitive, so added `A-Z` to all the `a-z`s used in that regexp.
1 parent 7bf22c9 commit 6282b35

File tree

4 files changed

+68
-8
lines changed

4 files changed

+68
-8
lines changed

pkgs/markdown/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Update the README link to the markdown playground
44
(https://dart-lang.github.io/tools).
55
* Update `package:web` API references in the example.
6+
* Fix performance and correctness of HTML comment parser.
67
* Require Dart `^3.4.0`.
78

89
## 7.3.0

pkgs/markdown/lib/src/inline_syntaxes/inline_html_syntax.dart

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
// for details. All rights reserved. Use of this source code is governed by a
33
// BSD-style license that can be found in the LICENSE file.
44

5+
// These are RegExps, always use raw strings.
6+
// ignore_for_file: unnecessary_raw_strings
7+
58
import '../../markdown.dart';
69
import '../charcode.dart';
710
import '../patterns.dart';
@@ -22,23 +25,23 @@ class InlineHtmlSyntax extends TextSyntax {
2225

2326
// HTML comment, see
2427
// https://spec.commonmark.org/0.30/#html-comment.
25-
'<!--(?:(?:[^-<>]+-[^-<>]+)+|[^-<>]+)-->'
28+
r'<!--(?!-?>)[^\-]*-(?:[^\-]+-)*?->'
2629
'|'
2730

2831
// Processing-instruction, see
2932
// https://spec.commonmark.org/0.30/#processing-instruction
30-
r'<\?.*?\?>'
33+
r'<\?[^]*?\?>'
3134
'|'
3235

3336
// Declaration, see
3437
// https://spec.commonmark.org/0.30/#declaration.
35-
'(<![a-z]+.*?>)'
38+
r'(<![a-zA-Z]+[^]*?>)'
3639
'|'
3740

3841
// CDATA section, see
3942
// https://spec.commonmark.org/0.30/#cdata-section.
40-
r'(<!\[CDATA\[.*?]]>)';
43+
r'(<!\[CDATA\[[^]*?\]\]>)';
4144

4245
InlineHtmlSyntax()
43-
: super(_pattern, startCharacter: $lt, caseSensitive: false);
46+
: super(_pattern, startCharacter: $lt, caseSensitive: true);
4447
}

pkgs/markdown/lib/src/patterns.dart

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,15 @@ const namedTagDefinition =
6565
'<'
6666

6767
// Tag name.
68-
'[a-z][a-z0-9-]*'
68+
'[a-zA-Z][a-zA-Z0-9-]*'
6969

7070
// Attribute begins, see
7171
// https://spec.commonmark.org/0.30/#attribute.
7272
r'(?:\s+'
7373

7474
// Attribute name, see
7575
// https://spec.commonmark.org/0.30/#attribute-name.
76-
'[a-z_:][a-z0-9._:-]*'
76+
'[a-zA-Z_:][a-zA-Z0-9._:-]*'
7777

7878
//
7979
'(?:'
@@ -96,7 +96,7 @@ const namedTagDefinition =
9696

9797
// Closing tag, see
9898
// https://spec.commonmark.org/0.30/#closing-tag.
99-
r'</[a-z][a-z0-9-]*\s*>';
99+
r'</[a-zA-Z][a-zA-Z0-9-]*\s*>';
100100

101101
/// A pattern to match the start of an HTML block.
102102
///
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'package:markdown/markdown.dart';
6+
import 'package:test/test.dart';
7+
8+
void main() {
9+
test('HTML comment with dashes #2119', () {
10+
// See https://dartbug.com/tools/2119.
11+
// For this issue, the leading letter was needed,
12+
// an HTML comment starting a line is handled by a different path.
13+
// The empty line before the `-->` is needed.
14+
// The number of lines increase time exponentially.
15+
// The length of lines affect the base of the exponentiation.
16+
// Locally, three "Lorem-ipsum" lines ran in ~6 seconds, two in < 200 ms.
17+
// Adding a fourth line should ensure it cannot possibly finish in ten
18+
// seconds if the bug isn't fixed.
19+
const input = '''
20+
a <!--
21+
- Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
22+
tempor incididunt ut labore et dolore magna aliqua.
23+
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
24+
ut aliquip ex ea commodo consequat.
25+
- Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
26+
tempor incididunt ut labore et dolore magna aliqua.
27+
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
28+
ut aliquip ex ea commodo consequat.
29+
- Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
30+
tempor incididunt ut labore et dolore magna aliqua.
31+
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
32+
ut aliquip ex ea commodo consequat.
33+
- Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod
34+
tempor incididunt ut labore et dolore magna aliqua.
35+
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
36+
ut aliquip ex ea commodo consequat.
37+
-->
38+
''';
39+
40+
final time = Stopwatch()..start();
41+
final html = markdownToHtml(input); // Should not hang.
42+
expect(html, isNotNull); // To use the output.
43+
final elapsed = time.elapsedMilliseconds;
44+
expect(elapsed, lessThan(10000));
45+
});
46+
47+
test('HTML comment with lt/gt', () {
48+
// Incorrect parsing found as part of fixing #2119.
49+
// Now matches `<!--$text-->` where text
50+
// does not start with `>` or `->`, does not end with `-`,
51+
// and does not contain `--`.
52+
const input = 'a <!--<->>-<!-->';
53+
final html = markdownToHtml(input);
54+
expect(html, '<p>$input</p>\n');
55+
});
56+
}

0 commit comments

Comments
 (0)