Skip to content

Commit 8f4af0b

Browse files
committed
progress: API and docs refresh
1 parent 30e96b8 commit 8f4af0b

File tree

28 files changed

+2774
-605
lines changed

28 files changed

+2774
-605
lines changed

Cargo.toml

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ std = [
4545
"regex-automata/std",
4646
"regex-syntax/std",
4747
]
48+
# This feature enables the 'log' crate to emit messages. This is usually
49+
# only useful for folks working on the regex crate itself, but can be useful
50+
# if you're trying hard to do some performance hacking on regex patterns
51+
# themselves. Note that you'll need to pair this with a crate like 'env_logger'
52+
# to actually emit the log messages somewhere.
53+
logging = [
54+
"aho-corasick?/logging",
55+
"regex-automata/logging",
56+
]
4857
# The 'use_std' feature is DEPRECATED. It will be removed in regex 2. Until
4958
# then, it is an alias for the 'std' feature.
5059
use_std = ["std"]
@@ -64,11 +73,6 @@ perf = [
6473
"perf-inline",
6574
"perf-literal",
6675
]
67-
# Enables fast caching. (If disabled, caching is still used, but is slower.)
68-
# Currently, this feature has no effect. It used to remove the thread_local
69-
# dependency and use a slower internal cache, but now the default cache has
70-
# been improved and thread_local is no longer a dependency at all.
71-
perf-cache = []
7276
# Enables use of a lazy DFA when possible.
7377
perf-dfa = ["regex-automata/hybrid"]
7478
# Enables use of a fully compiled DFA when possible.
@@ -86,6 +90,11 @@ perf-literal = [
8690
"dep:memchr",
8791
"regex-automata/perf-literal",
8892
]
93+
# Enables fast caching. (If disabled, caching is still used, but is slower.)
94+
# Currently, this feature has no effect. It used to remove the thread_local
95+
# dependency and use a slower internal cache, but now the default cache has
96+
# been improved and thread_local is no longer a dependency at all.
97+
perf-cache = []
8998

9099

91100
# UNICODE DATA FEATURES
@@ -151,7 +160,7 @@ unstable = ["pattern"]
151160
# by default if the unstable feature is enabled.
152161
pattern = []
153162

154-
# For very fast prefix literal matching.
163+
# For very fast multi-prefix literal matching.
155164
[dependencies.aho-corasick]
156165
version = "1.0.0"
157166
optional = true
@@ -161,22 +170,22 @@ optional = true
161170
version = "2.5.0"
162171
optional = true
163172

164-
# For parsing regular expressions.
165-
[dependencies.regex-syntax]
166-
path = "regex-syntax"
167-
version = "0.7.1"
168-
default-features = false
169-
170173
# For the actual regex engines.
171174
[dependencies.regex-automata]
172175
path = "regex-automata"
173176
version = "0.3.0"
174177
default-features = false
175178
features = ["alloc", "syntax", "meta", "nfa-pikevm"]
176179

180+
# For parsing regular expressions.
181+
[dependencies.regex-syntax]
182+
path = "regex-syntax"
183+
version = "0.7.1"
184+
default-features = false
185+
177186
[dev-dependencies]
178187
# For examples.
179-
lazy_static = "1"
188+
once_cell = "1.17.1"
180189
# For property based tests.
181190
quickcheck = { version = "1.0.3", default-features = false }
182191
# To check README's example

regex-automata/src/meta/regex.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2819,6 +2819,12 @@ impl Config {
28192819
///
28202820
/// By default, `\n` is the line terminator.
28212821
///
2822+
/// **Warning**: This does not change the behavior of `.`. To do that,
2823+
/// you'll need to configure the syntax option
2824+
/// [`syntax::Config::line_terminator`](crate::util::syntax::Config::line_terminator)
2825+
/// in addition to this. Otherwise, `.` will continue to match any
2826+
/// character other than `\n`.
2827+
///
28222828
/// # Example
28232829
///
28242830
/// ```

regex-automata/src/util/syntax.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ pub struct Config {
147147
multi_line: bool,
148148
dot_matches_new_line: bool,
149149
crlf: bool,
150+
line_terminator: u8,
150151
swap_greed: bool,
151152
ignore_whitespace: bool,
152153
unicode: bool,
@@ -164,6 +165,7 @@ impl Config {
164165
multi_line: false,
165166
dot_matches_new_line: false,
166167
crlf: false,
168+
line_terminator: b'\n',
167169
swap_greed: false,
168170
ignore_whitespace: false,
169171
unicode: true,
@@ -239,6 +241,31 @@ impl Config {
239241
self
240242
}
241243

244+
/// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
245+
///
246+
/// Namely, instead of `.` (by default) matching everything except for `\n`,
247+
/// this will cause `.` to match everything except for the byte given.
248+
///
249+
/// If `.` is used in a context where Unicode mode is enabled and this byte
250+
/// isn't ASCII, then an error will be returned. When Unicode mode is
251+
/// disabled, any byte is permitted, but an error will be returned if UTF-8
252+
/// mode is enabled and the byte is non-ASCII.
253+
///
254+
/// In short, any ASCII value for a line terminator is always okay. But a
255+
/// non-ASCII byte might result in an error depending on whether Unicode
256+
/// mode or UTF-8 mode are enabled.
257+
///
258+
/// Note that if `R` mode is enabled then it always takes precedence and
259+
/// the line terminator will be treated as `\r` and `\n` simultaneously.
260+
///
261+
/// Note also that this *doesn't* impact the look-around assertions
262+
/// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
263+
/// configuration in the regex engine itself.
264+
pub fn line_terminator(mut self, byte: u8) -> Config {
265+
self.line_terminator = byte;
266+
self
267+
}
268+
242269
/// Enable or disable the "swap greed" flag by default.
243270
///
244271
/// When this is enabled, `.*` (for example) will become ungreedy and `.*?`
@@ -377,6 +404,11 @@ impl Config {
377404
self.crlf
378405
}
379406

407+
/// Returns the line terminator in this syntax configuration.
408+
pub fn get_line_terminator(&self) -> u8 {
409+
self.line_terminator
410+
}
411+
380412
/// Returns whether "swap greed" mode is enabled.
381413
pub fn get_swap_greed(&self) -> bool {
382414
self.swap_greed
@@ -410,6 +442,7 @@ impl Config {
410442
.multi_line(self.multi_line)
411443
.dot_matches_new_line(self.dot_matches_new_line)
412444
.crlf(self.crlf)
445+
.line_terminator(self.line_terminator)
413446
.swap_greed(self.swap_greed)
414447
.ignore_whitespace(self.ignore_whitespace)
415448
.utf8(self.utf8)
@@ -436,6 +469,7 @@ impl Config {
436469
.multi_line(self.multi_line)
437470
.crlf(self.crlf)
438471
.dot_matches_new_line(self.dot_matches_new_line)
472+
.line_terminator(self.line_terminator)
439473
.swap_greed(self.swap_greed)
440474
.utf8(self.utf8);
441475
}

regex-automata/tests/dfa/onepass/suite.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,4 +193,5 @@ fn config_syntax(test: &RegexTest) -> syntax::Config {
193193
.case_insensitive(test.case_insensitive())
194194
.unicode(test.unicode())
195195
.utf8(test.utf8())
196+
.line_terminator(test.line_terminator())
196197
}

regex-automata/tests/dfa/suite.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,7 @@ fn config_syntax(test: &RegexTest) -> syntax::Config {
391391
.case_insensitive(test.case_insensitive())
392392
.unicode(test.unicode())
393393
.utf8(test.utf8())
394+
.line_terminator(test.line_terminator())
394395
}
395396

396397
/// Execute an overlapping search, and for each match found, also find its

regex-automata/tests/hybrid/suite.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ fn config_syntax(test: &RegexTest) -> syntax::Config {
281281
.case_insensitive(test.case_insensitive())
282282
.unicode(test.unicode())
283283
.utf8(test.utf8())
284+
.line_terminator(test.line_terminator())
284285
}
285286

286287
/// Execute an overlapping search, and for each match found, also find its

regex-automata/tests/meta/suite.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,4 +196,5 @@ fn config_syntax(test: &RegexTest) -> syntax::Config {
196196
.case_insensitive(test.case_insensitive())
197197
.unicode(test.unicode())
198198
.utf8(test.utf8())
199+
.line_terminator(test.line_terminator())
199200
}

regex-automata/tests/nfa/thompson/backtrack/suite.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,4 +209,5 @@ fn config_syntax(test: &RegexTest) -> syntax::Config {
209209
.case_insensitive(test.case_insensitive())
210210
.unicode(test.unicode())
211211
.utf8(test.utf8())
212+
.line_terminator(test.line_terminator())
212213
}

regex-automata/tests/nfa/thompson/pikevm/suite.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,4 +158,5 @@ fn config_syntax(test: &RegexTest) -> syntax::Config {
158158
.case_insensitive(test.case_insensitive())
159159
.unicode(test.unicode())
160160
.utf8(test.utf8())
161+
.line_terminator(test.line_terminator())
161162
}

regex-lite/tests/lib.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ mod fuzz;
22
mod string;
33

44
const BLACKLIST: &[&str] = &[
5-
// CRLF-aware line anchors aren't supported in regex API yet.
6-
"crlf",
75
// Custom line terminators aren't supported in regex-lite. We could add them,
86
// but it didn't seem worth it.
97
"line-terminator",

0 commit comments

Comments
 (0)