|
22 | 22 | FIRST_NON_WHITESPACE_RE: Final = re.compile(r"\s*\t+\s*(\S)")
|
23 | 23 | UNICODE_RE = re.compile(
|
24 | 24 | r"(\\+)("
|
25 |
| - r"(u([a-zA-Z0-9]{4}))" |
26 |
| - r"|(U([a-zA-Z0-9]{0,8}))" |
27 |
| - r"|(x([a-zA-Z0-9]{2}))" |
28 |
| - r"|(N\{([a-zA-Z0-9]{2})\})" |
| 25 | + r"(u([a-zA-Z0-9]{4}))" # Matches 16-bit Unicode escapes, i.e. \uxxxx |
| 26 | + r"|(U([a-zA-Z0-9]{0,8}))" # Matches 32-bit Unicode escapes, i.e. \Uxxxxxxxx |
| 27 | + r"|(x([a-zA-Z0-9]{2}))" # Matches hex escapes, i.e. \xhh |
| 28 | + r"|(N\{([a-zA-Z0-9]{2})\})" # Matches named Unicode escapes, i.e. \N{name} |
29 | 29 | r")"
|
30 | 30 | )
|
31 | 31 |
|
@@ -253,23 +253,24 @@ def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
|
253 | 253 | text = leaf.value
|
254 | 254 | prefix = get_string_prefix(text)
|
255 | 255 |
|
256 |
| - def replace(m: Match[AnyStr]) -> AnyStr: |
| 256 | + def replace(m: Match[AnyStr]) -> str: |
257 | 257 | groups = m.groups()
|
| 258 | + back_slashes = str(groups[0]) |
258 | 259 |
|
259 |
| - if len(groups[0]) % 2 == 0 or prefix == "r": |
260 |
| - return groups[0] + groups[1] |
| 260 | + if len(back_slashes) % 2 == 0 or prefix == "r": |
| 261 | + return back_slashes + str(groups[1]) |
261 | 262 |
|
262 | 263 | if groups[2]:
|
263 | 264 | # \u
|
264 |
| - return groups[0] + "u" + groups[3].lower() |
| 265 | + return back_slashes + "u" + str(groups[3].lower()) |
265 | 266 | elif groups[4]:
|
266 | 267 | # \U
|
267 |
| - return groups[0] + "U" + groups[5].lower() |
| 268 | + return back_slashes + "U" + str(groups[5].lower()) |
268 | 269 | elif groups[6]:
|
269 | 270 | # \x
|
270 |
| - return groups[0] + "x" + groups[7].lower() |
| 271 | + return back_slashes + "x" + str(groups[7].lower()) |
271 | 272 | else:
|
272 | 273 | # \N{}
|
273 |
| - return groups[0] + "N{" + groups[9].upper() + "}" |
| 274 | + return back_slashes + "N{" + str(groups[9].upper()) + "}" |
274 | 275 |
|
275 | 276 | leaf.value = re.sub(UNICODE_RE, replace, text)
|
0 commit comments