Skip to content

Commit ecd5d88

Browse files
authored
Add identifier start Unicode support for Postgres, MySQL and Redshift (#1944)
1 parent c5e6ba5 commit ecd5d88

File tree

4 files changed

+18
-8
lines changed

4 files changed

+18
-8
lines changed

src/dialect/mysql.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,13 @@ impl Dialect for MySqlDialect {
4343
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
4444
// Identifiers which begin with a digit are recognized while tokenizing numbers,
4545
// so they can be distinguished from exponent numeric literals.
46+
// MySQL also supports non-ASCII UTF-8 characters
4647
ch.is_alphabetic()
4748
|| ch == '_'
4849
|| ch == '$'
4950
|| ch == '@'
5051
|| ('\u{0080}'..='\u{ffff}').contains(&ch)
52+
|| !ch.is_ascii()
5153
}
5254

5355
fn is_identifier_part(&self, ch: char) -> bool {

src/dialect/postgresql.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,9 @@ impl Dialect for PostgreSqlDialect {
6565
}
6666

6767
fn is_identifier_start(&self, ch: char) -> bool {
68-
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
69-
// We don't yet support identifiers beginning with "letters with
70-
// diacritical marks"
71-
ch.is_alphabetic() || ch == '_'
68+
ch.is_alphabetic() || ch == '_' ||
69+
// PostgreSQL supports Unicode characters in identifiers.
70+
!ch.is_ascii()
7271
}
7372

7473
fn is_identifier_part(&self, ch: char) -> bool {

src/dialect/redshift.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,9 @@ impl Dialect for RedshiftSqlDialect {
8080
}
8181

8282
fn is_identifier_start(&self, ch: char) -> bool {
83-
// Extends Postgres dialect with sharp and UTF-8 multibyte chars
83+
// UTF-8 multibyte characters are supported in identifiers via the PostgreSqlDialect.
8484
// https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
85-
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || !ch.is_ascii()
85+
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
8686
}
8787

8888
fn is_identifier_part(&self, ch: char) -> bool {

tests/sqlparser_common.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11151,9 +11151,7 @@ fn parse_non_latin_identifiers() {
1115111151
let supported_dialects = TestedDialects::new(vec![
1115211152
Box::new(GenericDialect {}),
1115311153
Box::new(DuckDbDialect {}),
11154-
Box::new(PostgreSqlDialect {}),
1115511154
Box::new(MsSqlDialect {}),
11156-
Box::new(MySqlDialect {}),
1115711155
]);
1115811156
assert!(supported_dialects
1115911157
.parse_sql_statements("SELECT 💝 FROM table1")
@@ -16147,3 +16145,14 @@ fn test_identifier_unicode_support() {
1614716145
]);
1614816146
let _ = dialects.verified_stmt(sql);
1614916147
}
16148+
16149+
#[test]
16150+
fn test_identifier_unicode_start() {
16151+
let sql = r#"SELECT 💝phone AS 💝 FROM customers"#;
16152+
let dialects = TestedDialects::new(vec![
16153+
Box::new(MySqlDialect {}),
16154+
Box::new(RedshiftSqlDialect {}),
16155+
Box::new(PostgreSqlDialect {}),
16156+
]);
16157+
let _ = dialects.verified_stmt(sql);
16158+
}

0 commit comments

Comments
 (0)