From 942591309739c7fd9d8a5c58d180a5a82cb1b957 Mon Sep 17 00:00:00 2001
From: Etgar Perets <etgar.perets@satoricyber.com>
Date: Mon, 14 Jul 2025 16:18:06 +0300
Subject: [PATCH] SGA-11411 Added unquoted identifier Unicode support in
 PostgreSQL and MySQL, added a test for that, and adjusted a test to reflect
 this support

---
 src/dialect/mysql.rs      |  2 ++
 src/dialect/postgresql.rs |  7 +++----
 src/dialect/redshift.rs   |  4 ++--
 tests/sqlparser_common.rs | 13 +++++++++++--
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs
index b50c8df50..f7b5f574e 100644
--- a/src/dialect/mysql.rs
+++ b/src/dialect/mysql.rs
@@ -43,11 +43,13 @@ impl Dialect for MySqlDialect {
         // See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
         // Identifiers which begin with a digit are recognized while tokenizing numbers,
         // so they can be distinguished from exponent numeric literals.
+        // MySQL also supports non-ASCII UTF-8 characters
         ch.is_alphabetic()
             || ch == '_'
             || ch == '$'
             || ch == '@'
             || ('\u{0080}'..='\u{ffff}').contains(&ch)
+            || !ch.is_ascii()
     }
 
     fn is_identifier_part(&self, ch: char) -> bool {
diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
index c1f025574..9cea252c8 100644
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@@ -65,10 +65,9 @@ impl Dialect for PostgreSqlDialect {
     }
 
     fn is_identifier_start(&self, ch: char) -> bool {
-        // See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
-        // We don't yet support identifiers beginning with "letters with
-        // diacritical marks"
-        ch.is_alphabetic() || ch == '_'
+        ch.is_alphabetic() || ch == '_' ||
+        // PostgreSQL allows non-ASCII Unicode characters in identifiers.
+        !ch.is_ascii()
     }
 
     fn is_identifier_part(&self, ch: char) -> bool {
diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs
index c910e4c77..68e025d18 100644
--- a/src/dialect/redshift.rs
+++ b/src/dialect/redshift.rs
@@ -80,9 +80,9 @@ impl Dialect for RedshiftSqlDialect {
     }
 
     fn is_identifier_start(&self, ch: char) -> bool {
-        // Extends Postgres dialect with sharp and UTF-8 multibyte chars
+        // UTF-8 multibyte characters are supported in identifiers via the PostgreSqlDialect.
         // https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
-        PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#' || !ch.is_ascii()
+        PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
     }
 
     fn is_identifier_part(&self, ch: char) -> bool {
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index ba72399f9..e95c7e7b6 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -11151,9 +11151,7 @@ fn parse_non_latin_identifiers() {
     let supported_dialects = TestedDialects::new(vec![
         Box::new(GenericDialect {}),
         Box::new(DuckDbDialect {}),
-        Box::new(PostgreSqlDialect {}),
         Box::new(MsSqlDialect {}),
-        Box::new(MySqlDialect {}),
     ]);
     assert!(supported_dialects
         .parse_sql_statements("SELECT 💝 FROM table1")
@@ -16147,3 +16145,14 @@ fn test_identifier_unicode_support() {
     ]);
     let _ = dialects.verified_stmt(sql);
 }
+
+#[test]
+fn test_identifier_unicode_start() {
+    let sql = r#"SELECT 💝phone AS 💝 FROM customers"#;
+    let dialects = TestedDialects::new(vec![
+        Box::new(MySqlDialect {}),
+        Box::new(RedshiftSqlDialect {}),
+        Box::new(PostgreSqlDialect {}),
+    ]);
+    let _ = dialects.verified_stmt(sql);
+}