Skip to content

DuckDB, Postgres, SQLite: NOT NULL and NOTNULL expressions #1927

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/dialect/duckdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,10 @@ impl Dialect for DuckDbDialect {
fn supports_select_wildcard_exclude(&self) -> bool {
true
}

/// DuckDB supports `NOTNULL` as an alias for `IS NOT NULL`,
/// see DuckDB Comparisons <https://duckdb.org/docs/stable/sql/expressions/comparison_operators#between-and-is-not-null>
fn supports_notnull_operator(&self) -> bool {
true
}
}
12 changes: 12 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -677,8 +677,14 @@ pub trait Dialect: Debug + Any {
Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)),
Token::Word(w) if w.keyword == Keyword::NULL && parser.in_normal_state() => {
Ok(p!(Is))
}
_ => Ok(self.prec_unknown()),
},
Token::Word(w) if w.keyword == Keyword::NOTNULL && self.supports_notnull_operator() => {
Ok(p!(Is))
}
Token::Word(w) if w.keyword == Keyword::IS => Ok(p!(Is)),
Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
Expand Down Expand Up @@ -1122,6 +1128,12 @@ pub trait Dialect: Debug + Any {
) -> bool {
false
}

/// Returns true if the dialect supports the `x NOTNULL`
/// operator expression.
fn supports_notnull_operator(&self) -> bool {
false
}
}

/// This represents the operators for which precedence must be defined
Expand Down
6 changes: 6 additions & 0 deletions src/dialect/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,4 +264,10 @@ impl Dialect for PostgreSqlDialect {
fn supports_alter_column_type_using(&self) -> bool {
true
}

/// Postgres supports `NOTNULL` as an alias for `IS NOT NULL`
/// See: <https://www.postgresql.org/docs/17/functions-comparison.html>
fn supports_notnull_operator(&self) -> bool {
true
}
}
6 changes: 6 additions & 0 deletions src/dialect/sqlite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,4 +110,10 @@ impl Dialect for SQLiteDialect {
fn supports_dollar_placeholder(&self) -> bool {
true
}

/// SQLite supports `NOTNULL` as aliases for `IS NOT NULL`
/// See: <https://sqlite.org/syntax/expr.html>
fn supports_notnull_operator(&self) -> bool {
true
}
}
1 change: 1 addition & 0 deletions src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@ define_keywords!(
NOT,
NOTHING,
NOTIFY,
NOTNULL,
NOWAIT,
NO_WRITE_TO_BINLOG,
NTH_VALUE,
Expand Down
64 changes: 57 additions & 7 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ use helpers::attached_token::AttachedToken;

use log::debug;

use recursion::RecursionCounter;
use IsLateral::*;
use IsOptional::*;

use crate::ast::helpers::stmt_create_table::{CreateTableBuilder, CreateTableConfiguration};
use crate::ast::Statement::CreatePolicy;
use crate::ast::*;
use crate::dialect::*;
use crate::keywords::{Keyword, ALL_KEYWORDS};
use crate::tokenizer::*;
use recursion::RecursionCounter;
use sqlparser::parser::ParserState::ColumnDefinition;
use IsLateral::*;
use IsOptional::*;
Comment on lines +37 to +40
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was the change intentional or autofmt it looks like only one import is being added, if the latter could we revert to a minimal diff?


mod alter;

Expand Down Expand Up @@ -275,6 +275,9 @@ enum ParserState {
/// PRIOR expressions while still allowing prior as an identifier name
/// in other contexts.
ConnectBy,
/// The state when parsing column definitions. This state prohibits
/// NOT NULL as an alias for IS NOT NULL.
ColumnDefinition,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we include a sql example to illustrate this? I'm wondering that folks won't be able to fully get the picture without reading the code otherwise

}

/// A SQL Parser
Expand Down Expand Up @@ -3570,6 +3573,11 @@ impl<'a> Parser<'a> {
let negated = self.parse_keyword(Keyword::NOT);
let regexp = self.parse_keyword(Keyword::REGEXP);
let rlike = self.parse_keyword(Keyword::RLIKE);
let null = if self.in_normal_state() {
self.parse_keyword(Keyword::NULL)
} else {
false
};
if regexp || rlike {
Ok(Expr::RLike {
negated,
Expand All @@ -3579,6 +3587,8 @@ impl<'a> Parser<'a> {
),
regexp,
})
} else if negated && null {
Ok(Expr::IsNotNull(Box::new(expr)))
} else if self.parse_keyword(Keyword::IN) {
self.parse_in(expr, negated)
} else if self.parse_keyword(Keyword::BETWEEN) {
Expand Down Expand Up @@ -3616,6 +3626,9 @@ impl<'a> Parser<'a> {
self.expected("IN or BETWEEN after NOT", self.peek_token())
}
}
Keyword::NOTNULL if dialect.supports_notnull_operator() => {
Ok(Expr::IsNotNull(Box::new(expr)))
}
Keyword::MEMBER => {
if self.parse_keyword(Keyword::OF) {
self.expect_token(&Token::LParen)?;
Expand Down Expand Up @@ -7724,6 +7737,27 @@ impl<'a> Parser<'a> {
return option;
}

self.with_state(
ColumnDefinition,
|parser| -> Result<Option<ColumnOption>, ParserError> {
parser.parse_optional_column_option_inner()
},
)
}

fn parse_optional_column_option_inner(&mut self) -> Result<Option<ColumnOption>, ParserError> {
/// In some cases, we need to revert to [ParserState::Normal] when parsing nested expressions
/// In those cases we use the following macro to parse instead of calling [parse_expr] directly.
macro_rules! parse_expr_normal {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use a regular function for this vs a macro?

In terms of fn signature, would this work just well fn parse_column_option_expr(&mut self) -> Result<Expr, ParserError>?
then the caller can explicitly do e.g. Ok(Some(ColumnOption::Default(self.parse_column_option()?)))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Documentation wise, I'm thinking it could be helpful to illustrate why we need this function and with an example like below. And that we also callout that any column option that parses an arbitrary expression likely wants to call this variant instead of parse_expr(), for folks that wander around this area of the code in the future looking to add/update column options support.

CREATE TABLE foo (abc (42 NOT NULL) NOT NULL);
vs
CREATE TABLE foo (abc 42 NOT NULL);

($option:expr) => {
if matches!(self.peek_token().token, Token::LParen) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if matches!(self.peek_token().token, Token::LParen) {
if self.peek_token_ref().token == Token::LParen {

let expr: Expr = self.with_state(ParserState::Normal, |p| p.parse_prefix())?;
Ok(Some($option(expr)))
} else {
Ok(Some($option(self.parse_expr()?)))
}
};
}
if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) {
Ok(Some(ColumnOption::CharacterSet(
self.parse_object_name(false)?,
Expand All @@ -7739,11 +7773,11 @@ impl<'a> Parser<'a> {
} else if self.parse_keyword(Keyword::NULL) {
Ok(Some(ColumnOption::Null))
} else if self.parse_keyword(Keyword::DEFAULT) {
Ok(Some(ColumnOption::Default(self.parse_expr()?)))
parse_expr_normal!(ColumnOption::Default)
} else if dialect_of!(self is ClickHouseDialect| GenericDialect)
&& self.parse_keyword(Keyword::MATERIALIZED)
{
Ok(Some(ColumnOption::Materialized(self.parse_expr()?)))
parse_expr_normal!(ColumnOption::Materialized)
} else if dialect_of!(self is ClickHouseDialect| GenericDialect)
&& self.parse_keyword(Keyword::ALIAS)
{
Expand Down Expand Up @@ -7799,7 +7833,8 @@ impl<'a> Parser<'a> {
}))
} else if self.parse_keyword(Keyword::CHECK) {
self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?;
// since `CHECK` requires parentheses, we can parse the inner expression in ParserState::Normal
let expr: Expr = self.with_state(ParserState::Normal, |p| p.parse_expr())?;
self.expect_token(&Token::RParen)?;
Ok(Some(ColumnOption::Check(expr)))
} else if self.parse_keyword(Keyword::AUTO_INCREMENT)
Expand Down Expand Up @@ -16514,6 +16549,10 @@ impl<'a> Parser<'a> {
Ok(None)
}
}

pub fn in_normal_state(&self) -> bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pub fn in_normal_state(&self) -> bool {
fn in_normal_state(&self) -> bool {

matches!(self.state, ParserState::Normal)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering should this check be self.state != ParserState::ColumnDefinition? Thinking that covers the enum being extended to other interactions later on (feels like the not null logic only cares that its not in this state in order to be correct)?

}
}

fn maybe_prefixed_expr(expr: Expr, prefix: Option<Ident>) -> Expr {
Expand Down Expand Up @@ -17249,4 +17288,15 @@ mod tests {

assert!(Parser::parse_sql(&MySqlDialect {}, sql).is_err());
}

#[test]
fn test_parse_not_null_in_column_options() {
let canonical =
"CREATE TABLE foo (abc INT DEFAULT (42 IS NOT NULL) NOT NULL, CHECK (abc IS NOT NULL))";
all_dialects().verified_stmt(canonical);
all_dialects().one_statement_parses_to(
"CREATE TABLE foo (abc INT DEFAULT (42 NOT NULL) NOT NULL, CHECK (abc NOT NULL) )",
canonical,
);
}
}
9 changes: 9 additions & 0 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1705,6 +1705,15 @@ fn parse_table_sample() {
clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1 / 10 OFFSET 1 / 2");
}

#[test]
fn test_parse_not_null_in_column_options() {
// In addition to DEFAULT and CHECK ClickHouse also supports MATERIALIZED, all of which
// can contain `IS NOT NULL` and thus `NOT NULL` as an alias.
let canonical = "CREATE TABLE foo (abc INT DEFAULT (42 IS NOT NULL) NOT NULL, not_null BOOL MATERIALIZED (abc IS NOT NULL), CHECK (abc IS NOT NULL))";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the longer strings, in order to break them up into lines could we use this concat pattern?

clickhouse().verified_stmt(canonical);
clickhouse().one_statement_parses_to("CREATE TABLE foo (abc INT DEFAULT (42 NOT NULL) NOT NULL, not_null BOOL MATERIALIZED (abc IS NOT NULL), CHECK (abc NOT NULL) )", canonical);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
clickhouse().one_statement_parses_to("CREATE TABLE foo (abc INT DEFAULT (42 NOT NULL) NOT NULL, not_null BOOL MATERIALIZED (abc IS NOT NULL), CHECK (abc NOT NULL) )", canonical);
clickhouse().one_statement_parses_to("CREATE TABLE foo (abc INT DEFAULT (42 NOT NULL) NOT NULL, not_null BOOL MATERIALIZED (abc NOT NULL), CHECK (abc NOT NULL) )", canonical);

I think this would be the intention?

}

fn clickhouse() -> TestedDialects {
TestedDialects::new(vec![Box::new(ClickHouseDialect {})])
}
Expand Down
60 changes: 60 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16015,6 +16015,30 @@ fn parse_create_procedure_with_parameter_modes() {
}
}

#[test]
fn parse_not_null_supported() {
let _ = all_dialects().expr_parses_to("x NOT NULL", "x IS NOT NULL");
let _ = all_dialects().expr_parses_to("NULL NOT NULL", "NULL IS NOT NULL");
}

#[test]
fn test_not_null_precedence() {
assert_matches!(
all_dialects().expr_parses_to("NOT NULL NOT NULL", "NOT NULL IS NOT NULL"),
Expr::UnaryOp {
op: UnaryOperator::Not,
..
}
);
assert_matches!(
all_dialects().expr_parses_to("NOT x NOT NULL", "NOT x IS NOT NULL"),
Expr::UnaryOp {
op: UnaryOperator::Not,
..
}
);
}

#[test]
fn test_select_exclude() {
let dialects = all_dialects_where(|d| d.supports_select_wildcard_exclude());
Expand Down Expand Up @@ -16147,3 +16171,39 @@ fn test_identifier_unicode_support() {
]);
let _ = dialects.verified_stmt(sql);
}

#[test]
fn parse_notnull_unsupported() {
// Only Postgres, DuckDB, and SQLite support `x NOTNULL` as an expression
// All other dialects consider `x NOTNULL` like `x AS NOTNULL` and thus
// consider `NOTNULL` an alias for x.
let dialects = all_dialects_except(|d| d.supports_notnull_operator());
let _ = dialects
.verified_only_select_with_canonical("SELECT NULL NOTNULL", "SELECT NULL AS NOTNULL");
}

#[test]
fn parse_notnull_supported() {
// Postgres, DuckDB and SQLite support `x NOTNULL` as an alias for `x IS NOT NULL`
let dialects = all_dialects_where(|d| d.supports_notnull_operator());
let _ = dialects.expr_parses_to("x NOTNULL", "x IS NOT NULL");
}

#[test]
fn test_notnull_precedence() {
// For dialects which support it, `NOT NULL NOTNULL` should
// parse as `(NOT (NULL IS NOT NULL))`
let supported_dialects = all_dialects_where(|d| d.supports_notnull_operator());
let unsupported_dialects = all_dialects_except(|d| d.supports_notnull_operator());

assert_matches!(
supported_dialects.expr_parses_to("NOT NULL NOTNULL", "NOT NULL IS NOT NULL"),
Expr::UnaryOp {
op: UnaryOperator::Not,
..
}
);

// for unsupported dialects, parsing should stop at `NOT NULL`
unsupported_dialects.expr_parses_to("NOT NULL NOTNULL", "NOT NULL");
}