From b72a8ff42f53cba0517b1dd9e8af051b4a060ecf Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 10 Sep 2022 10:08:02 +0200 Subject: [PATCH] Allow any unicode character as identifier name (fixes #641). --- CHANGELOG | 1 + sqlparse/keywords.py | 2 +- tests/test_parse.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index e1b3ae63..a363b226 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -16,6 +16,7 @@ Bug Fixes * Fix formatting error in EXTRACT function (issue562, issue670, pr676, by ecederstrand). * Fix bad parsing of create table statements that use lower case (issue217, pr642, by mrmasterplan). * Handle backtick as valid quote char (issue628, pr629, by codenamelxl). +* Allow any unicode character as valid identifier name (issue641). Other diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 3aa6c630..d73e1143 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -99,7 +99,7 @@ def is_keyword(value): (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), # Check for keywords, also returns tokens.Name if regex matches # but the match isn't a keyword. - (r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword), + (r'[0-9_\w][_$#\w]*', is_keyword), (r'[;:()\[\],\.]', tokens.Punctuation), (r'[<>=~!]+', tokens.Operator.Comparison), (r'[+/@#%^&|^-]+', tokens.Operator), diff --git a/tests/test_parse.py b/tests/test_parse.py index caba537e..ec327ac8 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -148,6 +148,7 @@ def test_quoted_identifier(): @pytest.mark.parametrize('name', [ 'foo', '_foo', # issue175 '1_data', # valid MySQL table name, see issue337 + '業者名稱', # valid at least for SQLite3, see issue641 ]) def test_valid_identifier_names(name): t = sqlparse.parse(name)[0].tokens