From 9f961f1694ad43e9ca0416991313896021264777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabi=C3=A1n=20Heredia=20Montiel?= Date: Tue, 14 Apr 2020 23:17:01 -0500 Subject: [PATCH] Add unicode operator support for Mathematical Operators and Arrows --- src/Idris/Parser.idr | 34 +++++++++------ src/Parser/Lexer.idr | 66 ++++++++++++++++++----------- tests/Main.idr | 2 + tests/idris2/unicode001/Unicode.idr | 9 ++++ tests/idris2/unicode001/expected | 1 + tests/idris2/unicode001/run | 3 ++ 6 files changed, 78 insertions(+), 37 deletions(-) create mode 100644 tests/idris2/unicode001/Unicode.idr create mode 100644 tests/idris2/unicode001/expected create mode 100644 tests/idris2/unicode001/run diff --git a/src/Idris/Parser.idr b/src/Idris/Parser.idr index 1dc002349..89a1b0d48 100644 --- a/src/Idris/Parser.idr +++ b/src/Idris/Parser.idr @@ -39,6 +39,15 @@ export plhs : ParseOpts plhs = MkParseOpts False False +leftArrow : Rule () +leftArrow = symbol "<-" <|> symbol "←" + +rightArrow : Rule () +rightArrow = symbol "->" <|> symbol "→" + +doubleRightArrow : Rule () +doubleRightArrow = symbol "=>" <|> symbol "⇒" + atom : FileName -> Rule PTerm atom fname = do start <- location @@ -436,12 +445,11 @@ mutual bindSymbol : Rule (PiInfo PTerm) bindSymbol - = do symbol "->" + = do rightArrow pure Explicit - <|> do symbol "=>" + <|> do doubleRightArrow pure AutoImplicit - explicitPi : FileName -> IndentInfo -> Rule PTerm explicitPi fname indents = do start <- location @@ -461,7 +469,7 @@ mutual commit binders <- pibindList fname start indents symbol "}" - symbol "->" + rightArrow scope <- typeExpr pdef fname indents end <- location pure (pibindAll (MkFC fname start end) AutoImplicit binders scope) @@ -475,7 +483,7 @@ mutual t <- simpleExpr fname indents binders <- pibindList fname start indents symbol "}" - symbol "->" + rightArrow scope <- typeExpr pdef fname indents end <- location pure (pibindAll (MkFC fname start end) (DefImplicit t) binders scope) @@ -501,7 +509,7 @@ mutual symbol "{" binders <- pibindList fname start indents symbol "}" - symbol "->" + rightArrow scope <- typeExpr pdef fname indents end <- location pure (pibindAll (MkFC fname start end) Implicit binders scope) @@ -511,7 +519,7 @@ mutual = do start <- location symbol "\\" binders <- bindList fname start indents - symbol "=>" + doubleRightArrow mustContinue indents Nothing scope <- expr pdef fname indents end <- location @@ -614,7 +622,7 @@ mutual caseRHS : FileName -> FilePos -> IndentInfo -> PTerm -> Rule PClause caseRHS fname start indents lhs - = do symbol "=>" + = do doubleRightArrow mustContinue indents Nothing rhs <- expr pdef fname indents atEnd indents @@ -703,7 +711,7 @@ mutual -- If the name doesn't begin with a lower case letter, we should -- treat this as a pattern, so fail validPatternVar n - symbol "<-" + leftArrow val <- expr pdef fname indents atEnd indents end <- location @@ -729,7 +737,7 @@ mutual (do atEnd indents end <- location pure [DoExp (MkFC fname start end) e]) - <|> (do symbol "<-" + <|> (do leftArrow val <- expr pnowith fname indents alts <- block (patAlt fname) atEnd indents @@ -1196,7 +1204,7 @@ getRight (Right v) = Just v constraints : FileName -> IndentInfo -> EmptyRule (List (Maybe Name, PTerm)) constraints fname indents = do tm <- appExpr pdef fname indents - symbol "=>" + doubleRightArrow more <- constraints fname indents pure ((Nothing, tm) :: more) <|> do symbol "(" @@ -1204,7 +1212,7 @@ constraints fname indents symbol ":" tm <- expr pdef fname indents symbol ")" - symbol "=>" + doubleRightArrow more <- constraints fname indents pure ((Just n, tm) :: more) <|> pure [] @@ -1218,7 +1226,7 @@ implBinds fname indents symbol ":" tm <- expr pdef fname indents symbol "}" - symbol "->" + rightArrow more <- implBinds fname indents pure ((n, rig, tm) :: more) <|> pure [] diff --git a/src/Parser/Lexer.idr b/src/Parser/Lexer.idr index f2aa29d80..a3c385823 100644 --- a/src/Parser/Lexer.idr +++ b/src/Parser/Lexer.idr @@ -107,6 +107,39 @@ blockComment = is '{' <+> is '-' <+> toEndComment 1 docComment : Lexer docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n') +-- +-- Symbols +-- + +export +isOpChar : Char -> Bool +isOpChar c = inLatin || inArrows || inMathematicalOperators + where + inRange : (Int, Int) -> Lazy Bool + inRange (lowerBound, upperBound) = (c >= chr lowerBound && c <= chr upperBound) + inLatin = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~") + inArrows = inRange (8592, 8703) + inMathematicalOperators = inRange (8704, 8959) + +nonOpCharUnicode : Char -> Bool +nonOpCharUnicode c = (c > chr 160) && not (isOpChar c) + + +validSymbol : Lexer +validSymbol = some (pred isOpChar) + +||| Special symbols - things which can't be a prefix of another symbol, and +||| don't match 'validSymbol' +export +symbols : List String +symbols + = [".(", -- for things such as Foo.Bar.(+) + "@{", + "[|", "|]", + "(", ")", "{", "}", "[", "]", ",", ";", "_", + "`(", "`"] + +-- -- Identifier Lexer -- There are multiple variants. @@ -114,14 +147,14 @@ data Flavour = Capitalised | AllowDashes | Normal isIdentStart : Flavour -> Char -> Bool isIdentStart _ '_' = True -isIdentStart Capitalised x = isUpper x || x > chr 160 -isIdentStart _ x = isAlpha x || x > chr 160 +isIdentStart Capitalised c = isUpper c || nonOpCharUnicode c +isIdentStart _ c = isAlpha c || nonOpCharUnicode c isIdentTrailing : Flavour -> Char -> Bool isIdentTrailing AllowDashes '-' = True isIdentTrailing _ '\'' = True isIdentTrailing _ '_' = True -isIdentTrailing _ x = isAlphaNum x || x > chr 160 +isIdentTrailing _ c = isAlphaNum c || nonOpCharUnicode c %inline isIdent : Flavour -> String -> Bool @@ -160,6 +193,10 @@ recField = is '.' <+> ident Normal pragma : Lexer pragma = is '%' <+> ident Normal +-- +-- Literal Lexers +-- + doubleLit : Lexer doubleLit = digits <+> is '.' <+> digits <+> opt @@ -194,32 +231,13 @@ keywords = ["data", "module", "where", "let", "in", "do", "record", special : List String special = ["%lam", "%pi", "%imppi", "%let"] --- Special symbols - things which can't be a prefix of another symbol, and --- don't match 'validSymbol' -export -symbols : List String -symbols - = [".(", -- for things such as Foo.Bar.(+) - "@{", - "[|", "|]", - "(", ")", "{", "}", "[", "]", ",", ";", "_", - "`(", "`"] - - -export -isOpChar : Char -> Bool -isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~") - -validSymbol : Lexer -validSymbol = some (pred isOpChar) - -- Valid symbols which have a special meaning so can't be operators export reservedSymbols : List String reservedSymbols = symbols ++ - ["%", "\\", ":", "=", "|", "|||", "<-", "->", "=>", "?", "!", - "&", "**", ".."] + ["%", "\\", ":", "=", "|", "|||", "?", "!", "&", "**", + "..", "<-", "->", "=>", "←", "→", "⇒"] fromHexLit : String -> Integer fromHexLit str diff --git a/tests/Main.idr b/tests/Main.idr index 4ac00006b..c81e1f3be 100644 --- a/tests/Main.idr +++ b/tests/Main.idr @@ -51,6 +51,8 @@ idrisTests "literate001", "literate002", "literate003", "literate004", "literate005", "literate006", "literate007", "literate008", "literate009", "literate010", "literate011", "literate012", + -- Unicode + "unicode001", -- Interfaces "interface001", "interface002", "interface003", "interface004", "interface005", "interface006", "interface007", "interface008", diff --git a/tests/idris2/unicode001/Unicode.idr b/tests/idris2/unicode001/Unicode.idr new file mode 100644 index 000000000..caea1bd70 --- /dev/null +++ b/tests/idris2/unicode001/Unicode.idr @@ -0,0 +1,9 @@ +module Unicode + +infix 6 ≡, ≢ + +public export +interface UnicodeEq a where + (≡) : a → a → Bool + (≢) : a → a → Bool + diff --git a/tests/idris2/unicode001/expected b/tests/idris2/unicode001/expected new file mode 100644 index 000000000..0143f0f45 --- /dev/null +++ b/tests/idris2/unicode001/expected @@ -0,0 +1 @@ +1/1: Building Unicode (Unicode.idr) diff --git a/tests/idris2/unicode001/run b/tests/idris2/unicode001/run new file mode 100644 index 000000000..a6a0124a8 --- /dev/null +++ b/tests/idris2/unicode001/run @@ -0,0 +1,3 @@ +$1 -c Unicode.idr + +rm -rf build/