From bde0f0e695ea28148c823c6cdc3fd19cc9e5c469 Mon Sep 17 00:00:00 2001
From: Mikhail Vorozhtsov <mikhail.vorozhtsov@gmail.com>
Date: Tue, 12 Apr 2011 20:04:14 +0700
Subject: [PATCH] Allow sub/superscript symbols in identifiers and operators.
---
compiler/parser/Lexer.x | 69 ++++++++++++++++++++++++++++++++++++++--------
1 file changed, 57 insertions(+), 12 deletions(-)
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 9ae312c..7ea10d2 100644
| a | b | |
| 108 | 108 | $small = [$ascsmall $unismall \_] |
| 109 | 109 | |
| 110 | 110 | $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar. |
| 111 | | $graphic = [$small $large $symbol $digit $special $unigraphic \:\"\'] |
| | 111 | $subsup = \x0E -- Trick Alex into handling Unicode. See alexGetChar. |
| | 112 | $graphic = [$small $large $symbol $digit $special $unigraphic $subsup |
| | 113 | \:\"\'] |
| 112 | 114 | |
| 113 | 115 | $octit = 0-7 |
| 114 | 116 | $hexit = [$decdigit A-F a-f] |
| 115 | | $symchar = [$symbol \:] |
| | 117 | $symchar = [$symbol $subsup \:] |
| 116 | 118 | $nl = [\n\r] |
| 117 | | $idchar = [$small $large $digit \'] |
| | 119 | $idchar = [$small $large $digit $subsup \'] |
| 118 | 120 | |
| 119 | 121 | $pragmachar = [$small $large $digit] |
| 120 | 122 | |
| … | … | |
| 1608 | 1610 | symbol = '\x4' |
| 1609 | 1611 | space = '\x5' |
| 1610 | 1612 | other_graphic = '\x6' |
| | 1613 | subsup = '\xE' |
| 1611 | 1614 | |
| 1612 | 1615 | adj_c |
| 1613 | | | c <= '\x06' = non_graphic |
| | 1616 | | c <= '\x06' || c == '\x0E' = non_graphic |
| 1614 | 1617 | | c <= '\x7f' = c |
| 1615 | 1618 | -- Alex doesn't handle Unicode, so when Unicode |
| 1616 | 1619 | -- character is encountered we output these values |
| … | … | |
| 1618 | 1621 | | otherwise = |
| 1619 | 1622 | case generalCategory c of |
| 1620 | 1623 | UppercaseLetter -> upper |
| 1621 | | LowercaseLetter -> lower |
| | 1624 | LowercaseLetter -> |
| | 1625 | if c == '\xAA' || c == '\xBA' || |
| | 1626 | (c >= '\x1D62' && c <= '\x1D6A') || |
| | 1627 | c == '\x2C7C' |
| | 1628 | then subsup |
| | 1629 | else lower |
| 1622 | 1630 | TitlecaseLetter -> upper |
| 1623 | | ModifierLetter -> other_graphic |
| | 1631 | ModifierLetter -> |
| | 1632 | if (c >= '\x02B0' && c <= '\x02B8') || |
| | 1633 | (c >= '\x02E0' && c <= '\x02E4') || |
| | 1634 | c == '\x10FC' || |
| | 1635 | (c >= '\x1D2C' && c <= '\x1D2E') || |
| | 1636 | (c >= '\x1D30' && c <= '\x1D3A') || |
| | 1637 | (c >= '\x1D3C' && c <= '\x1D4D') || |
| | 1638 | (c >= '\x1D4F' && c <= '\x1D61') || |
| | 1639 | c == '\x1D78' || |
| | 1640 | (c >= '\x1D9B' && c <= '\x1DBF') || |
| | 1641 | c == '\x2071' || c == '\x207F' || |
| | 1642 | (c >= '\x2090' && c <= '\x209C') || |
| | 1643 | c == '\x2C7D' || c == '\x2D6F' || c == '\xA770' |
| | 1644 | then subsup |
| | 1645 | else other_graphic |
| 1624 | 1646 | OtherLetter -> lower -- see #1103 |
| 1625 | 1647 | NonSpacingMark -> other_graphic |
| 1626 | 1648 | SpacingCombiningMark -> other_graphic |
| 1627 | 1649 | EnclosingMark -> other_graphic |
| 1628 | 1650 | DecimalNumber -> digit |
| 1629 | 1651 | LetterNumber -> other_graphic |
| 1630 | | OtherNumber -> digit -- see #4373 |
| | 1652 | OtherNumber -> |
| | 1653 | if c == '\xB2' || c == '\xB3' || c == '\xB9' || |
| | 1654 | (c >= '\x2070' && c <= '\x2079') || |
| | 1655 | (c >= '\x2080' && c <= '\x2089') || |
| | 1656 | (c >= '\x3192' && c <= '\x3195') |
| | 1657 | then subsup |
| | 1658 | else digit |
| 1631 | 1659 | ConnectorPunctuation -> symbol |
| 1632 | 1660 | DashPunctuation -> symbol |
| 1633 | | OpenPunctuation -> other_graphic |
| 1634 | | ClosePunctuation -> other_graphic |
| | 1661 | OpenPunctuation -> |
| | 1662 | if c == '\x207D' || c == '\x208D' |
| | 1663 | then subsup |
| | 1664 | else other_graphic |
| | 1665 | ClosePunctuation -> |
| | 1666 | if c == '\x207E' || c == '\x208E' |
| | 1667 | then subsup |
| | 1668 | else other_graphic |
| 1635 | 1669 | InitialQuote -> other_graphic |
| 1636 | 1670 | FinalQuote -> other_graphic |
| 1637 | | OtherPunctuation -> symbol |
| 1638 | | MathSymbol -> symbol |
| | 1671 | OtherPunctuation -> |
| | 1672 | if (c >= '\x2032' && c <= '\x2034') || c == '\x2057' |
| | 1673 | then subsup |
| | 1674 | else symbol |
| | 1675 | MathSymbol -> |
| | 1676 | if (c >= '\x207A' && c <= '\x207C') || |
| | 1677 | (c >= '\x208A' && c <= '\x208C') |
| | 1678 | then subsup |
| | 1679 | else symbol |
| 1639 | 1680 | CurrencySymbol -> symbol |
| 1640 | 1681 | ModifierSymbol -> symbol |
| 1641 | | OtherSymbol -> symbol |
| | 1682 | OtherSymbol -> |
| | 1683 | if c == '\x2120' || c == '\x2122' || |
| | 1684 | (c >= '\x3196' && c <= '\x319F') |
| | 1685 | then subsup |
| | 1686 | else symbol |
| 1642 | 1687 | Space -> space |
| 1643 | 1688 | _other -> non_graphic |
| 1644 | 1689 | |