{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
module Skylighting.Parser ( parseSyntaxDefinition
                          , parseSyntaxDefinitionFromText
                          , addSyntaxDefinition
                          , resolveKeywords
                          , missingIncludes
                          ) where

import qualified Data.String as String
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Char (isAlphaNum, toUpper)
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import qualified Data.Text.Read as TR
import qualified Data.Text.Encoding as TE
import Safe
import Skylighting.Regex
import Skylighting.Types
import System.FilePath
import Text.XML
import qualified Control.Exception as E
import Control.Monad.Trans.Except
import Control.Monad.Error.Class
import Control.Monad.Identity
#if MIN_VERSION_mtl(2,3,0)
import Control.Monad
#endif

-- | Adds a syntax definition to a syntax map,
-- replacing any existing definition with the same name.
addSyntaxDefinition :: Syntax -> SyntaxMap -> SyntaxMap
addSyntaxDefinition :: Syntax -> SyntaxMap -> SyntaxMap
addSyntaxDefinition Syntax
s = forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert (Syntax -> Text
sName Syntax
s) Syntax
s

-- | Scan a list of 'Syntax's and make sure that
-- `IncludeRules` never asks for a syntax not in this
-- list.  Produces a list of pairs where the first
-- element is the including syntax name and the second
-- element is the (missing) included syntax name.
-- This is intended for sanity checks to avoid run-time
-- errors.
missingIncludes :: [Syntax] -> [(Text, Text)]
missingIncludes :: [Syntax] -> [(Text, Text)]
missingIncludes [Syntax]
syns = forall a. Ord a => [a] -> [a]
ordNub
  [(Syntax -> Text
sName Syntax
s, Text
lang)
     | Syntax
s <- [Syntax]
syns
     , Context
c <- forall k a. Map k a -> [a]
M.elems (Syntax -> Map Text Context
sContexts Syntax
s)
     , IncludeRules (Text
lang, Text
_) <- forall a b. (a -> b) -> [a] -> [b]
map Rule -> Matcher
rMatcher (Context -> [Rule]
cRules Context
c)
     , Bool -> Bool
not (Text
lang forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
syntaxNames)]
   where syntaxNames :: Set Text
syntaxNames = forall a. Ord a => [a] -> Set a
Set.fromList forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map Syntax -> Text
sName [Syntax]
syns

ordNub :: (Ord a) => [a] -> [a]
ordNub :: forall a. Ord a => [a] -> [a]
ordNub [a]
l = forall {a}. Ord a => Set a -> [a] -> [a]
go forall a. Set a
Set.empty [a]
l
  where
    go :: Set a -> [a] -> [a]
go Set a
_ [] = []
    go Set a
s (a
x:[a]
xs) = if a
x forall a. Ord a => a -> Set a -> Bool
`Set.member` Set a
s then Set a -> [a] -> [a]
go Set a
s [a]
xs
                                      else a
x forall a. a -> [a] -> [a]
: Set a -> [a] -> [a]
go (forall a. Ord a => a -> Set a -> Set a
Set.insert a
x Set a
s) [a]
xs

standardDelims :: Set.Set Char
standardDelims :: Set Char
standardDelims = forall a. Ord a => [a] -> Set a
Set.fromList [Char]
" \n\t.():!+,-<=>%&*/;?[]^{|}~\\"

defaultKeywordAttr :: KeywordAttr
defaultKeywordAttr :: KeywordAttr
defaultKeywordAttr = KeywordAttr { keywordCaseSensitive :: Bool
keywordCaseSensitive = Bool
True
                                 , keywordDelims :: Set Char
keywordDelims = Set Char
standardDelims }

vBool :: Bool -> Text -> Bool
vBool :: Bool -> Text -> Bool
vBool Bool
defaultVal Text
value = case Text
value of
                           Text
"true"  -> Bool
True
                           Text
"yes"   -> Bool
True
                           Text
"1"     -> Bool
True
                           Text
"false" -> Bool
False
                           Text
"no"    -> Bool
False
                           Text
"0"     -> Bool
False
                           Text
_       -> Bool
defaultVal

-- | Parses a file containing a Kate XML syntax definition
-- into a 'Syntax' description.
parseSyntaxDefinition :: FilePath -> IO (Either String Syntax)
parseSyntaxDefinition :: [Char] -> IO (Either [Char] Syntax)
parseSyntaxDefinition [Char]
fp = do
  ByteString
bs <- [Char] -> IO ByteString
BL.readFile [Char]
fp
  case ParseSettings -> Text -> Either SomeException Document
parseText forall a. Default a => a
def (ByteString -> Text
toTextLazy ByteString
bs) of
    Left SomeException
e    -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ forall e. Exception e => e -> [Char]
E.displayException SomeException
e
    Right Document
doc -> forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT (forall (m :: * -> *).
Monad m =>
[Char] -> Document -> ExceptT [Char] m Syntax
documentToSyntax [Char]
fp Document
doc)
 where
  toTextLazy :: ByteString -> Text
toTextLazy = ByteString -> Text
TLE.decodeUtf8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
filterCRs forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
dropBOM
  dropBOM :: ByteString -> ByteString
dropBOM ByteString
bs =
         if ByteString
"\xEF\xBB\xBF" ByteString -> ByteString -> Bool
`BL.isPrefixOf` ByteString
bs
            then Int64 -> ByteString -> ByteString
BL.drop Int64
3 ByteString
bs
            else ByteString
bs
  filterCRs :: ByteString -> ByteString
filterCRs = (Char -> Bool) -> ByteString -> ByteString
BL.filter (forall a. Eq a => a -> a -> Bool
/=Char
'\r')

parseSyntaxDefinitionFromText ::
  FilePath -> TL.Text -> Either String Syntax
parseSyntaxDefinitionFromText :: [Char] -> Text -> Either [Char] Syntax
parseSyntaxDefinitionFromText [Char]
fp Text
xml =
    case ParseSettings -> Text -> Either SomeException Document
parseText forall a. Default a => a
def Text
xml of
      Left SomeException
e    -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ forall e. Exception e => e -> [Char]
E.displayException SomeException
e
      Right Document
doc -> forall a. Identity a -> a
runIdentity forall a b. (a -> b) -> a -> b
$ forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *).
Monad m =>
[Char] -> Document -> ExceptT [Char] m Syntax
documentToSyntax [Char]
fp Document
doc

-- | Resolve Keyword matchers that refer to lists; following up
-- include directives in the syntax map and producing WordSets.
resolveKeywords :: SyntaxMap -> Syntax -> Syntax
resolveKeywords :: SyntaxMap -> Syntax -> Syntax
resolveKeywords SyntaxMap
sm = Syntax -> Syntax
goSyntax
 where
   goSyntax :: Syntax -> Syntax
goSyntax Syntax
syntax = Syntax
syntax{ sContexts :: Map Text Context
sContexts = forall a b k. (a -> b) -> Map k a -> Map k b
M.map (forall {t :: * -> *}.
Foldable t =>
Map Text (t ListItem) -> Context -> Context
goContext (Syntax -> Map Text [ListItem]
sLists Syntax
syntax))
                                                 (Syntax -> Map Text Context
sContexts Syntax
syntax) }
   goContext :: Map Text (t ListItem) -> Context -> Context
goContext Map Text (t ListItem)
lists Context
context = Context
context{ cRules :: [Rule]
cRules = forall a b. (a -> b) -> [a] -> [b]
map (forall {t :: * -> *}.
Foldable t =>
Map Text (t ListItem) -> Rule -> Rule
goRule Map Text (t ListItem)
lists)
                                                 (Context -> [Rule]
cRules Context
context) }
   goRule :: Map Text (t ListItem) -> Rule -> Rule
goRule Map Text (t ListItem)
lists Rule
rule =
     case Rule -> Matcher
rMatcher Rule
rule of
        Keyword KeywordAttr
kwattr (Left Text
listname) ->
          case forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
listname Map Text (t ListItem)
lists of
            Maybe (t ListItem)
Nothing -> Rule
rule
            Just t ListItem
lst -> Rule
rule{ rMatcher :: Matcher
rMatcher =
             KeywordAttr -> Either Text (WordSet Text) -> Matcher
Keyword KeywordAttr
kwattr (forall a b. b -> Either a b
Right (forall a. (FoldCase a, Ord a) => Bool -> [a] -> WordSet a
makeWordSet (KeywordAttr -> Bool
keywordCaseSensitive KeywordAttr
kwattr)
                                      (forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ListItem -> [Text] -> [Text]
goItem [] t ListItem
lst))) }
        Matcher
_ -> Rule
rule

   goItem :: ListItem -> [Text] -> [Text]
goItem (Item Text
t) [Text]
ts = Text
tforall a. a -> [a] -> [a]
:[Text]
ts
   goItem (IncludeList (Text
syntaxname,Text
listname)) [Text]
ts =
     case forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
syntaxname SyntaxMap
sm forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
listname forall b c a. (b -> c) -> (a -> b) -> a -> c
. Syntax -> Map Text [ListItem]
sLists of
       Maybe [ListItem]
Nothing -> [Text]
ts
       Just [ListItem]
lst -> forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ListItem -> [Text] -> [Text]
goItem [Text]
ts [ListItem]
lst

-- | Parses an XML 'Document' as a 'Syntax' description.
documentToSyntax :: Monad m
                 => FilePath -- ^ used for short name
                 -> Document
                 -> ExceptT String m Syntax
documentToSyntax :: forall (m :: * -> *).
Monad m =>
[Char] -> Document -> ExceptT [Char] m Syntax
documentToSyntax [Char]
fp Document{ documentRoot :: Document -> Element
documentRoot = Element
rootEl } = do
  forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Element -> Name
elementName Element
rootEl forall a. Eq a => a -> a -> Bool
== Name
"language") forall a b. (a -> b) -> a -> b
$
    forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError [Char]
"Root element is not language"
  let filename :: [Char]
filename = [Char] -> [Char]
takeFileName [Char]
fp
  let casesensitive :: Bool
casesensitive = Bool -> Text -> Bool
vBool Bool
True forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"casesensitive" Element
rootEl

  Element
hlEl <- case [Char] -> Element -> [Element]
getElementsNamed [Char]
"highlighting" Element
rootEl of
            []      -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError [Char]
"No highlighting element"
            (Element
hl:[Element]
_)  -> forall (m :: * -> *) a. Monad m => a -> m a
return Element
hl

  Map Text [ListItem]
lists <- forall k a. Ord k => [(k, a)] -> Map k a
M.fromList forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM forall (m :: * -> *).
Monad m =>
Element -> ExceptT [Char] m (Text, [ListItem])
getList ([Char] -> Element -> [Element]
getElementsNamed [Char]
"list" Element
hlEl)

  let itemDatas :: ItemData
itemDatas = Element -> ItemData
getItemData Element
hlEl

  let defKeywordAttr :: KeywordAttr
defKeywordAttr = Element -> KeywordAttr
getKeywordAttrs Element
rootEl

  let contextEls :: [Element]
contextEls = [Char] -> Element -> [Element]
getElementsNamed [Char]
"contexts" Element
hlEl forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
                   [Char] -> Element -> [Element]
getElementsNamed [Char]
"context"

  let syntaxname :: Text
syntaxname = [Char] -> Element -> Text
getAttrValue [Char]
"name" Element
rootEl

  [Context]
contexts <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM
    (forall (m :: * -> *).
Monad m =>
Bool
-> Text
-> ItemData
-> Map Text [ListItem]
-> KeywordAttr
-> Element
-> ExceptT [Char] m Context
getContext Bool
casesensitive Text
syntaxname ItemData
itemDatas Map Text [ListItem]
lists KeywordAttr
defKeywordAttr)
    [Element]
contextEls

  Text
startingContext <- case [Context]
contexts of
                       (Context
c:[Context]
_) -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Context -> Text
cName Context
c
                       []    -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError [Char]
"No contexts"

  forall (m :: * -> *) a. Monad m => a -> m a
return Syntax{
             sName :: Text
sName       = [Char] -> Element -> Text
getAttrValue [Char]
"name" Element
rootEl
           , sFilename :: [Char]
sFilename   = [Char]
filename
           , sShortname :: Text
sShortname  = [Char] -> Text
T.pack forall a b. (a -> b) -> a -> b
$ [Char] -> [Char]
pathToLangName [Char]
filename
           , sAuthor :: Text
sAuthor     = [Char] -> Element -> Text
getAttrValue [Char]
"author" Element
rootEl
           , sVersion :: Text
sVersion    = [Char] -> Element -> Text
getAttrValue [Char]
"version" Element
rootEl
           , sLicense :: Text
sLicense    = [Char] -> Element -> Text
getAttrValue [Char]
"license" Element
rootEl
           , sExtensions :: [[Char]]
sExtensions = [Char] -> [[Char]]
words forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (\Char
c -> if Char
c forall a. Eq a => a -> a -> Bool
== Char
';' then Char
' ' else Char
c)
                                 forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack
                                 forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"extensions" Element
rootEl
           , sLists :: Map Text [ListItem]
sLists      = Map Text [ListItem]
lists
           , sContexts :: Map Text Context
sContexts   = forall k a. Ord k => [(k, a)] -> Map k a
M.fromList
                  [(Context -> Text
cName Context
c, Context
c) | Context
c <- [Context]
contexts]
           , sStartingContext :: Text
sStartingContext = Text
startingContext
           }

elementNamed :: String -> Node -> Bool
elementNamed :: [Char] -> Node -> Bool
elementNamed [Char]
name (NodeElement Element
el) = Element -> Name
elementName Element
el forall a. Eq a => a -> a -> Bool
== forall a. IsString a => [Char] -> a
String.fromString [Char]
name
elementNamed [Char]
_ Node
_ = Bool
False

getElementsNamed :: String -> Element -> [Element]
getElementsNamed :: [Char] -> Element -> [Element]
getElementsNamed [Char]
name Element
node =
  [Element
el | NodeElement Element
el <- forall a. (a -> Bool) -> [a] -> [a]
filter ([Char] -> Node -> Bool
elementNamed [Char]
name) (Element -> [Node]
elementNodes Element
node)]

getAttrValue :: String -> Element -> Text
getAttrValue :: [Char] -> Element -> Text
getAttrValue [Char]
key Element
el = forall a. a -> Maybe a -> a
fromMaybe forall a. Monoid a => a
mempty forall a b. (a -> b) -> a -> b
$ forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup (forall a. IsString a => [Char] -> a
String.fromString [Char]
key)
                                       forall a b. (a -> b) -> a -> b
$ Element -> Map Name Text
elementAttributes Element
el

getTextContent :: Element -> Text
getTextContent :: Element -> Text
getTextContent Element
el =
  forall a. Monoid a => [a] -> a
mconcat [Text
t | NodeContent Text
t <- Element -> [Node]
elementNodes Element
el]

getList :: Monad m => Element -> ExceptT String m (Text, [ListItem])
getList :: forall (m :: * -> *).
Monad m =>
Element -> ExceptT [Char] m (Text, [ListItem])
getList Element
el = do
  case forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Name
"name" (Element -> Map Name Text
elementAttributes Element
el) of
    Maybe Text
Nothing   -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError [Char]
"No name attribute on list"
    Just Text
name -> (Text
name,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
                   forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM forall {m :: * -> *}. MonadError [Char] m => Element -> m ListItem
toListItem [Element
el' | NodeElement Element
el' <- Element -> [Node]
elementNodes Element
el]
 where
   toListItem :: Element -> m ListItem
toListItem Element
el' = case Element -> Name
elementName Element
el' of
                      Name
"item"    -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> ListItem
Item forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip forall a b. (a -> b) -> a -> b
$ Element -> Text
getTextContent Element
el'
                      Name
"include" -> do
                        let (Text
syntaxname, Text
listname) =
                                Text -> (Text, Text)
splitContext (Element -> Text
getTextContent Element
el')
                        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Text, Text) -> ListItem
IncludeList (Text
syntaxname, Text
listname)
                      Name
x -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall a b. (a -> b) -> a -> b
$ [Char]
"Unknown element " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Name
x forall a. [a] -> [a] -> [a]
++
                                        [Char]
" in list"

splitContext :: Text -> (Text, Text)
splitContext :: Text -> (Text, Text)
splitContext Text
t =
  case Text -> Text -> (Text, Text)
T.breakOn Text
"##" (Text -> Text
T.strip Text
t) of
    (Text
x, Text
y) | Text -> Bool
T.null Text
y  -> (Text
"", Text
x)
           | Bool
otherwise -> (Int -> Text -> Text
T.drop Int
2 Text
y, Text
x)

getParser :: Monad m
          => Bool -> Text -> ItemData -> M.Map Text [ListItem] -> KeywordAttr
          -> Text -> Element -> ExceptT String m Rule
getParser :: forall (m :: * -> *).
Monad m =>
Bool
-> Text
-> ItemData
-> Map Text [ListItem]
-> KeywordAttr
-> Text
-> Element
-> ExceptT [Char] m Rule
getParser Bool
casesensitive Text
syntaxname ItemData
itemdatas Map Text [ListItem]
lists KeywordAttr
kwattr Text
cattr Element
el = do
  let name :: Text
name = Name -> Text
nameLocalName forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> Name
elementName forall a b. (a -> b) -> a -> b
$ Element
el
  let attribute :: Text
attribute = [Char] -> Element -> Text
getAttrValue [Char]
"attribute" Element
el
  let context :: Text
context = [Char] -> Element -> Text
getAttrValue [Char]
"context" Element
el
  let char0 :: Char
char0 = Text -> Char
readChar forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"char" Element
el
  let char1 :: Char
char1 = Text -> Char
readChar forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"char1" Element
el
  let str' :: Text
str' = [Char] -> Element -> Text
getAttrValue [Char]
"String" Element
el
  let insensitive :: Bool
insensitive = Bool -> Text -> Bool
vBool (Bool -> Bool
not Bool
casesensitive) forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"insensitive" Element
el
  let includeAttrib :: Bool
includeAttrib = Bool -> Text -> Bool
vBool Bool
False forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"includeAttrib" Element
el
  let lookahead :: Bool
lookahead = Bool -> Text -> Bool
vBool Bool
False forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"lookAhead" Element
el
  let firstNonSpace :: Bool
firstNonSpace = Bool -> Text -> Bool
vBool Bool
False forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"firstNonSpace" Element
el
  let column' :: Text
column' = [Char] -> Element -> Text
getAttrValue [Char]
"column" Element
el
  let dynamic :: Bool
dynamic = Bool -> Text -> Bool
vBool Bool
False forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"dynamic" Element
el
  [Rule]
children <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (forall (m :: * -> *).
Monad m =>
Bool
-> Text
-> ItemData
-> Map Text [ListItem]
-> KeywordAttr
-> Text
-> Element
-> ExceptT [Char] m Rule
getParser Bool
casesensitive
                    Text
syntaxname ItemData
itemdatas Map Text [ListItem]
lists KeywordAttr
kwattr Text
attribute)
                  [Element
e | NodeElement Element
e <- Element -> [Node]
elementNodes Element
el ]
  let tildeRegex :: Bool
tildeRegex = Text
name forall a. Eq a => a -> a -> Bool
== Text
"RegExpr" Bool -> Bool -> Bool
&& Int -> Text -> Text
T.take Int
1 Text
str' forall a. Eq a => a -> a -> Bool
== Text
"^"
  let str :: Text
str = if Bool
tildeRegex then Int -> Text -> Text
T.drop Int
1 Text
str' else Text
str'
  let column :: Maybe Int
column = if Bool
tildeRegex
                  then forall a. a -> Maybe a
Just (Int
0 :: Int)
                  else forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\[Char]
_ -> forall a. Maybe a
Nothing) (forall a. a -> Maybe a
Just forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) forall a b. (a -> b) -> a -> b
$
                         forall a. Integral a => Reader a
TR.decimal Text
column'
  let re :: Matcher
re = RE -> Matcher
RegExpr RE{ reString :: ByteString
reString = Text -> ByteString
TE.encodeUtf8 Text
str
                     , reCaseSensitive :: Bool
reCaseSensitive = Bool -> Bool
not Bool
insensitive }
  let (Text
incsyntax, Text
inccontext) =
          case Text -> Text -> (Text, Text)
T.breakOn Text
"##" Text
context of
                (Text
_,Text
x) | Text -> Bool
T.null Text
x -> (Text
syntaxname, Text
context)
                (Text
cont, Text
lang)     -> (Int -> Text -> Text
T.drop Int
2 Text
lang, Text
cont)
  Matcher
matcher <- case Text
name of
                 Text
"DetectChar" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Matcher
DetectChar Char
char0
                 Text
"Detect2Chars" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Char -> Matcher
Detect2Chars Char
char0 Char
char1
                 Text
"AnyChar" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Set Char -> Matcher
AnyChar forall a b. (a -> b) -> a -> b
$ forall a. Ord a => [a] -> Set a
Set.fromList forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
str
                 Text
"RangeDetect" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Char -> Matcher
RangeDetect Char
char0 Char
char1
                 Text
"StringDetect" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Matcher
StringDetect Text
str
                 Text
"WordDetect" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Matcher
WordDetect Text
str
                 Text
"RegExpr" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Matcher
re
                 Text
"keyword" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ KeywordAttr -> Either Text (WordSet Text) -> Matcher
Keyword KeywordAttr
kwattr (forall a b. a -> Either a b
Left Text
str)
                 Text
"Int" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Matcher
Int
                 Text
"Float" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Matcher
Float
                 Text
"HlCOct" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Matcher
HlCOct
                 Text
"HlCHex" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Matcher
HlCHex
                 Text
"HlCStringChar" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Matcher
HlCStringChar
                 Text
"HlCChar" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Matcher
HlCChar
                 Text
"LineContinue" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Matcher
LineContinue
                 Text
"IncludeRules" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
                   (Text, Text) -> Matcher
IncludeRules (Text
incsyntax, Text
inccontext)
                 Text
"DetectSpaces" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Matcher
DetectSpaces
                 Text
"DetectIdentifier" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Matcher
DetectIdentifier
                 Text
_ -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall a b. (a -> b) -> a -> b
$ [Char]
"Unknown element " forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name

  let contextSwitch :: [ContextSwitch]
contextSwitch = if Text
name forall a. Eq a => a -> a -> Bool
== Text
"IncludeRules"
                         then []  -- is this right?
                         else Text -> Text -> [ContextSwitch]
parseContextSwitch Text
incsyntax Text
inccontext
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Rule{ rMatcher :: Matcher
rMatcher = Matcher
matcher
               , rAttribute :: TokenType
rAttribute = forall a. a -> Maybe a -> a
fromMaybe TokenType
NormalTok forall a b. (a -> b) -> a -> b
$
                    if Text -> Bool
T.null Text
attribute
                       then forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
cattr ItemData
itemdatas
                       else forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
attribute ItemData
itemdatas
               , rIncludeAttribute :: Bool
rIncludeAttribute = Bool
includeAttrib
               , rDynamic :: Bool
rDynamic = Bool
dynamic
               , rCaseSensitive :: Bool
rCaseSensitive = Bool -> Bool
not Bool
insensitive
               , rChildren :: [Rule]
rChildren = [Rule]
children
               , rContextSwitch :: [ContextSwitch]
rContextSwitch = [ContextSwitch]
contextSwitch
               , rLookahead :: Bool
rLookahead = Bool
lookahead
               , rFirstNonspace :: Bool
rFirstNonspace = Bool
firstNonSpace
               , rColumn :: Maybe Int
rColumn = Maybe Int
column
               }


getContext :: Monad m
           => Bool
           -> Text
           -> ItemData
           -> M.Map Text [ListItem]
           -> KeywordAttr
           -> Element
           -> ExceptT String m Context
getContext :: forall (m :: * -> *).
Monad m =>
Bool
-> Text
-> ItemData
-> Map Text [ListItem]
-> KeywordAttr
-> Element
-> ExceptT [Char] m Context
getContext Bool
casesensitive Text
syntaxname ItemData
itemDatas Map Text [ListItem]
lists KeywordAttr
kwattr Element
el = do
  let name :: Text
name = [Char] -> Element -> Text
getAttrValue [Char]
"name" Element
el
  let attribute :: Text
attribute = [Char] -> Element -> Text
getAttrValue [Char]
"attribute" Element
el
  let lineEmptyContext :: Text
lineEmptyContext = [Char] -> Element -> Text
getAttrValue [Char]
"lineEmptyContext" Element
el
  let lineEndContext :: Text
lineEndContext = [Char] -> Element -> Text
getAttrValue [Char]
"lineEndContext" Element
el
  let lineBeginContext :: Text
lineBeginContext = [Char] -> Element -> Text
getAttrValue [Char]
"lineBeginContext" Element
el
  let fallthrough :: Bool
fallthrough = Bool -> Text -> Bool
vBool Bool
False forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"fallthrough" Element
el
  let fallthroughContext :: Text
fallthroughContext = [Char] -> Element -> Text
getAttrValue [Char]
"fallthroughContext" Element
el
  let dynamic :: Bool
dynamic = Bool -> Text -> Bool
vBool Bool
False forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"dynamic" Element
el

  [Rule]
parsers <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (forall (m :: * -> *).
Monad m =>
Bool
-> Text
-> ItemData
-> Map Text [ListItem]
-> KeywordAttr
-> Text
-> Element
-> ExceptT [Char] m Rule
getParser Bool
casesensitive
                    Text
syntaxname ItemData
itemDatas Map Text [ListItem]
lists KeywordAttr
kwattr Text
attribute)
                  [Element
e | NodeElement Element
e <- Element -> [Node]
elementNodes Element
el ]

  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Context {
            cName :: Text
cName = Text
name
          , cSyntax :: Text
cSyntax = Text
syntaxname
          , cRules :: [Rule]
cRules = [Rule]
parsers
          , cAttribute :: TokenType
cAttribute = forall a. a -> Maybe a -> a
fromMaybe TokenType
NormalTok forall a b. (a -> b) -> a -> b
$ forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
attribute ItemData
itemDatas
          , cLineEmptyContext :: [ContextSwitch]
cLineEmptyContext =
               Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
lineEmptyContext
          , cLineEndContext :: [ContextSwitch]
cLineEndContext =
               Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
lineEndContext
          , cLineBeginContext :: [ContextSwitch]
cLineBeginContext =
               Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
lineBeginContext
          , cFallthrough :: Bool
cFallthrough = Bool
fallthrough
          , cFallthroughContext :: [ContextSwitch]
cFallthroughContext =
               Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
fallthroughContext
          , cDynamic :: Bool
cDynamic = Bool
dynamic
          }

getItemData :: Element -> ItemData
getItemData :: Element -> ItemData
getItemData Element
el = [(Text, Text)] -> ItemData
toItemDataTable forall a b. (a -> b) -> a -> b
$
  [([Char] -> Element -> Text
getAttrValue [Char]
"name" Element
e, [Char] -> Element -> Text
getAttrValue [Char]
"defStyleNum" Element
e)
    | Element
e <- ([Char] -> Element -> [Element]
getElementsNamed [Char]
"itemDatas" Element
el forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Char] -> Element -> [Element]
getElementsNamed [Char]
"itemData")
  ]

getKeywordAttrs :: Element -> KeywordAttr
getKeywordAttrs :: Element -> KeywordAttr
getKeywordAttrs Element
el =
  case ([Char] -> Element -> [Element]
getElementsNamed [Char]
"general" Element
el forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Char] -> Element -> [Element]
getElementsNamed [Char]
"keywords") of
     []    -> KeywordAttr
defaultKeywordAttr
     (Element
x:[Element]
_) ->
       let weakDelim :: [Char]
weakDelim = Text -> [Char]
T.unpack forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"weakDeliminator" Element
x
           additionalDelim :: [Char]
additionalDelim = Text -> [Char]
T.unpack forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"additionalDeliminator" Element
x
        in KeywordAttr { keywordCaseSensitive :: Bool
keywordCaseSensitive =
                             Bool -> Text -> Bool
vBool Bool
True forall a b. (a -> b) -> a -> b
$ [Char] -> Element -> Text
getAttrValue [Char]
"casesensitive" Element
x
                       , keywordDelims :: Set Char
keywordDelims = forall a. Ord a => Set a -> Set a -> Set a
Set.union Set Char
standardDelims
                           (forall a. Ord a => [a] -> Set a
Set.fromList [Char]
additionalDelim)
                             forall a. Ord a => Set a -> Set a -> Set a
Set.\\ forall a. Ord a => [a] -> Set a
Set.fromList [Char]
weakDelim }

parseContextSwitch :: Text -> Text -> [ContextSwitch]
parseContextSwitch :: Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
t =
  if Text -> Bool
T.null Text
t Bool -> Bool -> Bool
|| Text
t forall a. Eq a => a -> a -> Bool
== Text
"#stay"
     then []
     else
       case Text -> Text -> Maybe Text
T.stripPrefix Text
"#pop" Text
t of
         Just Text
rest -> ContextSwitch
Pop forall a. a -> [a] -> [a]
: Text -> Text -> [ContextSwitch]
parseContextSwitch Text
syntaxname Text
rest
         Maybe Text
Nothing   ->
           let (Text
othersyntax, Text
contextname) =
                  Text -> (Text, Text)
splitContext ((Char -> Bool) -> Text -> Text
T.dropWhile (forall a. Eq a => a -> a -> Bool
==Char
'!') Text
t)
               syntaxname' :: Text
syntaxname' = if Text -> Bool
T.null Text
othersyntax
                                then Text
syntaxname
                                else Text
othersyntax
            in [(Text, Text) -> ContextSwitch
Push (Text
syntaxname', Text
contextname)]

type ItemData = M.Map Text TokenType

toItemDataTable :: [(Text, Text)] -> ItemData
toItemDataTable :: [(Text, Text)] -> ItemData
toItemDataTable = forall k a. Ord k => [(k, a)] -> Map k a
M.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (\(Text
s,Text
t) -> (Text
s, Text -> TokenType
toTokenType Text
t))

toTokenType :: Text -> TokenType
toTokenType :: Text -> TokenType
toTokenType Text
t =
  case Text
t of
    Text
"dsNormal"         -> TokenType
NormalTok
    Text
"dsKeyword"        -> TokenType
KeywordTok
    Text
"dsDataType"       -> TokenType
DataTypeTok
    Text
"dsDecVal"         -> TokenType
DecValTok
    Text
"dsBaseN"          -> TokenType
BaseNTok
    Text
"dsFloat"          -> TokenType
FloatTok
    Text
"dsConstant"       -> TokenType
ConstantTok
    Text
"dsChar"           -> TokenType
CharTok
    Text
"dsSpecialChar"    -> TokenType
SpecialCharTok
    Text
"dsString"         -> TokenType
StringTok
    Text
"dsVerbatimString" -> TokenType
VerbatimStringTok
    Text
"dsSpecialString"  -> TokenType
SpecialStringTok
    Text
"dsImport"         -> TokenType
ImportTok
    Text
"dsComment"        -> TokenType
CommentTok
    Text
"dsDocumentation"  -> TokenType
DocumentationTok
    Text
"dsAnnotation"     -> TokenType
AnnotationTok
    Text
"dsCommentVar"     -> TokenType
CommentVarTok
    Text
"dsOthers"         -> TokenType
OtherTok
    Text
"dsFunction"       -> TokenType
FunctionTok
    Text
"dsVariable"       -> TokenType
VariableTok
    Text
"dsControlFlow"    -> TokenType
ControlFlowTok
    Text
"dsOperator"       -> TokenType
OperatorTok
    Text
"dsBuiltIn"        -> TokenType
BuiltInTok
    Text
"dsExtension"      -> TokenType
ExtensionTok
    Text
"dsPreprocessor"   -> TokenType
PreprocessorTok
    Text
"dsAttribute"      -> TokenType
AttributeTok
    Text
"dsRegionMarker"   -> TokenType
RegionMarkerTok
    Text
"dsInformation"    -> TokenType
InformationTok
    Text
"dsWarning"        -> TokenType
WarningTok
    Text
"dsAlert"          -> TokenType
AlertTok
    Text
"dsError"          -> TokenType
ErrorTok
    Text
_                  -> TokenType
NormalTok

-- Note, some xml files have "\\" for a backslash,
-- others have "\".  Not sure what the rules are, but
-- this covers both bases:
readChar :: Text -> Char
readChar :: Text -> Char
readChar Text
t = case Text -> [Char]
T.unpack Text
t of
                  [Char
c] -> Char
c
                  [Char]
s   -> forall a. Read a => a -> [Char] -> a
readDef Char
'\xffff' forall a b. (a -> b) -> a -> b
$ [Char]
"'" forall a. [a] -> [a] -> [a]
++ [Char]
s forall a. [a] -> [a] -> [a]
++ [Char]
"'"

pathToLangName :: String -> String
pathToLangName :: [Char] -> [Char]
pathToLangName [Char]
s = [Char] -> [Char]
capitalize ([Char] -> [Char]
camelize ([Char] -> [Char]
takeBaseName [Char]
s))

camelize :: String -> String
camelize :: [Char] -> [Char]
camelize (Char
d:Char
c:[Char]
cs) | Bool -> Bool
not (Char -> Bool
isAlphaNum Char
d) = Char -> Char
toUpper Char
c forall a. a -> [a] -> [a]
: [Char] -> [Char]
camelize [Char]
cs
camelize (Char
c:[Char]
cs)   = Char
c forall a. a -> [a] -> [a]
: [Char] -> [Char]
camelize [Char]
cs
camelize []       = []

capitalize :: String -> String
capitalize :: [Char] -> [Char]
capitalize (Char
c:[Char]
cs) = Char -> Char
toUpper Char
c forall a. a -> [a] -> [a]
: [Char]
cs
capitalize []     = []