{- This file is part of razom-text-util.
 -
 - Written in 2015 by fr33domlover.
 -
 - ♡ Copying is an act of love. Please copy, reuse and share.
 -
 - The author(s) have dedicated all copyright and related and neighboring
 - rights to this software to the public domain worldwide. This software is
 - distributed without any warranty.
 -
 - You should have received a copy of the CC0 Public Domain Dedication along
 - with this software. If not, see
 - <http://creativecommons.org/publicdomain/zero/1.0/>.
 -}

-- | Entry points for running a token regex over character input taken from
-- a 'String', a lazy 'T.Text' or a file.
module Text.Razom.Lexer
    ( tokenizeString
    , tokenizeText
    , tokenizeFile
    )
where

import Control.Monad (liftM)
import Data.Position
import qualified Data.Text.Lazy as T
import Text.Razom.Types
import Text.Regex.Applicative
import System.IO

-- | Tokenize a 'String'.
--
-- The input is passed through 'enrich' (using the given advance rule) and
-- the longest prefix matched by zero or more repetitions of the blessed
-- token regex is taken with 'findLongestPrefix'.
--
-- * If that prefix covers the whole input, the result is 'Right' with the
--   tokens, stripped of their 'Positioned' wrappers.
--
-- * If some input is left over, the result is 'Left' with a 'LexError'
--   built from the position attached to the first leftover symbol, the
--   tokens matched so far, and the leftover characters.
--
-- * If 'findLongestPrefix' yields no match at all, the result is 'Left'
--   with a 'LexError' at 'firstPosition', no tokens, and the whole input.
--
-- NOTE(review): 'bless' and 'enrich' are defined outside this module;
-- presumably they lift the regex and the input to position-annotated
-- symbols -- confirm against their definitions.
tokenizeString :: Advance Char -> Regex t -> String -> LexResult t
tokenizeString adv re syms =
    case findLongestPrefix tokens positioned of
        Nothing ->
            Left (LexError firstPosition [] syms)
        Just (matched, []) ->
            Right (strip matched)
        Just (matched, Positioned c errPos : rest) ->
            Left (LexError errPos (strip matched) (c : strip rest))
  where
    -- Zero or more tokens, each recognized by the blessed token regex.
    tokens = many (bless re)

    -- Only the first component of the pair returned by 'enrich' is used.
    positioned = fst (enrich adv syms)

    -- Drop the position wrapper from every element. Used both on the
    -- matched tokens and on the leftover input symbols.
    strip items = map unwrap items
      where
        unwrap (Positioned x _) = x

-- | Tokenize a lazy 'T.Text' by unpacking it to a 'String' and delegating
-- to 'tokenizeString'.
tokenizeText :: Advance Char -> Regex t -> T.Text -> LexResult t
tokenizeText adv re txt = tokenizeString adv re (T.unpack txt)

-- | Read the file at the given path with 'readFile' and tokenize its
-- contents with 'tokenizeString'.
tokenizeFile :: Advance Char -> Regex t -> FilePath -> IO (LexResult t)
tokenizeFile adv re fp = tokenizeString adv re `liftM` readFile fp