module Text.Razom.Lexer
( tokenizeString
, tokenizeText
, tokenizeFile
)
where
import Control.Monad (liftM)
import Data.Position
import qualified Data.Text.Lazy as T
import Text.Razom.Types
import Text.Regex.Applicative
import System.IO
-- | Tokenize a 'String' using the given token regex.
--
-- The input is run through 'enrich' with the supplied 'Advance' (only the
-- first component of its result is used), the regex is wrapped with 'bless'
-- and repeated with 'many', and the longest matching prefix is taken with
-- 'findLongestPrefix'.
--
-- * If no input is left after the matched prefix, the result is 'Right'
--   with the matched tokens, stripped of their 'Positioned' wrappers.
-- * If input is left over, the result is a 'Left' 'LexError' carrying the
--   position of the first unconsumed symbol, the tokens matched so far and
--   the unconsumed symbols.
-- * If there is no match at all, the result is a 'Left' 'LexError' at
--   'firstPosition' with no tokens and the entire input.
tokenizeString :: Advance Char -> Regex t -> String -> LexResult t
tokenizeString adv re syms =
  case findLongestPrefix tokens input of
    Nothing ->
      Left (LexError firstPosition [] syms)
    Just (matched, rest) ->
      case rest of
        [] ->
          Right (unwrap matched)
        Positioned c pos : others ->
          Left (LexError pos (unwrap matched) (c : unwrap others))
  where
    -- Zero or more tokens, matched over position-annotated symbols.
    tokens = many (bless re)
    -- Position-annotated version of the raw input.
    input = fst (enrich adv syms)
    -- Drop the position annotation from every element of a list.
    unwrap = map (\(Positioned x _) -> x)
-- | Tokenize a lazy 'T.Text'.
--
-- The text is converted to a 'String' with 'T.unpack' and handed to
-- 'tokenizeString' together with the same 'Advance' and regex.
tokenizeText :: Advance Char -> Regex t -> T.Text -> LexResult t
tokenizeText adv re txt = tokenizeString adv re (T.unpack txt)
-- | Read a file and tokenize its contents with 'tokenizeString'.
--
-- The file is read completely before tokenization starts. Prelude's
-- 'readFile' is lazy: on its own it would keep the file handle open until
-- the whole contents had been demanded, which may never happen when lexing
-- stops early at an error, and it would let read or decoding errors surface
-- later inside pure code. Forcing the contents here means the handle is
-- released and any such error is raised while this 'IO' action runs.
tokenizeFile :: Advance Char -> Regex t -> FilePath -> IO (LexResult t)
tokenizeFile adv re fp = liftM (tokenizeString adv re) (readFileStrict fp)
  where
    -- Read the whole file eagerly; walking the string to its end with
    -- 'length' drives the lazy read to end of file.
    readFileStrict path = do
      contents <- readFile path
      length contents `seq` return contents