{- This file is part of razom-text-util.
 -
 - Written in 2015 by fr33domlover <fr33domlover@rel4tion.org>.
 -
 - ♡ Copying is an act of love. Please copy, reuse and share.
 -
 - The author(s) have dedicated all copyright and related and neighboring
 - rights to this software to the public domain worldwide. This software is
 - distributed without any warranty.
 -
 - You should have received a copy of the CC0 Public Domain Dedication along
 - with this software. If not, see
 - <http://creativecommons.org/publicdomain/zero/1.0/>.
 -}

module Text.Razom.Lexer
    ( tokenizeString
    , tokenizeText
    , tokenizeFile
    )
where

import Control.Monad (liftM)
import Data.Position
import qualified Data.Text.Lazy as T
import Text.Razom.Types
import Text.Regex.Applicative
import System.IO

-- | Tokenize a 'String' using the given token regex.
--
-- The input is passed through 'enrich' (with the supplied 'Advance'
-- function) and the regex through 'bless'; presumably these attach source
-- positions to the characters and to the resulting tokens -- confirm against
-- their definitions. The longest prefix matching zero or more repetitions of
-- the token regex is then consumed.
--
-- * If nothing is left over, the tokens are returned in 'Right'.
-- * If input remains, a 'LexError' is returned in 'Left', built from the
--   position of the first unconsumed character, the tokens matched so far
--   and the unconsumed characters.
-- * If no prefix matches at all, the 'LexError' is built from
--   'firstPosition', an empty token list and the whole original input.
tokenizeString :: Advance Char -> Regex t -> String -> LexResult t
tokenizeString adv re syms =
    case findLongestPrefix tokens located of
        Nothing ->
            Left $ LexError firstPosition [] syms
        Just (found, []) ->
            Right $ strip found
        Just (found, Positioned ch at : rest) ->
            Left $ LexError at (strip found) (ch : strip rest)
  where
    -- Zero or more tokens, matched over the enriched input.
    tokens  = many (bless re)
    -- Only the first component of the 'enrich' result is needed here.
    located = fst (enrich adv syms)
    -- Drop the position wrapper from a single item / from every item.
    unwrap (Positioned x _) = x
    strip xs = map unwrap xs

-- | Tokenize a lazy 'T.Text' value.
--
-- The text is unpacked into a 'String' and handed to 'tokenizeString' with
-- the same 'Advance' function and token regex.
tokenizeText :: Advance Char -> Regex t -> T.Text -> LexResult t
tokenizeText adv re txt = tokenizeString adv re (T.unpack txt)

-- | Tokenize the contents of a file.
--
-- The file at the given path is read with 'readFile' and its contents are
-- handed to 'tokenizeString' with the same 'Advance' function and token
-- regex. The lexing outcome is returned inside 'IO'.
tokenizeFile :: Advance Char -> Regex t -> FilePath -> IO (LexResult t)
tokenizeFile adv re fp = do
    contents <- readFile fp
    return (tokenizeString adv re contents)