module NLP.Types.Tags
where
import Data.Serialize (Serialize)
import Data.Serialize.Text ()
import Data.Text (Text)
import qualified Data.Text as T
import GHC.Generics
import Text.Read (readEither)
import Test.QuickCheck (Arbitrary(..), NonEmptyList(..))
import Test.QuickCheck.Instances ()
import NLP.Types.General (Error, toEitherErr)
-- | Tag sets for named-entity labels (presumably "NER" = named entity
-- recognition, going by the class name).
--
-- Both methods carry default implementations built on the 'Show' and
-- 'Read' superclass instances, so an instance may leave its body empty.
class ( Ord a
      , Eq a
      , Read a
      , Show a
      , Generic a
      , Serialize a
      ) => NERTag a where
  -- | Render a tag as 'Text'.  The default packs the tag's 'show' output.
  fromNERTag :: a -> Text
  fromNERTag tag = T.pack (show tag)

  -- | Parse a tag from 'Text'.  The default unpacks the text, runs
  -- 'readEither' on it, and passes the outcome through 'toEitherErr' to
  -- obtain the 'Either' 'Error' result.
  parseNERTag :: Text -> Either Error a
  parseNERTag = toEitherErr . readEither . T.unpack
-- | Tag sets used to label chunks.  The class has no default methods, so
-- every instance must supply all three members.
class ( Ord a
      , Eq a
      , Read a
      , Show a
      , Generic a
      , Serialize a
      ) => ChunkTag a where
  -- | Render a chunk tag as 'Text'.
  fromChunk :: a -> Text

  -- | Parse a chunk tag from 'Text', reporting failure as an 'Error'.
  parseChunk :: Text -> Either Error a

  -- | Distinguished tag, presumably meaning "not part of any chunk"
  -- (going by the name -- confirm against callers).
  notChunk :: a
-- | Interface every tag set must provide.  The class has no default
-- methods; the notes below are limited to what the signatures show, with
-- assumptions marked as such.
class ( Ord a
      , Eq a
      , Read a
      , Show a
      , Generic a
      , Serialize a
      ) => Tag a where
  -- | Render a tag as 'Text'.
  fromTag :: a -> Text

  -- | Build a tag from 'Text'.  The signature has no failure channel, so
  -- an instance has to map every input to some tag.
  parseTag :: Text -> a

  -- | Distinguished tag, presumably standing for "unknown" (by its name).
  tagUNK :: a

  -- | Textual form of a tag.  How this is meant to differ from 'fromTag'
  -- is not visible here -- TODO confirm with callers.
  tagTerm :: a -> Text

  -- | Marker tag, presumably for the start of a tagged sequence.
  startTag :: a

  -- | Marker tag, presumably for the end of a tagged sequence.
  endTag :: a

  -- | Predicate on tags; by its name it presumably picks out determiner
  -- (@DT@) tags -- confirm per instance.
  isDt :: a -> Bool
-- | A chunk tag kept as uninterpreted 'Text'.
newtype RawChunk = RawChunk Text
  deriving (Eq, Ord, Read, Show, Generic)
-- | 'Serialize' instance for 'RawChunk'.  The body is empty, so every
-- method comes from the class defaults (presumably the 'Generic'-based
-- ones, given the derived 'Generic' instance -- confirm against the
-- serialization library version in use).
instance Serialize RawChunk
-- | 'RawChunk' as a chunk tag set: the wrapped text is the tag itself.
instance ChunkTag RawChunk where
  -- Rendering just unwraps the stored text.
  fromChunk (RawChunk label) = label

  -- Any text is accepted verbatim, so parsing never yields a 'Left'.
  parseChunk = Right . RawChunk

  -- The distinguished "not a chunk" tag is the literal @O@.
  notChunk = RawChunk "O"
-- | A tag kept as uninterpreted 'Text'.
newtype RawTag = RawTag Text
  deriving (Eq, Ord, Read, Show, Generic)
-- | 'Serialize' instance for 'RawTag'.  The body is empty, so every
-- method comes from the class defaults (presumably the 'Generic'-based
-- ones, given the derived 'Generic' instance -- confirm against the
-- serialization library version in use).
instance Serialize RawTag
-- | 'RawTag' as a tag set: the wrapped text is the tag itself.
instance Tag RawTag where
  -- Rendering just unwraps the stored text.
  fromTag (RawTag label) = label

  -- Any text is accepted verbatim as a tag.
  parseTag = RawTag

  tagUNK = RawTag "Unk"

  -- Same text as 'fromTag' for this instance.
  tagTerm = fromTag

  startTag = RawTag "-START-"

  endTag = RawTag "-END-"

  -- True exactly when the wrapped text is the literal @DT@.
  isDt (RawTag label) = "DT" == label
-- | Random 'RawTag's for property tests.
--
-- Generated tags always wrap non-empty text.  A 'shrink' is provided so
-- that failing properties report a reduced tag rather than the raw random
-- string; shrinking never produces the empty tag, keeping shrunk values
-- within what 'arbitrary' itself can generate.
instance Arbitrary RawTag where
  -- Draw a non-empty 'String' and pack it into a tag.
  arbitrary = fmap (RawTag . T.pack . getNonEmpty) arbitrary

  -- Shrink via the underlying 'String', dropping the empty candidate to
  -- preserve the non-empty invariant of the generator.
  shrink (RawTag txt) =
    [ RawTag (T.pack s) | s <- shrink (T.unpack txt), not (null s) ]