module NLP.Types.Tags
where
import Data.Serialize (Serialize, get, put)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8, decodeUtf8, decodeUtf8')
import GHC.Generics
import Text.Read (readEither)
import Test.QuickCheck (Arbitrary(..), NonEmptyList(..))
import Test.QuickCheck.Instances ()
import NLP.Types.General (Error, toEitherErr)
-- | Class of NER tag types.
--
-- Both methods have defaults built on the type's 'Show' and 'Read'
-- instances, so an instance declaration may be left empty.
class ( Eq a
      , Ord a
      , Show a
      , Read a
      , Generic a
      , Serialize a
      ) => NERTag a where

  -- | Render a tag as 'Text'.
  --
  -- Default: the 'show' output of the tag, packed into 'Text'.
  fromNERTag :: a -> Text
  fromNERTag tag = T.pack (show tag)

  -- | Parse a tag from 'Text'.
  --
  -- Default: unpack the text, run 'readEither' on it, and pass the
  -- outcome through 'toEitherErr' to obtain an @Either Error@.
  parseNERTag :: Text -> Either Error a
  parseNERTag = toEitherErr . readEither . T.unpack
-- | Class of chunk tag types.
--
-- No method has a default; every instance must supply all three.
class ( Eq a
      , Ord a
      , Show a
      , Read a
      , Generic a
      , Serialize a
      ) => ChunkTag a where

  -- | Render a chunk tag as 'Text'.
  fromChunk :: a -> Text

  -- | Parse a chunk tag from 'Text', reporting failure as a 'Left'
  -- 'Error'.
  parseChunk :: Text -> Either Error a

  -- | A distinguished chunk tag value.
  --
  -- NOTE(review): presumably the tag for tokens that are not part of
  -- any chunk -- confirm against callers.
  notChunk :: a
-- | Class of tag types.
--
-- No method has a default; every instance must supply all of them.
class ( Eq a
      , Ord a
      , Show a
      , Read a
      , Generic a
      , Serialize a
      ) => Tag a where

  -- | Render a tag as 'Text'.
  fromTag :: a -> Text

  -- | Build a tag from 'Text'.  The result type has no error channel,
  -- so instances must produce a tag for every input.
  parseTag :: Text -> a

  -- | A distinguished tag value.
  --
  -- NOTE(review): presumably the tag used for unknown input -- confirm.
  tagUNK :: a

  -- | A 'Text' form of the tag.
  --
  -- NOTE(review): how this is meant to differ from 'fromTag' is not
  -- visible in this module -- confirm against callers.
  tagTerm :: a -> Text

  -- | A distinguished tag value.
  --
  -- NOTE(review): presumably marks the start of a sequence -- confirm.
  startTag :: a

  -- | A distinguished tag value.
  --
  -- NOTE(review): presumably marks the end of a sequence -- confirm.
  endTag :: a

  -- | Predicate on tags.
  --
  -- NOTE(review): presumably "is this a determiner tag?" -- confirm.
  isDt :: a -> Bool
-- | A chunk tag stored as its raw, uninterpreted 'Text'.
newtype RawChunk = RawChunk Text
  deriving ( Eq
           , Ord
           , Show
           , Read
           , Generic
           )
-- | Binary serialisation for 'RawChunk'.  No methods are defined here,
-- so the default 'Serialize' method implementations are used.
instance Serialize RawChunk
-- | 'RawChunk' performs no validation: any 'Text' is accepted as a
-- chunk tag and is rendered back unchanged.
instance ChunkTag RawChunk where
  -- Unwrap the underlying text.
  fromChunk chunk = case chunk of
    RawChunk label -> label

  -- Parsing never fails; the input is wrapped as-is.
  parseChunk = Right . RawChunk

  -- The distinguished chunk tag is the text "O".
  notChunk = RawChunk (T.pack "O")
-- | A tag stored as its raw, uninterpreted 'Text'.
newtype RawTag = RawTag Text
  deriving ( Eq
           , Ord
           , Show
           , Read
           , Generic
           )
-- | Binary serialisation for 'RawTag'.  No methods are defined here,
-- so the default 'Serialize' method implementations are used.
instance Serialize RawTag
-- | 'RawTag' performs no validation: any 'Text' is accepted as a tag
-- and is rendered back unchanged.
instance Tag RawTag where
  -- Unwrap the underlying text.
  fromTag tag = case tag of
    RawTag label -> label

  -- Every input is wrapped as-is.
  parseTag = RawTag

  tagUNK = RawTag (T.pack "Unk")

  -- For raw tags this is the same unwrapped text that 'fromTag' yields.
  tagTerm tag = case tag of
    RawTag label -> label

  startTag = RawTag (T.pack "-START-")

  endTag = RawTag (T.pack "-END-")

  -- True exactly when the wrapped text is "DT".
  isDt = (== RawTag (T.pack "DT"))
-- | Generates 'RawTag' values from non-empty strings, so the wrapped
-- text is never empty.
instance Arbitrary RawTag where
  -- Draw a 'NonEmptyList' of characters, unwrap it, and pack it.
  arbitrary = fmap (RawTag . T.pack . getNonEmpty) arbitrary
-- | Serialises 'Text' as its UTF-8 encoded bytes.
--
-- 'put' writes the UTF-8 encoding of the text using the byte-string
-- 'Serialize' instance; 'get' reads those bytes back and decodes them.
--
-- Decoding uses 'decodeUtf8'' rather than 'decodeUtf8'. The latter
-- throws a pure exception on malformed input, which would escape the
-- 'Either' result of running the parser.  Here malformed bytes are
-- reported as an ordinary parse failure via 'fail'.
instance Serialize Text where
  put txt = put (encodeUtf8 txt)

  get = do
    bytes <- get
    case decodeUtf8' bytes of
      Left err -> fail ("Serialize Text: invalid UTF-8: " ++ show err)
      Right txt -> return txt