module Text.MeCab(
MeCab,
MeCabError(..),
Node(..), Stat(..),
MeCabString(..),
Text.MeCab.new,
Text.MeCab.new2,
parse,
parseToNodes,
parseNBest,
parseNBestInit,
nBestNext,
nBestNextNodes,
getPartial,
setPartial,
getTheta,
setTheta,
getLatticeLevel,
setLatticeLevel,
getAllMorphs,
setAllMorphs,
version,
) where
import Control.Applicative
import Control.Exception
import Control.Monad
import qualified Data.ByteString as B
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Typeable
import Foreign
import Foreign.C
import Prelude
newtype MeCab =
MeCab { unMeCab :: ForeignPtr MeCab }
deriving (Eq, Ord)
mkMeCab :: Ptr MeCab -> IO MeCab
mkMeCab p =
MeCab <$> newForeignPtr p_mecab_destroy p
data Stat =
NOR
| UNK
| BOS
| EOS
| EON
deriving (Eq, Read, Show)
data Node s = Node
{
nodeSurface :: s
, nodeFeature :: s
, nodeRlength :: Int
, nodeId :: Int
, nodeRcAttr :: Int
, nodeLcAttr :: Int
, nodePosid :: Int
, nodeCharType :: Int
, nodeStat :: Stat
, nodeIsBest :: Bool
, nodeAlpha :: Double
, nodeBeta :: Double
, nodeProb :: Double
, nodeWcost :: Int
, nodeCost :: Int
} deriving (Eq, Read, Show)
peekNodes :: MeCabString s => Ptr (Node s) -> IO [Node s]
peekNodes ptr
| ptr == nullPtr =
return []
| otherwise =
(:) <$> peekNode ptr
<*> (peekNodes =<< ((\hsc_ptr -> peekByteOff hsc_ptr 4)) ptr)
peekNode :: MeCabString s => Ptr (Node s) -> IO (Node s)
peekNode ptr = do
sfc <- do
p <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) ptr
len <- ((\hsc_ptr -> peekByteOff hsc_ptr 36)) ptr
fromBS <$> B.packCStringLen (p, fromIntegral (len :: CUShort))
Node
<$> return sfc
<*> (packCString =<< ((\hsc_ptr -> peekByteOff hsc_ptr 28)) ptr)
<*> ((fromIntegral :: CUShort -> Int) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 38)) ptr)
<*> ((fromIntegral :: CUInt -> Int) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 32)) ptr)
<*> ((fromIntegral :: CUShort -> Int) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 40)) ptr)
<*> ((fromIntegral :: CUShort -> Int) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 42)) ptr)
<*> ((fromIntegral :: CUShort -> Int) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 44)) ptr)
<*> ((fromIntegral :: CUChar -> Int) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 46)) ptr)
<*> (toStat <$> ((\hsc_ptr -> peekByteOff hsc_ptr 47)) ptr)
<*> ((==(1::CUChar)) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 48)) ptr)
<*> ((realToFrac :: CFloat -> Double) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 52)) ptr)
<*> ((realToFrac :: CFloat -> Double) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 56)) ptr)
<*> ((realToFrac :: CFloat -> Double) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 60)) ptr)
<*> ((fromIntegral :: CShort -> Int) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 64)) ptr)
<*> ((fromIntegral :: CLong -> Int) <$> ((\hsc_ptr -> peekByteOff hsc_ptr 68)) ptr)
where
toStat :: CUChar -> Stat
toStat (0) = NOR
toStat (1) = UNK
toStat (2) = BOS
toStat (3) = EOS
toStat (4) = EON
toStat _ = UNK
data MeCabError =
MeCabError String
deriving (Eq, Ord, Show, Typeable)
instance Exception MeCabError
new :: [String] -> IO MeCab
new args =
withCStrings args $ \argc argv -> do
p <- mecab_new (fromIntegral argc) argv
when (p == nullPtr) $
throwIO =<< (MeCabError <$> strerror nullPtr)
mkMeCab p
new2 :: String -> IO MeCab
new2 arg =
withCString arg $ \pstr ->
mkMeCab =<< mecab_new2 pstr
withCStrings :: [String] -> (Int -> Ptr CString -> IO a) -> IO a
withCStrings ss f =
withCStrings' ss $ \ps ->
withArrayLen ps f
withCStrings' :: [String] -> ([CString] -> IO a) -> IO a
withCStrings' strs f = go [] strs where
go ps [] = f $ reverse ps
go ps (s:ss) =
withCString s $ \p -> go (p:ps) ss
version :: IO String
version =
peekCString =<< mecab_version
strerror :: Ptr MeCab -> IO String
strerror p =
peekCString =<< mecab_strerror p
class MeCabString s where
toBS :: s -> B.ByteString
fromBS :: B.ByteString -> s
instance MeCabString String where
toBS = toBS . T.pack
fromBS = T.unpack . fromBS
instance MeCabString B.ByteString where
toBS = id
fromBS = id
instance MeCabString T.Text where
toBS = T.encodeUtf8
fromBS = T.decodeUtf8
parse :: MeCabString s => MeCab -> s -> IO s
parse m txt = withForeignPtr (unMeCab m) $ \pm ->
B.useAsCStringLen (toBS txt) $ \(pstr, len) -> do
p <- mecab_sparse_tostr2 pm pstr (fromIntegral len)
when (p == nullPtr) $ throwIO =<< (MeCabError <$> strerror pm)
packCString p
parseToNodes :: MeCabString s => MeCab -> s -> IO [Node s]
parseToNodes m txt = withForeignPtr (unMeCab m) $ \pm ->
B.useAsCStringLen (toBS txt) $ \(pstr, len) -> do
p <- mecab_sparse_tonode2 pm pstr (fromIntegral len)
when (p == nullPtr) $ throwIO =<< (MeCabError <$> strerror pm)
peekNodes p
parseNBest :: MeCabString s => MeCab -> Int -> s -> IO s
parseNBest m n txt = withForeignPtr (unMeCab m) $ \pm ->
B.useAsCStringLen (toBS txt) $ \(pstr, len) -> do
p <- mecab_nbest_sparse_tostr2 pm (fromIntegral n) pstr (fromIntegral len)
when (p == nullPtr) $ throwIO =<< (MeCabError <$> strerror pm)
packCString p
parseNBestInit :: MeCabString s => MeCab -> s -> IO ()
parseNBestInit m txt = withForeignPtr (unMeCab m) $ \pm ->
B.useAsCStringLen (toBS txt) $ \(pstr, len) -> do
ret <- mecab_nbest_init2 pm pstr (fromIntegral len)
when (ret /= 1) $ throwIO =<< (MeCabError <$> strerror pm)
nBestNext :: MeCabString s => MeCab -> IO (Maybe s)
nBestNext m = withForeignPtr (unMeCab m) $ \pm -> do
r <- mecab_nbest_next_tostr pm
if r == nullPtr
then return Nothing
else Just <$> packCString r
nBestNextNodes :: MeCabString s => MeCab -> IO (Maybe [Node s])
nBestNextNodes m = withForeignPtr (unMeCab m) $ \pm -> do
p <- mecab_nbest_next_tonode pm
if p == nullPtr
then return Nothing
else Just <$> peekNodes p
packCString :: MeCabString s => CString -> IO s
packCString p = fromBS <$> B.packCString p
getPartial :: MeCab -> IO Bool
getPartial m = withForeignPtr (unMeCab m) $ \pm ->
(==1) <$> mecab_get_partial pm
setPartial :: MeCab -> Bool -> IO ()
setPartial m b = withForeignPtr (unMeCab m) $ \pm ->
mecab_set_partial pm (if b then 1 else 0)
getTheta :: MeCab -> IO Double
getTheta m = withForeignPtr (unMeCab m) $ \pm ->
liftM realToFrac $ mecab_get_theta pm
setTheta :: MeCab -> Double -> IO ()
setTheta m f = withForeignPtr (unMeCab m) $ \pm ->
mecab_set_theta pm (realToFrac f)
getLatticeLevel :: MeCab -> IO Int
getLatticeLevel m = withForeignPtr (unMeCab m) $ \pm ->
fromIntegral <$> mecab_get_lattice_level pm
setLatticeLevel :: MeCab -> Int -> IO ()
setLatticeLevel m ll = withForeignPtr (unMeCab m) $ \pm ->
mecab_set_lattice_level pm (fromIntegral ll)
getAllMorphs :: MeCab -> IO Int
getAllMorphs m = withForeignPtr (unMeCab m) $ \pm ->
fromIntegral <$> mecab_get_all_morphs pm
setAllMorphs :: MeCab -> Int -> IO ()
setAllMorphs m am = withForeignPtr (unMeCab m) $ \pm ->
mecab_set_all_morphs pm (fromIntegral am)
foreign import ccall mecab_new
:: CInt -> Ptr CString -> IO (Ptr MeCab)
foreign import ccall mecab_new2
:: CString -> IO (Ptr MeCab)
foreign import ccall "&mecab_destroy"
p_mecab_destroy :: FunPtr (Ptr MeCab -> IO ())
foreign import ccall mecab_version
:: IO CString
foreign import ccall mecab_strerror
:: Ptr MeCab -> IO CString
foreign import ccall mecab_sparse_tostr2
:: Ptr MeCab -> CString -> CSize -> IO CString
foreign import ccall mecab_nbest_sparse_tostr2
:: Ptr MeCab -> CSize -> CString -> CSize -> IO CString
foreign import ccall mecab_nbest_init2
:: Ptr MeCab -> CString -> CSize -> IO CInt
foreign import ccall mecab_nbest_next_tostr
:: Ptr MeCab -> IO CString
foreign import ccall mecab_sparse_tonode2
:: Ptr MeCab -> CString -> CSize -> IO (Ptr (Node a))
foreign import ccall mecab_nbest_next_tonode
:: Ptr MeCab -> IO (Ptr (Node a))
foreign import ccall mecab_get_partial
:: Ptr MeCab -> IO CInt
foreign import ccall mecab_set_partial
:: Ptr MeCab -> CInt -> IO ()
foreign import ccall mecab_get_theta
:: Ptr MeCab -> IO CFloat
foreign import ccall mecab_set_theta
:: Ptr MeCab -> CFloat -> IO ()
foreign import ccall mecab_get_lattice_level
:: Ptr MeCab -> IO CInt
foreign import ccall mecab_set_lattice_level
:: Ptr MeCab -> CInt -> IO ()
foreign import ccall mecab_get_all_morphs
:: Ptr MeCab -> IO CInt
foreign import ccall mecab_set_all_morphs
:: Ptr MeCab -> CInt -> IO ()