module Math.SVM.SVMLight.Utils
(
Qid(..)
, FeatureIdx(..)
, Point(..)
, point
, featureIdx
, renderPoints
, renderPoint
) where
import Data.Monoid
import Data.Foldable (foldMap)
import Data.List (intersperse)
import Data.Char (ord)
import Control.Applicative
import qualified Data.Map.Strict as M
import Data.Attoparsec.ByteString.Char8 as AP
import qualified Data.ByteString.Builder as BSB
import qualified Data.ByteString.Char8 as BS
-- | Query identifier attached to a point (written as @qid:N@ in the
-- textual format).
newtype Qid = Qid Int
  deriving (Eq, Ord, Show)
-- | Index of a feature within a point; used as the key of the feature map.
newtype FeatureIdx = FIdx Int
  deriving (Eq, Ord, Show)
-- | Parse an unsigned decimal number and wrap it as a 'FeatureIdx'.
featureIdx :: Parser FeatureIdx
featureIdx = FIdx <$> decimal
-- | Parse a query id of the form @qid:N@, where @N@ is an unsigned
-- decimal number.
qid :: Parser Qid
qid = do
  _ <- string "qid:"
  Qid <$> decimal
-- | A single labelled example. All fields are strict.
data Point = Point
  { pLabel    :: !Int
    -- ^ Integer label of the example.
  , pQid      :: !(Maybe Qid)
    -- ^ Optional query id (@qid:N@).
  , pFeatures :: !(M.Map FeatureIdx Double)
    -- ^ Feature values keyed by feature index.
  , pComment  :: !(Maybe BS.ByteString)
    -- ^ Optional trailing comment (the text following @#@).
  }
  deriving (Eq, Ord, Show)
-- | Parse one example of the form
--
-- > <label> [qid:<n>] <idx>:<value> <idx>:<value> ... [# <comment>]
--
-- The label may carry an optional leading @+@ or @-@ sign, so the usual
-- @-1@ / @+1@ class labels are accepted (a bare 'decimal' would reject
-- them, and would also reject negative labels written by 'renderPoint').
--
-- Features are separated by a single space character. If the same feature
-- index occurs more than once, the first occurrence wins ('M.unions' is
-- left-biased).
--
-- The comment, when present, is everything after @#@ (and any white space
-- following it) up to, but not including, the end of the line.
--
-- NOTE(review): 'skipSpace' also skips line breaks, so the separators
-- between the parts of a point are not restricted to a single line, and
-- trailing white space (including the newline) is consumed.
point :: Parser Point
point = do
  label <- signed decimal
  skipSpace
  queryId <- optional qid
  skipSpace
  features <- M.unions <$> (feature `sepBy` char ' ')
  skipSpace
  comment <- optional (char '#' *> skipSpace *> takeTill atLineEnd)
  skipSpace
  return (Point label queryId features comment)
  where
    -- A single @idx:value@ pair as a one-element map.
    feature = M.singleton <$> featureIdx <* char ':' <*> double
    -- 'isEndOfLine' works on bytes, so convert the 'Char' first.
    atLineEnd = isEndOfLine . fromIntegral . ord
-- | Render a list of points, one per line. Lines are joined with a
-- newline; no newline is emitted after the last point.
renderPoints :: [Point] -> BSB.Builder
renderPoints pts = mconcat (intersperse "\n" rendered)
  where
    rendered = map renderPoint pts
-- | Render a single point as space-separated fields: the label, the
-- optional @qid:N@, each feature as @idx:value@ in ascending index order,
-- and finally the optional comment introduced by @#@.
renderPoint :: Point -> BSB.Builder
renderPoint pt =
  mconcat (intersperse " " (concat [labelPart, qidPart, featureParts, commentPart]))
  where
    labelPart = [BSB.intDec (pLabel pt)]

    qidPart = case pQid pt of
      Nothing      -> []
      Just (Qid q) -> ["qid:" <> BSB.intDec q]

    -- 'M.assocs' yields the pairs in ascending key order.
    featureParts =
      [ BSB.intDec i <> ":" <> BSB.doubleDec v
      | (FIdx i, v) <- M.assocs (pFeatures pt)
      ]

    -- The leading space here is in addition to the separator added by
    -- 'intersperse', so the comment marker is preceded by two spaces.
    commentPart = case pComment pt of
      Nothing  -> []
      Just txt -> [" #" <> BSB.byteString txt]