module Text.ARFF where
import Prelude hiding (takeWhile)
import Control.Monad
import Control.Applicative
import Data.Maybe
import Data.List hiding(takeWhile)
import qualified Data.ByteString as BS
import Data.Char as Char
import Data.Attoparsec.Combinator
import Data.Attoparsec.ByteString.Char8
import qualified Data.Attoparsec.Text as Text
data AttributeType = Numeric | Nominal [BS.ByteString] | String deriving(Show)
data Attribute = Attribute
{ name :: BS.ByteString
, dataType :: AttributeType
} deriving (Show)
data Header = Header
{ title :: BS.ByteString
, attributes :: [Attribute]
} deriving (Show)
data AttributeValue = NumericValue Double |
NominalValue BS.ByteString |
StringValue BS.ByteString
showAttributeValue :: AttributeValue -> String
showAttributeValue (NumericValue x) = show x
showAttributeValue (NominalValue x) = show x
showAttributeValue (StringValue x) = show x
instance Show AttributeValue where
show = showAttributeValue
before :: Parser p1 -> Parser p2 -> Parser p1
before p1 p2 = do
p <- p1
p2 >> return p
isInlineSpace :: Char -> Bool
isInlineSpace c = Char.isSpace c && c /= '\n' && c /= '\r'
lineSpace :: Parser ()
lineSpace = skipWhile isInlineSpace
comment :: Parser ()
comment = char '%' >> skipWhile (not . Text.isEndOfLine)
lineEnd :: Parser()
lineEnd = lineSpace >> option () comment >> (endOfInput <|> endOfLine <?> "lineEnd")
identifier :: Parser BS.ByteString
identifier = takeWhile (\x -> Char.isAlphaNum x || x == '-')
relation :: Parser BS.ByteString
relation = char '@' >> stringCI "relation" >> lineSpace >> identifier
attributeType :: Parser AttributeType
attributeType = (stringCI "numeric" >> return Numeric)
<|> (stringCI "real" >> return Numeric)
<|> (nominal >>= return . Nominal)
<?> "Attribute Type"
where nominal = do
char '{' >> lineSpace
xs <- identifier `sepBy` (lineSpace >> char ',' >> lineSpace)
lineSpace >> char '}'
return xs
attribute :: Parser Attribute
attribute = do char '@' >> stringCI "attribute" >> lineSpace
i <- identifier `before` lineSpace
t <- attributeType
return $ Attribute i t
<?> "Attribute"
line :: Parser p -> Parser p
line p = skipMany lineEnd >> lineSpace >> p `before` lineEnd
header :: Parser Header
header = do
t <- line relation
as <- manyTill (line attribute) (line $ stringCI "@data")
return $ Header t as
value' :: AttributeType -> Parser AttributeValue
value' (Nominal xs) = ((choice $ map string xs) >>= return . NominalValue)
<?> "Expected one of " ++ tail (xs >>= (',':) . show)
value' Numeric = liftM NumericValue double
value' String = error "Not implemented yet... sorry!"
value :: AttributeType -> Parser (Maybe AttributeValue)
value a = (char '?' >> return Nothing) <|> liftM Just (value' a)
row :: [AttributeType] -> Parser [Maybe AttributeValue]
row [] = error "Can't parse empty list"
row (a:as) = sequence . (value a:) . map (sep >>) $ (map value as)
where sep = lineSpace >> char ',' >> lineSpace
rows :: Header -> Parser [[Maybe AttributeValue]]
rows header = do
let as = map dataType $ attributes header
let errMsg = "expected row of types: " ++ (intercalate ", " $ map show as)
xs <- manyTill (line (row as) <?> errMsg) (manyTill lineEnd endOfInput)
return xs
arff :: Parser (Header, [[Maybe AttributeValue]])
arff = do
h <- header
rs <- rows h
return (h, rs)