-- boilerplate {{{ {-# LANGUAGE FlexibleContexts, NoMonomorphismRestriction #-} module Data.SGF.Parse.Raw ( collection, Property(..), enum ) where import Control.Applicative hiding (many, (<|>)) import Control.Monad import Data.Char import Data.Tree import Data.Word import Prelude hiding (lex) import Text.Parsec (SourcePos(..), incSourceColumn) import Text.Parsec.Prim import Text.Parsec.Combinator -- }}} data Property = Property { position :: SourcePos, -- ^ -- Currently, this is pretty lame: it doesn't track -- line number and character number, only byte -- offset from the beginning of the file. This is -- because I don't really understand how to -- correctly track line number and character number -- properly in the face of dynamically changing -- encodings, whereas byte number is a totally -- braindead statistic to track. name :: String, -- ^ -- The literal name of the property. This is -- guaranteed to be a non-empty string of -- upper-case ASCII characters. values :: [[Word8]] -- ^ The arguments to the property. } deriving (Eq, Ord, Show) -- | -- Handy way to convert known-ASCII characters from 'Word8' to 'Char', among other -- things. enum :: (Enum a, Enum b) => a -> b enum = toEnum . fromEnum ensure p x = guard (p x) >> return x satisfy p = tokenPrim ((\x -> ['\'', x, '\'']) . enum) (\pos _ _ -> incSourceColumn pos 1) (ensure p) satisfyChar = satisfy . (. enum) anyWord = satisfy (const True) exactWord = satisfy . (==) . enum someWord = satisfy . flip elem . map enum noWord = satisfy . flip notElem . map enum whitespace = many (satisfyChar isSpace) -- assumed: the current byte is literally ASCII '\\' iff the current byte is -- the last byte of the encoding of the actual character '\\' and neither of -- the bytes that are literally ASCII ']' and ASCII ':' occur after the first -- byte of any multi-byte encoded character -- (in particular, UTF-8, ASCII, and ISO 8859-1 satisfy this property) escapedChar = liftM2 (\x y -> [x, y]) (exactWord '\\') anyWord unescapedExcept ws = fmap return (noWord ws) literalTextExcept ws = fmap concat $ many (escapedChar <|> unescapedExcept ws) property = liftM3 ((. map enum) . Property) (getPosition) (many1 (satisfyChar (liftM2 (&&) isUpper (< '\128')))) (sepEndBy1 (exactWord '[' >> literalTextExcept "]" <* exactWord ']') whitespace) node = do exactWord ';' whitespace sepEndBy property whitespace gameTree = do exactWord '(' whitespace (node:nodes) <- sepEndBy1 node whitespace trees <- sepEndBy gameTree whitespace exactWord ')' return (Node node (foldr ((return .) . Node) trees nodes)) -- | -- Parse the tree-structure of an SGF file, but without any knowledge of the -- semantics of the properties, etc. collection :: Stream s m Word8 => ParsecT s u m [Tree [Property]] collection = whitespace >> sepEndBy1 gameTree whitespace <* whitespace <* eof