{-# LANGUAGE BangPatterns #-} {-| Module : Data.OrgMode Description : Parser and Serializer for Emacs OrgMode Copyright : (c) Lally Singh, 2015 License : BSD3 Maintainer : yell@lal.ly Stability : experimental Portability : BangPatterns, GADTs, DeriveDataTypeable, StandaloneDeriving A package to parse and interpret Emacs Org-Mode documents. It also supports arbitrary types that map back and forth with 'Node'. Create an instance of @NodeUpdate a@ and use @OrgDocView a@ to read/write values of @a@ in an org-mode file. Property drawers are great for this mapping. -} module Data.OrgMode ( Prefix(..), Drawer(..), Babel(..), Table(..), NodeChild(..), Node(..), OrgFileProperty(..), OrgFileElement(..), OrgDocView(..), NodeUpdate(..), OrgDoc(..), TextLine(..), LineNumber(..), TextLineSource(..), -- toNumber, isNumber, linesStartingFrom, hasNumber, makeDrawerLines, -- makeNodeLine, parseLine, orgFile, generateDocView, getRawElements, updateNode, addOrgLine, emptyZip, categorizeLines ) where -- TODO(lally): only export the interesting things! import Data.OrgMode.Text import Data.OrgMode.Doc import Data.OrgMode.OrgDocView -- import Sync.Issue import Control.Monad import Data.Char (toUpper, isAlphaNum) import Data.List import Data.Maybe (mapMaybe, fromJust, catMaybes, isJust, isNothing) import Data.Monoid import Debug.Trace (trace) import Text.Parsec import Text.Regex.Posix import Text.StringTemplate -- ** Zipper Facilities -- | The document is a forest of Nodes, with properties. The Node -- Path is the currently-constructing tree of nodes. The path is -- sorted by 'nDepth', in descending order, with each element's parent -- after it in the list. data OrgDocZipper = OrgDocZipper { ozNodePath :: [Node] , ozNodes :: [Node] , ozProperties :: [OrgFileProperty] } deriving (Eq, Show) printChild (ChildNode n) = Just $ nTopic n printChild _ = Nothing printChildren = (intercalate ",") . catMaybes . (map printChild) . nChildren -- | Closes up the path for the zipper, up through to (e.g., >=) the -- specified depth. Returns the new path, and roots that have been -- fully closed in the process. appendChildrenUpPathThroughDepth :: Int -> [Node] -> ([Node], [Node]) appendChildrenUpPathThroughDepth _ [] = ([], []) appendChildrenUpPathThroughDepth depth [n] | nDepth n >= depth = let res = [n { nChildren = reverse $ nChildren n }] in ([], res) | otherwise = ([n], []) appendChildrenUpPathThroughDepth depth (n:ns) | nDepth n >= depth = let parent = head ns parentChildren = nChildren parent fixedUpChild = n { nChildren = reverse $ nChildren n } updatedParent = parent { nChildren = (ChildNode fixedUpChild):parentChildren } updatedPath = updatedParent : (tail ns) in appendChildrenUpPathThroughDepth depth updatedPath | otherwise = (n:ns, []) addNode node doczip path@(pn:pns) | nDepth node > nDepth pn = doczip { ozNodePath = node:path } | nDepth node <= nDepth (last path) = let closed = closeZipPath doczip in closed { ozNodePath = [node] } | otherwise = let (parentPath, newnodes) = appendChildrenUpPathThroughDepth (nDepth node) path newpath = node:parentPath in doczip { ozNodePath = newpath, ozNodes = newnodes ++ (ozNodes doczip) } -- | Closes up the path for the zipper, reversing the child-lists as we -- go (to get them into first->last order). closeZipPath doczip@(OrgDocZipper path nodes properties) = doczip { ozNodePath = [], ozNodes = nodes ++ (snd (appendChildrenUpPathThroughDepth (-1) path)) } isEndLine line = ":END:" == (trim $ tlText line) openDrawer (Just (ChildDrawer (Drawer _ _ []))) = True openDrawer (Just (ChildDrawer (Drawer _ _ lines))) = not $ isEndLine $ last lines openDrawer _ = False parseDrawerName tline = let matches = (tlText tline) =~ " *:([A-Za-z]*): *" :: [[String]] in if length matches > 0 then matches !! 0 !! 1 else "" parseDrawerProperty tline = let matches = (tlText tline) =~ " *:([-_A-Za-z]*):(.*)" :: [[String]] in if length matches > 0 then [(matches !! 0 !! 1, trim $ matches !! 0 !! 2)] else [] addChildToNode n c = n { nChildren = c:(nChildren n) } addChildToLastNode c doczip path@(pn:pns) = doczip { ozNodePath = (addChildToNode pn c):pns } updateLastChildOfLastNode c doczip path@(pn:pns) = let updatedPn = pn { nChildren = c:(tail $ nChildren pn) } in doczip { ozNodePath = updatedPn:pns } addBabel :: TextLine -> Maybe NodeChild -> (NodeChild -> OrgDocZipper) -> OrgDocZipper addBabel line lastChild adder = case lastChild of Just (ChildBabel (Babel lines)) -> adder $ ChildBabel $ Babel $ lines ++ [line] Nothing -> adder (ChildBabel $ Babel [line]) addTable :: TextLine -> Maybe NodeChild -> (NodeChild -> OrgDocZipper) -> OrgDocZipper addTable line lastChild adder = case lastChild of Just (ChildTable (Table lines)) -> adder $ ChildTable $ Table $ lines ++ [line] Nothing -> adder (ChildTable $ Table [line]) addDrawer :: TextLine -> Maybe NodeChild -> OrgDocZipper -> OrgDocZipper addDrawer line lastChild doczip@(OrgDocZipper path _ _) | not (openDrawer lastChild) = let drawer = Drawer (parseDrawerName line) [] [line] in addChildToLastNode (ChildDrawer drawer) doczip path -- Parse to get drProperties, and ignore :END:. | otherwise = let Just (ChildDrawer (Drawer n p lines)) = lastChild props = parseDrawerProperty line dlines = lines ++ [line] update n p = let drawer = ChildDrawer $ Drawer n p dlines in updateLastChildOfLastNode drawer doczip path in if isEndLine line then update n p else update n (p ++ props) -- | Adds a pre-classified OrgLine to an OrgDocZipper, possibly adding -- it to some existing part of the OrgDocZipper. addOrgLine :: OrgDocZipper -> OrgLine -> OrgDocZipper -- First, the simple base case. Creates a pseudo-root for unattached -- lines, and inserts the first node into the path. There can only be -- one pseudo-root, for lines before the first Node. addOrgLine doczip@(OrgDocZipper [] nodes properties) orgline = let pseudo_root = Node (-1) Nothing [] [] "" emptyTextLine pseudRootWithChild c = doczip { ozNodePath = [pseudo_root { nChildren = [c] }] } in case orgline of (OrgText line) -> pseudRootWithChild $ ChildText line (OrgHeader line node) -> doczip { ozNodePath = [node] } (OrgDrawer line) -> pseudRootWithChild $ ChildDrawer $ Drawer "" [] [line] (OrgPragma line prop) -> doczip { ozProperties = (ozProperties doczip) ++ [prop] } (OrgBabel line) -> pseudRootWithChild $ ChildBabel $ Babel [line] (OrgTable line) -> pseudRootWithChild $ ChildTable $ Table [line] -- TODO(lally): Add a few args and split out all these inner functions! addOrgLine doczip@(OrgDocZipper path@(pn:pns) nodes props) orgline = let -- addDrawer should parse as it goes. But, we have the problem of :END: -- followed with more properties. We can detect this, expensively, by -- scanning for :END: in the last line of the existing drawer. -- Correctness over speed! lastChild = let children = nChildren pn in if null children then Nothing else Just $ last children adder cld = addChildToLastNode cld doczip path in case orgline of (OrgText line) -> adder $ ChildText line (OrgHeader line node) -> addNode node doczip path (OrgDrawer line) -> addDrawer line lastChild doczip (OrgPragma line prop) -> doczip { ozProperties = (ozProperties doczip) ++ [prop] } (OrgBabel line) -> addBabel line lastChild adder (OrgTable line) -> addTable line lastChild adder -- | Intentionally fail when we don't have a parse success, which -- shouldn't happen... allRight :: Either a b -> b allRight (Right b) = b -- * Primary File Parser categorizeLines :: String -> [OrgLine] categorizeLines text = let fileLines = lines text in map (\(nr, line) -> allRight $ parseLine nr line) $ zip [1..] fileLines emptyZip = OrgDocZipper [] [] [] -- | We have one of these per input line of the file. Some of these -- we just keep as the input text, in the TextLine (as they need -- multi-line parsing to understand). data OrgLine = OrgText TextLine | OrgHeader TextLine Node | OrgDrawer TextLine | OrgPragma TextLine OrgFileProperty | OrgBabel TextLine | OrgTable TextLine deriving (Eq, Show) instance TextLineSource OrgLine where getTextLines (OrgText t) = [t] getTextLines (OrgHeader t _) = [t] getTextLines (OrgDrawer t) = [t] getTextLines (OrgPragma t _) = [t] getTextLines (OrgBabel t) = [t] getTextLines (OrgTable t) = [t] data OrgFileElement = OrgTopProperty OrgFileProperty | OrgTopLevel { tlNode :: Node } deriving (Eq, Show) -- | Parsing the file efficiently. Let's keep it non-quadratic. -- -- * Split it up into lines -- * Identify each line, as part of one of the big structure types: -- -- * Node headers -- * Drawers -- * File-level properties -- * Babel headers -- * Lines who's type depend on context (e.g., babel entries or node -- text) -- -- * Then fold the lines over a builder function and a zipper of the -- tree. orgFile :: String -> OrgDoc orgFile fileContents = let lines = categorizeLines fileContents (OrgDocZipper path nodes props) = foldl addOrgLine emptyZip lines allNodes = nodes ++ (snd $ appendChildrenUpPathThroughDepth (-1) path) in OrgDoc allNodes props (concatMap getTextLines lines) rstrip xs = reverse $ lstrip $ reverse xs lstrip = dropWhile (== ' ') strip xs = lstrip $ rstrip xs orgPropDrawer :: Parsec String st NodeChild orgPropDrawer = do manyTill space (char ':') "Property Drawer" drawerName <- many1 letter char ':' manyTill space newline let orgProperty = do manyTill space (char ':') propName <- many1 letter char ':' value <- manyTill (satisfy (/= '\n')) (try newline) return (propName, rstrip $ lstrip value) props <- manyTill orgProperty ( try $ manyTill space (string ":END:")) manyTill space newline return $ ChildDrawer $ Drawer drawerName props [] emptyTextLine = TextLine 0 "" Nothing -- Any line that isn't a node. orgBodyLine :: Parsec String st NodeChild orgBodyLine = do firstChar <- satisfy (\a -> (a /= '*') && (a /= '#')) if firstChar /= '\n' then do rest <- manyTill anyChar newline let allText = (firstChar : rest) indent = length $ takeWhile (== ' ') allText return $ ChildText $ TextLine indent allText Nothing else return $ ChildText emptyTextLine orgProperty :: Parsec String st OrgFileElement orgProperty = do string "#+" name <- many1 (letter <|> char '_') char ':' many space value <- manyTill anyChar (try newline) return $ OrgTopProperty $ OrgFileProperty name value babelLine :: Parsec String TextLine OrgLine babelLine = do (string "#+begin_src:") <|> (string "#+end_src") <|> (string "#+BEGIN_SRC:") <|> (string "#+END_SRC") textLine <- getState return $ OrgBabel textLine fileProperty :: Parsec String TextLine OrgLine fileProperty = do string "#+" name <- many1 letter char ':' many space value <- manyTill anyChar (try newline) line <- getState return $ OrgPragma line $ OrgFileProperty name value nodeLine :: Parsec String TextLine OrgLine nodeLine = do let tagList = char ':' >> word `endBy1` char ':' where word = many1 (letter <|> char '-' <|> digit <|> char '_' <|> char '@') validPrefixes = ["TODO", "DONE", "OPEN", "CLOSED", "ACTIVE"] orgSuffix = (do tags <- tagList char '\n' return tags) <|> (char '\n' >> return []) stars <- many1 $ char '*' let depth = length stars many1 space -- stop this sillyness on the prefix. just pull the first word of the topic. -- TODO(lally): don't hard-code the list of prefixes. many space topic <- manyTill anyChar (try $ lookAhead orgSuffix) let topic_words = words topic first_word_is_prefix = length topic_words > 0 && (head topic_words `elem` validPrefixes) prefix = if first_word_is_prefix then Just $ Prefix $ head topic_words else Nothing topic_remain = if first_word_is_prefix then snd $ splitAt (length $ head topic_words) topic else topic tags <- orgSuffix loc <- getState let fxloc = loc { tlIndent = depth } line = OrgHeader loc $ Node depth prefix tags [] (strip topic_remain) fxloc return line propertyLine :: Parsec String TextLine OrgLine propertyLine = do manyTill space (char ':') propName <- many1 (letter <|> char '-' <|> digit <|> char '_' <|> char '@') char ':' remain <- manyTill (satisfy (/= '\n')) (try newline) line <- getState return $ OrgDrawer line bodyLine :: Parsec String TextLine OrgLine bodyLine = do text <- getState return $ OrgText text -- |The incoming state has TextLine within it. -- TODO(lally): update the state here to hold options we get in the -- ORG file, like TODO states. classifyOrgLine :: Parsec String TextLine OrgLine classifyOrgLine = do textLine <- getState -- Possibilities: -- - #+begin_src: -- - #+other: -- - ** blah -- - :PROPERTY: -- - anything else. res <- (try babelLine) <|> (try fileProperty) <|> (try nodeLine) <|> (try propertyLine) <|> bodyLine -- always matches. return res parseLine :: Int -> String -> Either ParseError OrgLine parseLine lineno s = do let indent = length $ takeWhile (== ' ') s line = TextLine indent s (Just lineno) in runParser classifyOrgLine line "input" (s ++ "\n")