module Documentation.Haddock.Parser ( parseString, parseParas
, overIdentifier, toRegular
, takeUntil, IString
) where
import Prelude hiding (takeWhile)
import Control.Arrow (first)
import Control.Monad (void, mfilter)
import Control.Applicative
import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
import qualified Data.ByteString.Char8 as BS
import Data.Char (chr, isAsciiUpper)
import Data.List (stripPrefix, intercalate, unfoldr)
import Data.Maybe (fromMaybe)
import Data.Monoid
import Documentation.Haddock.Doc
import Documentation.Haddock.Types
import Documentation.Haddock.Utf8
import Data.ByteString.Char8 (ByteString)
type IString = (Char, String, Char)
toRegular :: DocH mod IString -> DocH mod String
toRegular = fmap (\(_, x, _) -> x)
overIdentifier :: (String -> Maybe a)
-> DocH mod IString
-> DocH mod a
overIdentifier f d = g d
where
g (DocIdentifier (o, x, e)) = case f x of
Nothing -> DocString $ o : x ++ [e]
Just x' -> DocIdentifier x'
g DocEmpty = DocEmpty
g (DocAppend x x') = DocAppend (g x) (g x')
g (DocString x) = DocString x
g (DocParagraph x) = DocParagraph $ g x
g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
g (DocModule x) = DocModule x
g (DocWarning x) = DocWarning $ g x
g (DocEmphasis x) = DocEmphasis $ g x
g (DocMonospaced x) = DocMonospaced $ g x
g (DocBold x) = DocBold $ g x
g (DocUnorderedList x) = DocUnorderedList $ fmap g x
g (DocOrderedList x) = DocOrderedList $ fmap g x
g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
g (DocCodeBlock x) = DocCodeBlock $ g x
g (DocHyperlink x) = DocHyperlink x
g (DocPic x) = DocPic x
g (DocAName x) = DocAName x
g (DocProperty x) = DocProperty x
g (DocExamples x) = DocExamples x
g (DocHeader (Header l x)) = DocHeader . Header l $ g x
parse :: Parser a -> BS.ByteString -> a
parse p = either err id . parseOnly (p <* endOfInput)
where
err = error . ("Haddock.Parser.parse: " ++)
parseParas :: String
-> DocH mod IString
parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
where
p :: Parser (DocH mod IString)
p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
parseString :: String -> DocH mod IString
parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
parseStringBS :: BS.ByteString -> DocH mod IString
parseStringBS = parse p
where
p :: Parser (DocH mod IString)
p = mconcat <$> many (monospace <|> anchor <|> identifier
<|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold
<|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar)
encodedChar :: Parser (DocH mod a)
encodedChar = "&#" *> c <* ";"
where
c = DocString . return . chr <$> num
num = hex <|> decimal
hex = ("x" <|> "X") *> hexadecimal
specialChar :: [Char]
specialChar = "_/<@\"&'`"
string' :: Parser (DocH mod a)
string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
where
unescape "" = ""
unescape ('\\':x:xs) = x : unescape xs
unescape (x:xs) = x : unescape xs
skipSpecialChar :: Parser (DocH mod a)
skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
emphasis :: Parser (DocH mod IString)
emphasis = DocEmphasis . parseStringBS <$>
mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
bold :: Parser (DocH mod IString)
bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
disallowNewline = mfilter ('\n' `BS.notElem`)
takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
takeWhile_ p = scan False p_
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
takeWhile1_ = mfilter (not . BS.null) . takeWhile_
anchor :: Parser (DocH mod a)
anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
monospace :: Parser (DocH mod IString)
monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@")
moduleName :: Parser (DocH mod a)
moduleName = DocModule <$> (char '"' *> modid <* char '"')
where
modid = intercalate "." <$> conid `sepBy1` "."
conid = (:)
<$> satisfy isAsciiUpper
<*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))
picture :: Parser (DocH mod a)
picture = DocPic . makeLabeled Picture . decodeUtf8
<$> disallowNewline ("<<" *> takeUntil ">>")
paragraph :: Parser (DocH mod IString)
paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock
<|> property <|> header
<|> textParagraph)
header :: Parser (DocH mod IString)
header = do
let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
pser = foldl1 (<|>) psers
delim <- decodeUtf8 <$> pser
line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
rest <- paragraph <|> return mempty
return $ docAppend (DocParagraph (DocHeader (Header (length delim) line))) rest
textParagraph :: Parser (DocH mod IString)
textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine
list :: Parser (DocH mod IString)
list = DocUnorderedList <$> unorderedList
<|> DocOrderedList <$> orderedList
<|> DocDefList <$> definitionList
unorderedList :: Parser [DocH mod IString]
unorderedList = ("*" <|> "-") *> innerList unorderedList
orderedList :: Parser [DocH mod IString]
orderedList = (paren <|> dot) *> innerList orderedList
where
dot = (decimal :: Parser Int) <* "."
paren = "(" *> decimal <* ")"
innerList :: Parser [DocH mod IString] -> Parser [DocH mod IString]
innerList item = do
c <- takeLine
(cs, items) <- more item
let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
return $ case items of
Left p -> [contents `joinPara` p]
Right i -> contents : i
definitionList :: Parser [(DocH mod IString, DocH mod IString)]
definitionList = do
label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]"
c <- takeLine
(cs, items) <- more definitionList
let contents = parseString . dropNLs . unlines $ c : cs
return $ case items of
Left p -> [(label, contents `joinPara` p)]
Right i -> (label, contents) : i
joinPara :: DocH mod id -> DocH mod id -> DocH mod id
joinPara (DocParagraph p) c = docParagraph $ docAppend p c
joinPara d p = docAppend d p
dropNLs :: String -> String
dropNLs = reverse . dropWhile (== '\n') . reverse
more :: Monoid a => Parser a
-> Parser ([String], Either (DocH mod IString) a)
more item = innerParagraphs <|> moreListItems item
<|> moreContent item <|> pure ([], Right mempty)
innerParagraphs :: Parser ([String], Either (DocH mod IString) a)
innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
moreListItems :: Parser a
-> Parser ([String], Either (DocH mod IString) a)
moreListItems item = (,) [] . Right <$> (skipSpace *> item)
moreContent :: Monoid a => Parser a
-> Parser ([String], Either (DocH mod IString) a)
moreContent item = first . (:) <$> nonEmptyLine <*> more item
indentedParagraphs :: Parser (DocH mod IString)
indentedParagraphs = parseParas . concat <$> dropFrontOfPara " "
dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
dropFrontOfPara sp = do
currentParagraph <- some (sp *> takeNonEmptyLine)
followingParagraphs <-
skipHorizontalSpace *> nextPar
<|> skipHorizontalSpace *> nlList
<|> endOfInput *> return []
return (currentParagraph ++ followingParagraphs)
where
nextPar = (++) <$> nlList <*> dropFrontOfPara sp
nlList = "\n" *> return ["\n"]
nonSpace :: BS.ByteString -> Parser BS.ByteString
nonSpace xs
| not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
| otherwise = return xs
takeNonEmptyLine :: Parser String
takeNonEmptyLine = do
(++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
birdtracks :: Parser (DocH mod a)
birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
where
line = skipHorizontalSpace *> ">" *> takeLine
stripSpace :: [String] -> [String]
stripSpace = fromMaybe <*> mapM strip'
where
strip' (' ':xs') = Just xs'
strip' "" = Just ""
strip' _ = Nothing
examples :: Parser (DocH mod a)
examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
where
go :: Parser [Example]
go = do
prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
expr <- takeLine
(rs, es) <- resultAndMoreExamples
return (makeExample prefix expr rs : es)
where
resultAndMoreExamples :: Parser ([String], [Example])
resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
where
moreExamples :: Parser ([String], [Example])
moreExamples = (,) [] <$> go
result :: Parser ([String], [Example])
result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
makeExample :: String -> String -> [String] -> Example
makeExample prefix expression res =
Example (strip expression) result
where
result = map (substituteBlankLine . tryStripPrefix) res
tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
substituteBlankLine "<BLANKLINE>" = ""
substituteBlankLine xs = xs
nonEmptyLine :: Parser String
nonEmptyLine = mfilter (any (not . isSpace)) takeLine
takeLine :: Parser String
takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
endOfLine :: Parser ()
endOfLine = void "\n" <|> endOfInput
property :: Parser (DocH mod a)
property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
codeblock :: Parser (DocH mod IString)
codeblock =
DocCodeBlock . parseStringBS . dropSpaces
<$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
where
dropSpaces xs =
let rs = decodeUtf8 xs
in case splitByNl rs of
[] -> xs
ys -> case last ys of
' ':_ -> case mapM dropSpace ys of
Nothing -> xs
Just zs -> encodeUtf8 $ intercalate "\n" zs
_ -> xs
splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s)
_ -> Nothing)
. ('\n' :)
dropSpace "" = Just ""
dropSpace (' ':xs) = Just xs
dropSpace _ = Nothing
block' = scan False p
where
p isNewline c
| isNewline && c == '@' = Nothing
| isNewline && isSpace c = Just isNewline
| otherwise = Just $ c == '\n'
hyperlink :: Parser (DocH mod a)
hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
<$> disallowNewline ("<" *> takeUntil ">")
<|> autoUrl
autoUrl :: Parser (DocH mod a)
autoUrl = mkLink <$> url
where
url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
mkLink :: BS.ByteString -> DocH mod a
mkLink s = case BS.unsnoc s of
Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
_ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
parseValid :: Parser String
parseValid = do
vs' <- many' $ utf8String "⋆" <|> return <$> idChar
let vs = concat vs'
c <- peekChar
case c of
Just '`' -> return vs
Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
<|> return vs
_ -> fail "outofvalid"
where
idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^")
<|> digit <|> letter_ascii
utf8String :: String -> Parser String
utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
identifier :: Parser (DocH mod IString)
identifier = do
o <- idDelim
vid <- parseValid
e <- idDelim
return $ DocIdentifier (o, vid, e)
where
idDelim = char '\'' <|> char '`'
strip :: String -> String
strip = (\f -> f . f) $ dropWhile isSpace . reverse
skipHorizontalSpace :: Parser ()
skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
takeHorizontalSpace :: Parser BS.ByteString
takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
makeLabeled :: (String -> Maybe String -> a) -> String -> a
makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
(uri, "") -> f uri Nothing
(uri, label) -> f uri (Just $ dropWhile isSpace label)
where
removeEscapes "" = ""
removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
removeEscapes ('\\':xs) = removeEscapes xs
removeEscapes (x:xs) = x : removeEscapes xs
takeUntil :: ByteString -> Parser ByteString
takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
where
end = BS.unpack end_
p :: (Bool, String) -> Char -> Maybe (Bool, String)
p acc c = case acc of
(True, _) -> Just (False, end)
(_, []) -> Nothing
(_, x:xs) | x == c -> Just (False, xs)
_ -> Just (c == '\\', end)
dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
requireEnd = mfilter (BS.isSuffixOf end_)
gotSome xs
| BS.null xs = fail "didn't get any content"
| otherwise = return xs