{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE StrictData #-} {-| Module : Headroom.Ext.Haskell.Haddock Description : Extraction of /Haddock module header/ fields Copyright : (c) 2019-2020 Vaclav Svejcar License : BSD-3-Clause Maintainer : vaclav.svejcar@gmail.com Stability : experimental Portability : POSIX Support for extracting data from /Haddock module headers/ present in /Haskell source code files/ or /templates/. -} module Headroom.Ext.Haskell.Haddock ( HaddockModuleHeader(..) , extractFieldOffsets , extractModuleHeader , indentField , stripCommentSyntax ) where import Control.Applicative ( Alternative(..) ) import Control.Monad ( ap ) import Data.Default.Class ( Default(..) ) import Headroom.Data.Regex ( re , replace , scan ) import Headroom.Data.TextExtra ( fromLines , toLines ) import Headroom.Template ( Template(..) ) import Headroom.Types ( HaddockFieldOffsets(..) , TemplateMeta(..) ) import RIO import qualified RIO.Char as C import qualified RIO.Text as T -- | Extracted fields from the /Haddock module header/. data HaddockModuleHeader = HaddockModuleHeader { hmhCopyright :: Maybe Text -- ^ module copyright (content of the @Copyright@ field) , hmhLicense :: Maybe Text -- ^ module license (content of the @License@ field) , hmhMaintainer :: Maybe Text -- ^ module license (content of the @Maintainer@ field) , hmhPortability :: Maybe Text -- ^ module license (content of the @Portability@ field) , hmhStability :: Maybe Text -- ^ module license (content of the @Stability@ field) , hmhShortDesc :: Maybe Text -- ^ module short description (content of the @Description@ field) , hmhLongDesc :: Maybe Text -- ^ module long description (the text after module header fields) } deriving (Eq, Show) -- | Extracts /offsets/ for selected haddock fields (i.e. number of chars -- between start of line and field value). This is needed to properly format -- multi-line field values rendered in new /license headers/. extractFieldOffsets :: (Template t) => t -- ^ parsed /template/ -> HaddockFieldOffsets -- ^ extracted field offsets extractFieldOffsets template = HaddockFieldOffsets { .. } where hfoCopyright = extractCopyrightOffset text text = stripCommentSyntax . rawTemplate $ template extractCopyrightOffset :: Text -> Maybe Int extractCopyrightOffset text = case scan [re|\h*Copyright\h*:\h*|] text of [(full, _)] -> Just . T.length $ full _ -> Nothing -- | Extracts metadata from given /Haddock/ module header. extractModuleHeader :: Text -- ^ text containing /Haddock/ module header -> Maybe TemplateMeta -- ^ extracted metadata from corresponding /template/ -> HaddockModuleHeader -- ^ extracted metadata extractModuleHeader text meta = let hmhCopyright = indent hfoCopyright <$> extractField "Copyright" hmhLicense = extractField "License" hmhMaintainer = extractField "Maintainer" hmhPortability = extractField "Portability" hmhStability = extractField "Stability" hmhShortDesc = extractField "Description" hmhLongDesc = if null rest' then Nothing else process rest' in HaddockModuleHeader { .. } where (fields', rest') = fromMaybe ([], input) $ runP fields input input = T.unpack . stripCommentSyntax $ text extractField name = fmap (T.strip . T.pack) (lookup name fields') process = Just . T.strip . T.pack indent c t = T.strip $ indentField c t HaddockFieldOffsets {..} = case meta of Just (HaskellTemplateMeta offsets') -> offsets' _ -> def -- | Adds correct indentation to multi-line /Haddock/ field values. It's usually -- desired to have such values indented like this: -- -- @ -- Copyright : (c) 2020, 1st Author -- (c) 2020, 2nd Author -- @ -- -- This functions achieves that using the /offset/ value, which specifies number -- of empty characters that should be placed before second (and any subsequent) -- line. -- -- >>> indentField (Just 2) "foo\nbar\nbaz" -- "foo\n bar\n baz" indentField :: Maybe Int -- ^ offset (in number of black chars) for 2nd and subsequent lines -> Text -- ^ input text to indent -> Text -- ^ processed text indentField Nothing text = text indentField (Just offset) text = fromLines . go . toLines $ text where go [] = [] go [x ] = [x] go (x : xs) = x : fmap ((prefix <>) . T.stripStart) xs prefix = T.replicate offset " " -- | Strips /Haskell/ comment syntax tokens (e.g. @{-@, @-}@) from input text. -- -- >>> stripCommentSyntax "{- foo -}\nbar\n" -- "foo \nbar\n" stripCommentSyntax :: Text -- ^ input text to strip -> Text -- ^ resulting text without comment syntax tokens stripCommentSyntax text = fromLines $ go (toLines text) [] where regex = [re|^(-- \||-{2,})|^\h*({-\h?\|?)|(-})\h*$|] strip = replace regex (const . const $ "") go [] acc = reverse acc go (x : xs) acc = go xs (strip x : acc) -------------------------------------------------------------------------------- -- Below code is slightly modified version of code copied from: -- https://github.com/haskell/haddock/blob/ghc-8.10/haddock-api/src/Haddock/Interface/ParseModuleHeader.hs ------------------------------------------------------------------------------- -- Small parser to parse module header. ------------------------------------------------------------------------------- -- The below is a small parser framework how we read keys. -- -- all fields in the header are optional and have the form -- -- [spaces1][field name][spaces] ":" -- [text]"\n" ([spaces2][space][text]"\n" | [spaces]"\n")* -- where each [spaces2] should have [spaces1] as a prefix. -- -- Thus for the key "Description", -- -- > Description : this is a -- > rather long -- > -- > description -- > -- > The module comment starts here -- -- the value will be "this is a .. description" and the rest will begin -- at "The module comment". -- 'C' is a 'Char' carrying its column. -- -- This let us make an indentation-aware parser, as we know current indentation. -- by looking at the next character in the stream ('curInd'). -- -- Thus we can munch all spaces but only not-spaces which are indented. -- data C = C {-# UNPACK #-} !Int Char newtype P a = P { unP :: [C] -> Maybe ([C], a) } deriving Functor instance Applicative P where pure x = P $ \s -> Just (s, x) (<*>) = ap instance Monad P where return = pure m >>= k = P $ \s0 -> do (s1, x) <- unP m s0 unP (k x) s1 instance Alternative P where empty = P $ const Nothing a <|> b = P $ \s -> unP a s <|> unP b s runP :: P a -> String -> Maybe a runP p input = fmap snd (unP p input') where input' = concat [ zipWith C [0 ..] l <> [C (length l) '\n'] | l <- lines input ] ------------------------------------------------------------------------------- -- ------------------------------------------------------------------------------- curInd :: P Int curInd = P $ \s -> Just . (,) s $ case s of [] -> 0 C i _ : _ -> i rest :: P String rest = P $ \cs -> Just ([], [ c | C _ c <- cs ]) munch :: (Int -> Char -> Bool) -> P String munch p = P $ \cs -> let (xs, ys) = takeWhileMaybe p' cs in Just (ys, xs) where p' (C i c) | p i c = Just c | otherwise = Nothing munch1 :: (Int -> Char -> Bool) -> P String munch1 p = P $ \case [] -> Nothing (c : cs) | Just c' <- p' c -> let (xs, ys) = takeWhileMaybe p' cs in Just (ys, c' : xs) | otherwise -> Nothing where p' (C i c) | p i c = Just c | otherwise = Nothing char :: Char -> P Char char c = P $ \case [] -> Nothing (C _ c' : cs) | c == c' -> Just (cs, c) | otherwise -> Nothing skipSpaces :: P () skipSpaces = P $ \cs -> Just (dropWhile (\(C _ c) -> C.isSpace c) cs, ()) takeWhileMaybe :: (a -> Maybe b) -> [a] -> ([b], [a]) takeWhileMaybe f = go where go xs0@[] = ([], xs0) go xs0@(x : xs) = case f x of Just y -> let (ys, zs) = go xs in (y : ys, zs) Nothing -> ([], xs0) ------------------------------------------------------------------------------- -- Fields ------------------------------------------------------------------------------- field :: Int -> P (String, String) field i = do fn <- munch1 $ \_ c -> C.isAlpha c || c == '-' skipSpaces _ <- char ':' skipSpaces val <- munch $ \j c -> C.isSpace c || j > i return (fn, val) fields :: P ([(String, String)], String) fields = do skipSpaces i <- curInd fs <- many (field i) r <- rest return (fs, r)