{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Data.Ipynb Copyright : Copyright (C) 2019 John MacFarlane License : BSD3 Maintainer : John MacFarlane Stability : alpha Portability : portable Data structure and JSON serializers for ipynb (Jupyter notebook) format. Version 4 of the format is documented here: . The library supports both version 4 ('Notebook NbV4') and version 3 ('Notebook NbV3') of nbformat. Note that this is a phantom type: the `NbV3` or `NbV4` parameter only affects JSON serialization, not the data structure itself. So code that manipulates notebooks can be polymorphic, operating on `Notebook a`. -} module Data.Ipynb ( Notebook(..) , NbV3 , NbV4 , JSONMeta , Cell(..) , Source(..) , CellType(..) , Output(..) , MimeType , MimeData(..) , MimeBundle(..) , breakLines ) where import Control.Applicative ((<|>)) import Control.Monad (when) import Data.Aeson as Aeson import qualified Data.Aeson.Types as Aeson import Data.ByteString (ByteString) import qualified Data.ByteString.Base64 as Base64 import Data.Char (isSpace) import qualified Data.HashMap.Strict as HM import Data.List (partition) import qualified Data.Map as M import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Encoding as TE import GHC.Generics import Prelude #if MIN_VERSION_base(4,11,0) #else import Data.Semigroup #endif -- | Indexes 'Notebook' for serialization as nbformat version 3. data NbV3 -- | Indexes 'Notebook' for serialization as nbformat version 4. data NbV4 -- | A Jupyter notebook. data Notebook a = Notebook { notebookMetadata :: JSONMeta , notebookFormat :: (Int, Int) , notebookCells :: [Cell a] } deriving (Show, Eq, Generic) instance Semigroup (Notebook a) where Notebook m1 f1 c1 <> Notebook m2 f2 c2 = Notebook (m1 <> m2) (max f1 f2) (c1 <> c2) instance Monoid (Notebook a) where mempty = Notebook mempty (0, 0) mempty #if MIN_VERSION_base(4,11,0) #else mappend = (<>) #endif instance FromJSON (Notebook NbV4) where parseJSON = withObject "Notebook" $ \v -> do fmt <- v .:? "nbformat" .!= 0 when (fmt < 4 || fmt > 4) $ fail "expected nbformat == 4" fmtminor <- v .:? "nbformat_minor" .!= 0 metadata <- v .:? "metadata" .!= mempty cells <- v .: "cells" return Notebook{ notebookMetadata = metadata , notebookFormat = (fmt, fmtminor) , notebookCells = cells } instance FromJSON (Notebook NbV3) where parseJSON = withObject "Notebook" $ \v -> do fmt <- v .:? "nbformat" .!= 0 when (fmt > 3) $ fail "expected nbformat <= 3" fmtminor <- v .:? "nbformat_minor" .!= 0 metadata <- v .:? "metadata" .!= mempty worksheets <- v .: "worksheets" -- NOTE: we ignore metadata on worksheets: is this ever used? cells <- mconcat <$> mapM (.: "cells") worksheets return Notebook{ notebookMetadata = metadata , notebookFormat = (fmt, fmtminor) , notebookCells = cells } instance ToJSON (Notebook NbV4) where toJSON n = object [ "nbformat" .= fst (notebookFormat n) , "nbformat_minor" .= snd (notebookFormat n) , "metadata" .= notebookMetadata n , "cells" .= (if notebookFormat n >= (4,1) then id else map (\c -> c{ cellAttachments = Nothing })) (notebookCells n) ] instance ToJSON (Notebook NbV3) where toJSON n = object [ "nbformat" .= fst (notebookFormat n) , "nbformat_minor" .= snd (notebookFormat n) , "metadata" .= notebookMetadata n , "worksheets" .= [ object [ "cells" .= (if notebookFormat n >= (4,1) then id else map (\c -> c{ cellAttachments = Nothing })) (notebookCells n) , "metadata" .= (mempty :: JSONMeta) -- see above in FromJSON instance ] ] ] type JSONMeta = M.Map Text Value -- | A 'Source' is a textual content which may be -- represented in JSON either as a single string -- or as a list of strings (which are concatenated). newtype Source = Source{ unSource :: [Text] } deriving (Show, Eq, Generic, Semigroup, Monoid) instance FromJSON Source where parseJSON v = do ts <- parseJSON v <|> (:[]) <$> parseJSON v return $ Source ts instance ToJSON Source where toJSON (Source ts) = toJSON ts -- | A Jupyter notebook cell. data Cell a = Cell { cellType :: CellType a , cellSource :: Source , cellMetadata :: JSONMeta , cellAttachments :: Maybe (M.Map Text MimeBundle) } deriving (Show, Eq, Generic) instance FromJSON (Cell NbV4) where parseJSON = withObject "Cell" $ \v -> do ty <- v .: "cell_type" cell_type <- case ty of "markdown" -> pure Markdown "raw" -> pure Raw "code" -> Code <$> v .:? "execution_count" <*> v .: "outputs" _ -> fail $ "Unknown cell_type " ++ ty metadata <- v .: "metadata" attachments <- v .:? "attachments" source <- v .: "source" return Cell{ cellType = cell_type , cellMetadata = metadata , cellAttachments = attachments , cellSource = source } instance FromJSON (Cell NbV3) where parseJSON = withObject "Cell" $ \v -> do ty <- v .: "cell_type" cell_type <- case ty of "markdown" -> pure Markdown "heading" -> Heading <$> v .: "level" "raw" -> pure Raw "code" -> Code <$> v .:? "prompt_number" <*> v .: "outputs" _ -> fail $ "Unknown cell_type " ++ ty metadata <- parseV3Metadata v source <- if ty == "code" then v .: "input" else v .: "source" return Cell{ cellType = cell_type , cellMetadata = metadata , cellAttachments = Nothing , cellSource = source } -- note that execution_count can't be omitted! instance ToJSON (Cell NbV4) where toJSON c = object $ ("metadata" .= cellMetadata c) : maybe [] (\x -> ["attachments" .= x]) (cellAttachments c) ++ case cellType c of Markdown -> [ "cell_type" .= ("markdown" :: Text) , "source" .= cellSource c ] Heading lev -> [ "cell_type" .= ("markdown" :: Text) , "source" .= (Source . breakLines . ((T.replicate lev "#" <> " ") <>) . mconcat . unSource) (cellSource c) ] Raw -> [ "cell_type" .= ("raw" :: Text) , "source" .= cellSource c ] Code{ codeExecutionCount = ec , codeOutputs = outs } -> [ "cell_type" .= ("code" :: Text) , "execution_count" .= ec , "outputs" .= outs , "source" .= cellSource c ] instance ToJSON (Cell NbV3) where toJSON c = object $ metadataToV3Pairs (cellMetadata c) ++ case cellType c of Markdown -> [ "cell_type" .= ("markdown" :: Text) , "source" .= cellSource c ] Heading lev -> [ "cell_type" .= ("heading" :: Text) , "level" .= lev , "source" .= cellSource c ] Raw -> [ "cell_type" .= ("raw" :: Text) , "source" .= cellSource c ] Code{ codeExecutionCount = ec , codeOutputs = outs } -> [ "cell_type" .= ("code" :: Text) , "input" .= cellSource c , "outputs" .= outs ] ++ maybe [] (\n -> ["prompt_number" .= n]) ec -- in v3, certain metadata fields occur in the main cell object. -- e.g. collapsed, language. metadataToV3Pairs :: JSONMeta -> [Aeson.Pair] metadataToV3Pairs meta = ("metadata" .= M.fromList regMeta) : map toPair extraMeta where (extraMeta, regMeta) = partition isExtraMeta $ M.toList meta toPair (k,v) = k .= v v3MetaInMainCell :: [Text] v3MetaInMainCell = ["collapsed", "language"] isExtraMeta :: (Text, a) -> Bool isExtraMeta (k,_) = k `elem` v3MetaInMainCell parseV3Metadata :: HM.HashMap Text Value -> Aeson.Parser JSONMeta parseV3Metadata v = do meta <- v .:? "metadata" .!= mempty let extraMeta = M.fromList $ filter isExtraMeta $ HM.toList v return (meta <> extraMeta) -- | Information about the type of a notebook cell, plus -- data specific to that type. note that 'Heading' is -- for v3 only; a 'Heading' will be rendered as 'Markdown' -- in v4. data CellType a = Markdown | Heading -- V3 only { headingLevel :: Int } | Raw | Code { codeExecutionCount :: Maybe Int , codeOutputs :: [Output a] } deriving (Show, Eq, Generic) -- | Output from a Code cell. data Output a = Stream { streamName :: Text , streamText :: Source } | DisplayData { displayData :: MimeBundle , displayMetadata :: JSONMeta } | ExecuteResult { executeCount :: Int , executeData :: MimeBundle , executeMetadata :: JSONMeta } | Err { errName :: Text , errValue :: Text , errTraceback :: [Text] } deriving (Show, Eq, Generic) instance FromJSON (Output NbV4) where parseJSON = withObject "Object" $ \v -> do ty <- v .: "output_type" case ty of "stream" -> Stream <$> v .: "name" <*> v .: "text" "display_data" -> DisplayData <$> v .: "data" <*> v .:? "metadata" .!= mempty "execute_result" -> ExecuteResult <$> v .: "execution_count" <*> v .: "data" <*> v .:? "metadata" .!= mempty "error" -> Err <$> v .: "ename" <*> v .: "evalue" <*> v .: "traceback" _ -> fail $ "Unknown object_type " ++ ty instance FromJSON (Output NbV3) where parseJSON = withObject "Object" $ \v -> do ty <- v .: "output_type" case ty of "stream" -> Stream <$> v .: "stream" <*> v .: "text" "display_data" -> DisplayData <$> extractNbV3Data v <*> v .:? "metadata" .!= mempty "pyout" -> ExecuteResult <$> v .: "prompt_number" <*> extractNbV3Data v <*> v .:? "metadata" .!= mempty "pyerr" -> Err <$> v .: "ename" <*> v .: "evalue" <*> v .: "traceback" _ -> fail $ "Unknown object_type " ++ ty -- Remove keys output_type, prompt_number, metadata; -- change short keys like text and png to mime types. extractNbV3Data :: Aeson.Object -> Aeson.Parser MimeBundle extractNbV3Data v = do let go ("output_type", _) = Nothing go ("metadata", _) = Nothing go ("prompt_number", _) = Nothing go ("text", x) = Just ("text/plain", x) go ("latex", x) = Just ("text/latex", x) go ("html", x) = Just ("text/html", x) go ("png", x) = Just ("image/png", x) go ("jpeg", x) = Just ("image/jpeg", x) go ("javascript", x) = Just ("application/javascript", x) go (_, _) = Nothing -- TODO complete list? where documented? parseJSON (Object . HM.fromList . mapMaybe go . HM.toList $ v) instance ToJSON (Output NbV4) where toJSON s@Stream{} = object [ "output_type" .= ("stream" :: Text) , "name" .= streamName s , "text" .= streamText s ] toJSON d@DisplayData{} = object [ "output_type" .= ("display_data" :: Text) , "data" .= displayData d , "metadata" .= displayMetadata d ] toJSON e@ExecuteResult{} = object [ "output_type" .= ("execute_result" :: Text) , "execution_count" .= executeCount e , "data" .= executeData e , "metadata" .= executeMetadata e ] toJSON e@Err{} = object [ "output_type" .= ("error" :: Text) , "ename" .= errName e , "evalue" .= errValue e , "traceback" .= errTraceback e ] instance ToJSON (Output NbV3) where toJSON s@Stream{} = object [ "output_type" .= ("stream" :: Text) , "stream" .= streamName s , "text" .= streamText s ] toJSON d@DisplayData{} = adjustV3DataFields $ object [ "output_type" .= ("display_data" :: Text) , "data" .= displayData d , "metadata" .= displayMetadata d ] toJSON e@ExecuteResult{} = adjustV3DataFields $ object [ "output_type" .= ("pyout" :: Text) , "prompt_number" .= executeCount e , "data" .= executeData e , "metadata" .= executeMetadata e ] toJSON e@Err{} = object [ "output_type" .= ("pyerr" :: Text) , "ename" .= errName e , "evalue" .= errValue e , "traceback" .= errTraceback e ] adjustV3DataFields :: Value -> Value adjustV3DataFields (Object hm) = case HM.lookup "data" hm of Just (Object dm) -> Object $ HM.delete "data" $ foldr (\(k, v) -> HM.insert (modKey k) v) hm (HM.toList dm) _ -> Object hm where modKey "text/plain" = "text" modKey "text/latex" = "latex" modKey "text/html" = "html" modKey "image/jpeg" = "jpeg" modKey "image/png" = "png" modKey "application/javascript" = "javascript" modKey x = x adjustV3DataFields x = x -- | Data in an execution result or display data cell. data MimeData = BinaryData ByteString | TextualData Text | JsonData Value deriving (Show, Eq, Generic) type MimeType = Text -- | A 'MimeBundle' wraps a map from mime types to mime data. newtype MimeBundle = MimeBundle{ unMimeBundle :: M.Map MimeType MimeData } deriving (Show, Eq, Generic, Semigroup, Monoid) instance FromJSON MimeBundle where parseJSON v = do m <- parseJSON v >>= mapM pairToMimeData . M.toList return $ MimeBundle $ M.fromList m pairToMimeData :: (MimeType, Value) -> Aeson.Parser (MimeType, MimeData) pairToMimeData (mt, v) | mt == "application/json" || "+json" `T.isSuffixOf` mt = return (mt, JsonData v) pairToMimeData (mt, v) = do t <- parseJSON v <|> (mconcat <$> parseJSON v) let mimeprefix = T.takeWhile (/='/') mt if mimeprefix == "text" then return (mt, TextualData t) else case Base64.decode (TE.encodeUtf8 (T.filter (not . isSpace) t)) of Left _ -> return (mt, TextualData t) Right b -> return (mt, BinaryData b) instance ToJSON MimeBundle where toJSON (MimeBundle m) = let mimeBundleToValue (BinaryData bs) = toJSON $ TE.decodeUtf8 . Base64.joinWith "\n" 76 . Base64.encode $ bs mimeBundleToValue (JsonData v) = v mimeBundleToValue (TextualData t) = toJSON (breakLines t) in toJSON $ M.map mimeBundleToValue m -- | Break up a string into a list of strings, each representing -- one line of the string (including trailing newline if any). breakLines :: Text -> [Text] breakLines t = let (x, y) = T.break (=='\n') t in case T.uncons y of Nothing -> if T.null x then [] else [x] Just (c, rest) -> (x <> T.singleton c) : breakLines rest