module Text.PageIO.Extract where
import Data.Maybe
import Data.Monoid
import Text.PageIO.Types
import qualified Text.PageIO.LabelMap as LM
import qualified Data.ByteString.Char8 as S
type Area = Page
newtype BlockResult = MkBlockResult { blockResults :: [(Area, LabelMap Value)] }
deriving (Eq, Ord, Monoid)
data SheetResult = MkSheetResult
{ resultFields :: !(LabelMap Value)
, resultBlocks :: !(LabelMap BlockResult)
}
deriving (Eq, Ord)
instance Monoid SheetResult where
mempty = MkSheetResult mempty mempty
mappend (MkSheetResult xf xb) (MkSheetResult yf yb)
= MkSheetResult (LM.union xf yf) (LM.unionWith mappend xb yb)
mconcat xs
= MkSheetResult (LM.unions (map resultFields xs)) (LM.unionsWith mappend (map resultBlocks xs))
instance Show BlockResult where
show (MkBlockResult []) = "[]"
show (MkBlockResult rs) = concatMap prettyBlockResult rs
where
prettyBlockResult (_, lm) = case LM.toList lm of
[] -> "\n - {}"
(v:vs) -> "\n - " ++ prettyEntry v ++ concatMap ((" " ++) . prettyEntry) vs
instance Show SheetResult where
show (MkSheetResult fs bs) = concat
[ "---\n"
, concatMap prettyEntry (LM.toList fs)
, concatMap prettyEntry (LM.toList bs)
]
prettyEntry :: Show a => (Label, a) -> String
prettyEntry (lbl, val) = do
show lbl ++ ": " ++ show val ++ "\n"
extractPage :: Sheet -> Page -> Maybe SheetResult
extractPage MkSheet{sheetBox, sheetFields, sheetPatterns, sheetFrames} page
| and patternResults = Just $ MkSheetResult
{ resultFields = fieldResults
, resultBlocks = frameResults
}
| otherwise = Nothing
where
sheetArea = crop sheetBox page
patternResults = map (checkPattern sheetArea) (LM.elems sheetPatterns)
fieldResults = fmap (extractField sheetArea) sheetFields
frameResults = LM.unionsWith mappend (map extractFrame sheetFrames)
extractFrame MkFrame{ frameBox, frameBlocks } = LM.fromListWith mappend blockResults
where
frameArea = crop frameBox sheetArea
blockResults = extractBlocks frameBlocks frameArea
crop :: Box -> Page -> Page
crop MkBox{boxTop, boxBottom, boxLeft, boxRight} (MkPage lns) = MkPage (map doCol (doRows lns))
where
doRows = take (boxBottom boxTop + 1) . drop (boxTop 1)
doCol = S.take (boxRight boxLeft + 1) . S.drop (boxLeft 1)
checkPattern :: Page -> Pattern -> Bool
checkPattern page MkPattern{ patternBox, patternMatch }
| val `matches` patternMatch = True
| otherwise = False
where
val = pageVal $ crop patternBox page
extractField :: Page -> Field -> Value
extractField page MkField{ fieldBox, fieldFormat } = case fieldFormat of
FNumeric{} -> valToIntVal val
_ -> val
where
val = pageVal $ crop fieldBox page
extractBlocks :: LabelMap Block -> Page -> [(Label, BlockResult)]
extractBlocks blocks page@(MkPage lns) = case blockResults of
[] -> case lns of
(_:rest) -> extractBlocks blocks (MkPage rest)
_ -> mempty
((skip, res):_) -> res:extractBlocks blocks (MkPage $ drop skip lns)
where
blockResults =
[ (blockLines b, (lbl, fromJust es))
| (lbl, b) <- LM.toList blocks
, let es = extractBlock b page, isJust es
]
extractBlock :: Block -> Page -> Maybe BlockResult
extractBlock MkBlock{ blockLines, blockPatterns, blockFields, blockFilterBy } page@(MkPage lns)
| length blockArea < blockLines = Nothing
| and patternResults = Just . MkBlockResult $ if filtersPass
then [(MkPage blockArea, fieldResults)]
else []
| otherwise = Nothing
where
blockArea = take blockLines lns
patternResults = map (checkPattern page) (LM.elems blockPatterns)
fieldResults = fmap (extractField page) blockFields
filtersPass = all runFilter blockFilterBy
runFilter (flt@MkFilter{ filterField, filterOperator, filterMatch })
= case LM.lookup filterField fieldResults of
Just val -> case filterOperator of
ONot op -> not $ runFilter flt{ filterOperator = op }
OContains -> val `matches` filterMatch
OStartsWith -> matchValue filterMatch `S.isPrefixOf` val
OEndsWith -> matchValue filterMatch `S.isSuffixOf` val
OEq -> matchValue filterMatch == val
_ -> False
pageVal :: Page -> Value
pageVal (MkPage page) = S.concat page