{-# LANGUAGE GeneralizedNewtypeDeriving, RecordPuns, PatternGuards #-} {-# OPTIONS -fno-warn-name-shadowing -funbox-strict-fields #-} module Text.PageIO.Extract where import Data.Maybe import Data.Monoid import Text.PageIO.Types import Control.Applicative import qualified Text.PageIO.LabelMap as LM import qualified Data.ByteString.Char8 as S type Area = Page newtype BlockResult = MkBlockResult { blockResults :: [(Area, LabelMap Bound)] } deriving (Eq, Ord, Monoid) type Bound = Value data SheetResult = MkSheetResult { resultFields :: !(LabelMap Bound) , resultBlocks :: !(LabelMap BlockResult) } deriving (Eq, Ord) instance Monoid SheetResult where mempty = MkSheetResult mempty mempty mappend (MkSheetResult xf xb) (MkSheetResult yf yb) = MkSheetResult (LM.union xf yf) (LM.unionWith mappend xb yb) mconcat xs = MkSheetResult (LM.unions (map resultFields xs)) (LM.unionsWith mappend (map resultBlocks xs)) instance Show BlockResult where show (MkBlockResult []) = "[]" show (MkBlockResult rs) = concatMap prettyBlockResult rs where prettyBlockResult (_, lm) = case LM.toList lm of [] -> "\n - {}" (v:vs) -> "\n - " ++ prettyEntry v ++ concatMap ((" " ++) . prettyEntry) vs instance Show SheetResult where show (MkSheetResult fs bs) = concat [ "---\n" , concatMap prettyEntry (LM.toList fs) , concatMap prettyEntry (LM.toList bs) ] prettyEntry :: Show a => (Label, a) -> String prettyEntry (lbl, val) = do show lbl ++ ": " ++ show val ++ "\n" extractPage :: Sheet -> Page -> Maybe SheetResult extractPage MkSheet{sheetBox, sheetFields, sheetPatterns, sheetFrames} page | and patternResults = Just $ MkSheetResult { resultFields = fieldResults , resultBlocks = frameResults } | otherwise = Nothing where -- sheetArea = crop sheetBox page sheetArea = page patternResults = checkPattern sheetArea <$> LM.elems sheetPatterns fieldResults = extractField sheetArea <$> sheetFields frameResults = LM.unionsWith mappend $ extractFrame <$> sheetFrames extractFrame MkFrame{ frameBox, frameBlocks } = LM.fromListWith mappend $ reverse blockResults where frameArea = crop frameBox sheetArea blockResults = extractBlocks frameBlocks frameArea crop :: Box -> Page -> Page crop MkBox{boxTop, boxBottom, boxLeft, boxRight} (MkPage lns) = MkPage $ doCol <$> doRows lns where doRows = take (boxBottom - boxTop + 1) . drop (boxTop - 1) doCol = S.take (boxRight - boxLeft + 1) . S.drop (boxLeft - 1) checkPattern :: Page -> Pattern -> Bool checkPattern page MkPattern{ patternBox, patternMatch } | val `matches` patternMatch = True | otherwise = False where val = pageVal $ crop patternBox page extractField :: Page -> Field -> Bound extractField page fld@MkField{ fieldBox, fieldFormat } = case fieldFormat of -- FNumeric 2 -> formatDotted (fieldLen fld) (S.unpack (valToIntVal val)) FNumeric{} -> valToIntVal val _ | Just idx <- S.elemIndexEnd '.' val , frac <- S.drop idx val -> S.append (formatDotted (fieldLen fld - S.length frac) (show $ valToInt val)) frac | otherwise -> val where val = pageVal $ crop fieldBox page fieldLen :: Field -> Int fieldLen MkField{ fieldBox } = boxRight fieldBox - boxLeft fieldBox + 1 formatDotted len n = S.pack $ (replicate pad ' ') ++ dottedStr where pad = len - length dottedStr + 3 str = n dottedStr = reverse (addDot $ reverse str) addDot (x:y:z:rest@(_:_)) = (x:y:z:',':addDot rest) addDot xs = xs extractBlocks :: LabelMap Block -> Page -> [(Label, BlockResult)] extractBlocks blocks page@(MkPage lns) = case blockResults of [] -> case lns of (_:rest) -> extractBlocks blocks (MkPage rest) _ -> mempty ((skip, res):_) -> res:extractBlocks blocks (MkPage $ drop skip lns) where blockResults = [ (blockLines b, (lbl, fromJust es)) | (lbl, b) <- LM.toList blocks , let es = extractBlock b page, isJust es ] extractBlock :: Block -> Page -> Maybe BlockResult extractBlock MkBlock{ blockLines, blockPatterns, blockFields, blockFilterBy } page@(MkPage lns) | length blockArea < blockLines = Nothing | and patternResults = Just . MkBlockResult $ if filtersPass then [(MkPage blockArea, fieldResults)] else [] | otherwise = Nothing where blockArea = take blockLines lns patternResults = checkPattern page <$> LM.elems blockPatterns fieldResults = extractField page <$> blockFields filtersPass = all runFilter blockFilterBy runFilter (flt@MkFilter{ filterField, filterOperator, filterMatch }) = case LM.lookup filterField fieldResults of Just val -> case filterOperator of ONot op -> not $ runFilter flt{ filterOperator = op } OContains -> val `matches` filterMatch OStartsWith -> matchValue filterMatch `S.isPrefixOf` val OEndsWith -> matchValue filterMatch `S.isSuffixOf` val OEq -> matchValue filterMatch == val _ -> False pageVal :: Page -> Value pageVal (MkPage page) = S.concat page