{-# LANGUAGE RecordPuns, ParallelListComp, PatternGuards #-} {-# OPTIONS -fno-warn-name-shadowing -funbox-strict-fields #-} module Text.PageIO.Transform where import Data.Maybe import Data.Monoid import Data.Function (on) import Data.List (find, inits, tails, sortBy, groupBy, intersperse, mapAccumR, sort) import Text.Printf import Text.Regex import Control.Applicative import qualified Text.PageIO.LabelMap as LM import qualified Data.ByteString.Char8 as S import qualified Data.ByteString.Lazy.Char8 as L import Debug.Trace import Text.PageIO.Types import Text.PageIO.Extract (extractPage, SheetResult(..), BlockResult(..), Area, Bound(..), crop, fieldLen, formatDotted) data Doc = MkDoc { docMeta :: !SheetResult , docContent :: !L.ByteString } deriving (Show, Eq, Ord) type ValueMap = LabelMap [Value] data AppliedVariable = MkAppliedVariable { avRow :: !Row , avCol :: !Col , avValue :: !Value } deriving (Show, Eq, Ord) data Slot = MkSlot { slotSize :: !Row , slotBlocks :: ![Label] } deriving (Eq, Ord, Show) type Ordered a = ([OrderBy (Maybe Value)], a) data BlockData = MkBlockData { dataSize :: !Row , dataAreas :: ![Ordered FieldBinding] } deriving (Eq, Ord, Show) instance Monoid BlockData where mempty = MkBlockData (error "impossible") [] mappend x y = x{ dataAreas = dataAreas x ++ dataAreas y } mconcat [] = mempty mconcat xs = MkBlockData { dataSize = dataSize (head xs) , dataAreas = concatMap dataAreas xs } type PageCapacity = [Slot] type FitAttempt = LabelMap BlockData -- The actual data bound to that area. type PageBinding = LabelMap [FieldBinding] type FieldBinding = (Area, LabelMap Bound) parsePages :: Sheet -> [Page] -> [Doc] parsePages sheetIn pagesIn = map emitDoc docGroupsOut where docGroupsOut = case sheetGroupBy sheetIn of [] -> [map snd resultsIn] lbls -> map (map snd) $ groupBy ((==) `on` docKeys) $ sortBy (compare `on` docKeys) resultsIn where docKeys (vm, _) = map (`LM.lookup` vm) lbls resultsIn = [ (makeValueMap res, (res, page)) | (Just res, page) <- map (\p -> (extractPage sheetIn p, p)) pagesIn ] emitDoc :: [(SheetResult, Page)] -> Doc emitDoc xs = MkDoc meta (packPages pages) where meta = mconcat results results = map fst xs pages = map snd xs -- Turn a list of page into pages grouped by results transformPages :: Sheet -> [Page] -> Sheet -> [Page] -> [Doc] transformPages sheetIn pagesIn sheetOut pagesOut = map (makeDoc sheetOut) docGroupsOut where bindingsOut = concatMap (bindDoc sheetOut pagesOut) docGroupsIn docGroupsOut = case sheetGroupBy sheetOut of [] -> [bindingsOut] lbls -> groupBy ((==) `on` docKeys) $ sortBy (compare `on` docKeys) bindingsOut where docKeys MkDocBinding{ docValueMap } = map (`LM.lookup` docValueMap) lbls docGroupsIn = case groupBys of [] -> [map snd resultsIn] lbls -> map (map snd) $ groupBy ((==) `on` docKeys) $ sortBy (compare `on` docKeys) resultsIn where docKeys (vm, _) = map (`LM.lookup` vm) lbls where -- Really `melse` groupBys = case sheetOrderBy sheetIn of [] -> sheetOrderBy sheetOut xs -> xs resultsIn = [ (makeValueMap res, res) | Just res <- map (extractPage sheetIn) pagesIn ] makeDoc :: Sheet -> [DocBinding] -> Doc makeDoc sheetOut xs = MkDoc meta (packPages pages) where meta = mconcat . catMaybes $ map (extractPage $ constToPattern sheetOut) pages pages = map (crop (sheetBox sheetOut)) $ fillVariables sheetOut results [ makePage sheetOut b p | b <- map docBinding xs | p <- map docPage xs ] results = map docResult xs packPages :: [Page] -> L.ByteString packPages ps = unpages [ L.unlines [ L.fromChunks [l] | l <- pageLines p ] | p <- ps ] where unpages :: [L.ByteString] -> L.ByteString unpages [] = L.empty unpages ss = (L.concat $ intersperse nl ss) `L.append` nl -- half as much space where nl = L.pack "\x0C\n" -- Using Sheet+Page as template to rewrite a page -- First, rewrite SheetResult to eliminate all Variable fields -- Next, populate all fields -- Finally, populate frames with blocks and space-ify the rest of the frame lines -- -- First try fitting everything on the last page. -- All frames must be consumed by blocks mentioned on that page. -- If it won't fit, then retry with the last two pages, etc. -- -- Oh, and we re-extract the result pages to fill in vars! What joy! -- -- XXX - If data would expand, then maybe repeat the first page indefinitely? -- data DocBinding = MkDocBinding { docValueMap :: !ValueMap , docResult :: !SheetResult , docPage :: !Page , docBinding :: !PageBinding } deriving (Show, Eq, Ord) bindDoc :: Sheet -> [Page] -> [SheetResult] -> [DocBinding] bindDoc sheetOut pagesOut resultsIn = case find (\(x, _, _) -> isJust x) pageBindings of Just (Just bindings, boundPages, boundResults) -> [ MkDocBinding (makeValueMap r) r p b | r <- boundResults | b <- bindings | p <- boundPages ] _ -> [] where resultsOut = fromMaybe (error "Roundtrip failed!") . extractPage sheetOut <$> pagesOut orders = sheetBlockOrderBys sheetOut capacity = foldl (doCapacity sheetOut) [] resultsOut attempt = foldl (doAttempt orders) mempty resultsIn capacityTails = repeatTails capacity pageTails = repeatTails pagesOut resultTails = repeatTails resultsIn pageBindings = [ (tryFit pc groupedAttempt, pages', results') | pc <- capacityTails | pages' <- pageTails | results' <- resultTails ] groupedAttempt = LM.mapWithKey (doGroupBlockData groups fields) sortedAttempt where groups = sheetBlockGroupBys sheetOut fields = sheetBlockFields sheetOut sortedAttempt = (<$> filteredAttempt) $ \dat -> dat{ dataAreas = sortBy (compare `on` fst) $ dataAreas dat } filteredAttempt = attempt `LM.intersection` capacityLabels capacityLabels = LM.fromList [ (k, ()) | k <- concatMap (concatMap slotBlocks) capacity ] -- This is positively weird. -- We want to somehow collapse all rows into one, and calculate their -- variables "right here". doGroupBlockData :: LabelMap [Label] -> LabelMap (LabelMap Field) -> Label -> BlockData -> BlockData doGroupBlockData allGroups allFields lbl dat | Just groups@(_:_) <- LM.lookup lbl allGroups , Just fields <- LM.lookup lbl allFields = dat{ dataAreas = doGroupArea groups fields (dataAreas dat) } | Just fields <- LM.lookup lbl allFields = dat{ dataAreas = doExpandFields fields <$> dataAreas dat } | otherwise = dat{ dataAreas = sort $ dataAreas dat } doExpandFields :: LabelMap Field -> Ordered FieldBinding -> Ordered FieldBinding doExpandFields fields (order, (area, bounds)) = (order, (area, bounds')) where bounds' = LM.mapWithKey doExpandField fields doExpandField lbl fld | Just var <- fieldVariable fld = case var of VLiteral lit -> lit VSubStr label d t -> maybe S.empty (S.take t . S.drop d) $ LM.lookup label bounds VReplace label mrs -> maybe S.empty (\v -> foldl (flip $ uncurry replaceWith) v mrs) $ LM.lookup label bounds _ -> val | otherwise = val where val = fromMaybe S.empty (LM.lookup lbl bounds) replaceWith :: Value -> Value -> Value -> Value replaceWith match replace str | match == S.singleton '$' = str `S.append` replace | match == S.singleton '^' = replace `S.append` str | otherwise = S.pack $ subRegex (mkRegex $ S.unpack match) (S.unpack str) (S.unpack replace) -- type FieldBinding = (Area, LabelMap Bound) doGroupArea :: [Label] -> LabelMap Field -> [Ordered FieldBinding] -> [Ordered FieldBinding] doGroupArea groups fields xs = map (doGroupRows fields) $ groupBy ((==) `on` areaKeys) xs where -- XXX - Now groupby with "groups" and refill with vars! ("head" above is wrong) areaKeys (_, (_, bounds)) = (`LM.lookup` bounds) <$> groups doGroupRows :: LabelMap Field -> [Ordered FieldBinding] -> Ordered FieldBinding doGroupRows _ [] = error "Impossible" doGroupRows fields xs@((order, (area, bounds)):_) = (order, (area, bounds')) where bounds' = LM.mapWithKey doGroupRow fields doGroupRow lbl fld | Just var <- fieldVariable fld , len <- fieldLen fld = case var of VLiteral lit -> lit VCount{} -> formatInt len $ length xs VSum{ vLabel = l } -> formatFloat len . sum . map valToInt $ catMaybes [ LM.lookup l vs | (_, (_, vs)) <- xs ] _ -> val | otherwise = val where val = fromMaybe S.empty (LM.lookup lbl bounds) -- Given "abc", generate ["", "c", "bc", "abc", "aabc", "aaabc", "aaaabc"...] repeatTails :: [a] -> [[a]] repeatTails [] = [] repeatTails orig@(x:xs) = reverse (tails xs) ++ map (++ orig) infinitePrefixes where infinitePrefixes = inits (repeat x) constToPattern :: Sheet -> Sheet constToPattern sheet = sheet{ sheetFrames = doFrame <$> sheetFrames sheet } where doFrame frame = frame{ frameBlocks = doBlock <$> frameBlocks frame } doBlock block = block { blockPatterns = blockPatterns block `mappend` (LM.mapMaybe doField $ blockFields block) } doField field | Just (VLiteral lit) <- fieldVariable field = Just MkPattern { patternBox = fieldBox field , patternMatch = MkMatch lit , patternUseWildcards = False } | otherwise = Nothing -- We now calculate the applied vars for each page fillVariables :: Sheet -> [SheetResult] -> [Page] -> [Page] fillVariables sheet results pages = [ fillPageVariables appliedVars p | p <- pages | appliedVars <- applyVariable <$> [1..] `zip` valueMaps ] where (fieldVars, frameVars) = sheetVariableFields sheet valueMaps = makeValueMap <$> [ MkSheetResult rf nr | MkSheetResult rf _ <- results | MkSheetResult _ nr <- newResults ] docVals = LM.unionsWith (++) valueMaps newResults = fromMaybe err . extractPage sheet <$> pages err = error "Roundtrip failed during variable fill!" applyVariable :: (Int, ValueMap) -> LabelMap AppliedVariable applyVariable (pageNum, pageVals) = LM.mapMaybeWithKey (applyOneVariable False) fieldVars `LM.union` LM.mapMaybeWithKey (applyOneVariable True) frameVars where applyOneVariable :: Bool -> Label -> Field -> Maybe AppliedVariable applyOneVariable isInFrame lbl fld@MkField{ fieldBox, fieldVariable } | Just var <- fieldVariable , if isInFrame then LM.member lbl pageVals else True = Just MkAppliedVariable { avRow = boxTop fieldBox , avCol = boxLeft fieldBox , avValue = getVar var } | otherwise = Nothing where len = fieldLen fld valOf :: Scope -> Label -> [Value] valOf scope lbl = fromMaybe [] $ LM.lookup lbl vals where vals = case scope of SDoc -> docVals SPage -> pageVals getVar VPage = formatInt len pageNum getVar (VSum scope label) = case fieldFormat fld of FNumeric 2 -> (formatDotted (len-6) . show $ sum (valToInt <$> valOf scope label)) `S.append` S.pack ".00" _ -> formatInt len $ sum (valToInt <$> valOf scope label) getVar (VCount scope label) = formatInt len $ length (valOf scope label) getVar (VLiteral lit) = formatLiteral lit getVar (VLabel label) = formatLiteral $ case valOf SPage label of [] -> S.empty (x:_) -> x getVar (VSubStr label d t) = formatLiteral $ case valOf SPage label of [] -> S.empty (x:_) -> S.take t (S.drop d x) getVar (VReplace label mrs) = formatLiteral $ case valOf SPage label of [] -> S.empty (x:_) -> foldl (flip $ uncurry replaceWith) x mrs formatLiteral lit = lit `S.append` S.replicate (len - S.length lit) ' ' formatInt :: Int -> Int -> Value formatInt len = S.pack . printf ("%" ++ show len ++ "d") formatFloat :: Int -> Int -> Value formatFloat len = S.pack . printf ("%" ++ show (len - 3) ++ "d.00") makeValueMap :: SheetResult -> ValueMap makeValueMap MkSheetResult{ resultFields, resultBlocks } = LM.unionsWith (++) (fieldMap:blockMaps) where fieldMap = (:[]) <$> resultFields blockMaps :: [ValueMap] blockMaps = makeBlockValueMap <$> LM.elems resultBlocks makeBlockValueMap :: BlockResult -> ValueMap makeBlockValueMap (MkBlockResult avs) = LM.unionsWith (++) maps where maps :: [LabelMap [Value]] maps = [ (:[]) <$> bound | (_, bound) <- avs ] -- Now comes the fun part. -- For each frame, see if any of its blocks are there. -- If yes, concat them and pain the rest white. makePage :: Sheet -> PageBinding -> Page -> Page makePage MkSheet{ sheetFrames, sheetUseBlockSortPriority = True } binding page = foldl makeFrame page sheetFrames where makeFrame page MkFrame{ frameBox, frameBlocks } | null frameBindings = page | otherwise = pageReplaced where pageCleared = clearArea frameBox page pageReplaced = replaceArea frameBox (concat frameBindings) pageCleared frameBindings = [ fst <$> fromJust result | frameLabel <- LM.keys frameBlocks , let result = LM.lookup frameLabel binding , isJust result ] -- Otherwise we inspect individual vars and apply them. -- Also: For GROUP BY fields, leave things blank! -- Key here is blockGroupBy. makePage MkSheet{ sheetFrames } binding page = foldl makeFrame page sheetFrames where makeFrame page MkFrame{ frameBox, frameBlocks } | null frameBindings = page | otherwise = pageReplaced where (pageReplaced, _) = foldl replacePage (page, frameBox) (concatMap (reverse . doGroupBy . reverse) frameBindings) frameBindings = [ (\(_, bound) -> (block, bound)) <$> fromJust result | (frameLabel, block) <- LM.toList frameBlocks , let result = LM.lookup frameLabel binding , isJust result ] -- If we group by some fields, the second time the same field occurs, -- _and_ if all fields to the left also matches, then do not bother to display it. doGroupBy :: [(Block, LabelMap Bound)] -> [(Block, LabelMap Bound)] doGroupBy [] = [] doGroupBy [x] = [x] doGroupBy ((xb@MkBlock{ blockGroupBy }, xs):rest@((_, ys):_)) = (xb, xs'):doGroupBy rest where -- For each kv in xs: -- If the k is in blockGroupBy -- and v is in ys as v' -- and v == v' -- and all preceding fields also match -- then replace the v with empty xs' = LM.mapWithKey doCollapse xs doCollapse lbl val | lbl `elem` blockGroupBy , Just val' <- LM.lookup lbl ys , val == val' , preds <- takeWhile (/= lbl) blockGroupBy , map (`LM.lookup` xs) preds == map (`LM.lookup` ys) preds = S.empty | otherwise = val replacePage :: (Page, Box) -> (Block, LabelMap Bound) -> (Page, Box) replacePage (page, box@MkBox{ boxTop, boxLeft }) (MkBlock{ blockLines, blockFields }, bounds) = (page', box') where box' = box{ boxTop = boxTop + blockLines } page' = foldl replaceField page [ (fromJust rv, fieldBox) | (lbl, MkField{ fieldBox }) <- LM.toList blockFields , let rv = LM.lookup lbl bounds , isJust rv ] rowOffset = boxTop - 1 colOffset = boxLeft - 1 replaceField p (val, MkBox{ boxTop, boxLeft, boxBottom, boxRight}) = fillArea (boxTop + rowOffset) (boxLeft + colOffset) (valueToArea val) p replaceArea :: Box -> [Area] -> Page -> Page replaceArea _ [] page = page replaceArea box@MkBox{ boxTop, boxLeft } (area:rest) page = replaceArea box' rest page' where box' = box{ boxTop = boxTop + areaRows area} page' = fillArea boxTop boxLeft area page clearArea :: Box -> Page -> Page clearArea MkBox{ boxTop, boxLeft, boxBottom, boxRight } (MkPage lns) = MkPage $ before ++ cleared ++ after where (before, rest) = splitAt (boxTop-1) lns (middle, after) = splitAt (boxBottom-boxTop+1) rest cleared = fillLine boxLeft . ((,) whiteSpace) <$> middle whiteSpace = S.replicate (boxRight-boxLeft+1) ' ' {-# INLINE fillArea #-} fillArea :: Row -> Col -> Area -> Page -> Page fillArea row col (MkPage areaLines) (MkPage lns) = MkPage $ before ++ replaced ++ after where (before, rest) = splitAt (row-1) lns (middle, after) = splitAt (length areaLines) rest replaced = fillLine col <$> areaLines `zip` middle {-# INLINE fillLine #-} fillLine :: Col -> (Value, Value) -> Value fillLine col (areaLine, origLine) = S.concat [before, areaLine, after] where origLinePadded | padLength <= 0 = origLine | otherwise = origLine `S.append` S.replicate padLength ' ' padLength = (col + areaLength - 1) - S.length origLine (before, rest) = S.splitAt (col-1) origLinePadded after = S.drop areaLength rest areaLength = S.length areaLine doCapacity :: Sheet -> [PageCapacity] -> SheetResult -> [PageCapacity] doCapacity MkSheet{ sheetFrames } pcs MkSheetResult{ resultBlocks } = pc:pcs where pc = map frameToSlot framesOccured framesOccured = filter (labelOccured . frameBlocks) sheetFrames labelOccured lm = any (`LM.member` lm) resultLabels resultLabels = LM.keys resultBlocks frameToSlot MkFrame{ frameBox, frameBlocks } = MkSlot { slotSize = boxBottom frameBox - boxTop frameBox + 1 , slotBlocks = LM.keys frameBlocks } doAttempt :: LabelMap [OrderBy Label] -> FitAttempt -> SheetResult -> FitAttempt doAttempt orders att MkSheetResult{ resultBlocks } = att' where att' = LM.unionsWith mappend [att, LM.mapMaybeWithKey blockToData resultBlocks] blockToData lbl (MkBlockResult lms) = case lms of [] -> Nothing ((area, _):_) -> Just MkBlockData { dataSize = areaRows area , dataAreas = areas } where orderFrom = case LM.lookup lbl orders of Just orderBys -> \bound -> [ (`LM.lookup` bound) <$> orderBy | orderBy <- orderBys ] Nothing -> const [] areas = [ (orderFrom bound, area) | area@(_, bound) <- lms ] areaRows :: Area -> Row areaRows (MkPage lns) = length lns fillPageVariables :: LabelMap AppliedVariable -> Page -> Page fillPageVariables avs page = foldl fillOneVar page (LM.elems avs) where fillOneVar page MkAppliedVariable{ avRow, avCol, avValue } = fillArea avRow avCol (valueToArea avValue) page valueToArea :: Value -> Area valueToArea val = MkPage [val] sheetVariableFields :: Sheet -> (LabelMap Field, LabelMap Field) sheetVariableFields MkSheet{ sheetFrames, sheetFields } = (fieldMaps, LM.unions filteredMaps) where fieldMaps = LM.mapWithKey addVariable sheetFields filteredMaps = LM.filter (fieldIsVariable . fieldVariable) <$> frameMaps frameMaps = concatMap frameVariableFields sheetFrames fieldIsVariable (Just VReplace{}) = False fieldIsVariable (Just VSubStr{}) = False fieldIsVariable Nothing = False fieldIsVariable _ = True addVariable lbl fld@MkField{ fieldVariable } = case fieldVariable of Just{} -> fld _ -> fld{ fieldVariable = Just (VLabel lbl) } frameVariableFields frame@MkFrame{ frameBox } = map blockVariableFields blocks where -- XXX - Special rule - blocks with GROUP BY is not filled in at this stage! blocks = filter (null . blockGroupBy) $ LM.elems (frameBlocks frame) rowOffset = boxTop frameBox - 1 colOffset = boxLeft frameBox - 1 blockVariableFields = fmap adjustField . blockFields adjustField field@MkField{ fieldBox = MkBox{ boxTop, boxLeft, boxBottom, boxRight } } = field { fieldBox = MkBox { boxTop = boxTop + rowOffset , boxLeft = boxLeft + colOffset , boxBottom = boxBottom + rowOffset , boxRight = boxRight + colOffset } } sheetBlockOrderBys :: Sheet -> LabelMap [OrderBy Label] sheetBlockOrderBys = gatherForSheetBlock blockOrderBy sheetBlockGroupBys :: Sheet -> LabelMap [Label] sheetBlockGroupBys = gatherForSheetBlock blockGroupBy sheetBlockFields :: Sheet -> LabelMap (LabelMap Field) sheetBlockFields = gatherForSheetBlock blockFields gatherForSheetBlock :: (Block -> a) -> Sheet -> LabelMap a gatherForSheetBlock f MkSheet{ sheetFrames } = LM.unions (map doGather sheetFrames) where doGather MkFrame{ frameBlocks } = f <$> frameBlocks {-LM.fromList [ (lbl, vals) | (lbl, vals@(_:_)) <- LM.toList (f <$> frameBlocks) ] -} -- Each try is on a list of Frame Capacities -- Each frame capacity is a number of rows, and the label of blocks it can consume -- We try it with the total number of blocks tryFit :: [PageCapacity] -> FitAttempt -> Maybe [PageBinding] tryFit pcs att = go pcs ([], att) where go [] (bindings, att) | all (null . dataAreas) (LM.elems att) = Just (reverse bindings) | otherwise = Nothing go (p:ps) (bindings, att) = go ps ((bnd:bindings), att') where (bnd, att') = fitOnePage p (mempty, att) fitOnePage :: [Slot] -> (PageBinding, FitAttempt) -> (PageBinding, FitAttempt) fitOnePage [] x = x fitOnePage (s:ss) x = fitOnePage ss $ fitOneSlot s x fitOneSlot :: Slot -> (PageBinding, FitAttempt) -> (PageBinding, FitAttempt) fitOneSlot slot@MkSlot{ slotSize, slotBlocks } x@(binding, att) = case slotBlocks of [] -> x (b:bs) -> case LM.lookup b att of Just dat@MkBlockData{ dataSize, dataAreas } -> let consumed = min (length dataAreas) (slotSize `div` dataSize) consumedSize = consumed * dataSize newData = dat{ dataAreas = drop consumed dataAreas } newAttempt = LM.insert b newData att newBinding = LM.insertWith (++) b (snd <$> take consumed dataAreas) binding newSlot = slot { slotSize = slotSize - consumedSize , slotBlocks = bs } in fitOneSlot newSlot (newBinding, newAttempt) _ -> fitOneSlot slot{ slotBlocks = bs } x