{-# LANGUAGE ImplicitParams #-} module Main (main) where import Data.IORef import System.IO import System.IO.Unsafe import System.Exit import System.Environment import Control.Applicative import qualified Data.ByteString.Lazy.Char8 as L import qualified Data.IntMap as IM {-# NOINLINE __POS__ #-} __POS__ :: IORef Int __POS__ = unsafePerformIO (newRef 0) {-# NOINLINE __OBJ__ #-} __OBJ__ :: IORef Int __OBJ__ = unsafePerformIO (newRef 0) {-# NOINLINE __LOC__ #-} __LOC__ :: IORef (IM.IntMap String) __LOC__ = unsafePerformIO (newRef IM.empty) {-# NOINLINE __PAGE__ #-} __PAGE__ :: IORef [Obj] __PAGE__ = unsafePerformIO (newRef []) type M = IO type Obj = Int main :: IO () main = do args <- getArgs input <- case args of [] -> do putStrLn "Usage: line2pdf input.txt > output.pdf" putStrLn " (Form feed (^L) in input denotes a pagebreak.)" exitWith ExitSuccess (i:_) -> return i hSetBinaryMode stdout True pr$ "%PDF-1.2\n" ++ "%\xE2\xE3\xCF\xD3\n" (info, root, tPages, resources) <- writeHeader pageObjs <- let ?tPages = tPages ?resources = resources in writePages =<< (L.lines <$> L.readFile input) markLocation root writeObj root $ do pr$ "/Type/Catalog" ++ "/Pages " pr$ show tPages ++ " 0 R" markLocation tPages writeObj tPages $ do pr$ "/Type/Pages" ++ "/Count " pr$ show (length pageObjs) ++ "/MediaBox[0 0 " ++ show pageWidth ++ " " ++ show pageHeight pr$ "]" ++ "/Kids[" pr$ concatMap ((++ " 0 R ") . show) pageObjs pr$ "]" xfer <- currentLocation objCount <- incrObj pr$ "xref\n" ++ "0 " ++ show objCount ++ "\n" ++ "0000000000 65535 f \r" writeLocations pr$ "trailer\n" ++ "<<" ++ "/Size " pr$ show objCount pr$ "/Root " ++ show root ++ " 0 R" pr$ "/Info " ++ show info ++ " 0 R" pr$ ">>\n" ++ "startxref\n" pr$ show xfer ++ "\n" ++ "%%EOF\n" writeObj :: Obj -> M a -> M a writeObj obj f = do pr$ show obj ++ " 0 obj" ++ "<<" rv <- f pr$ ">>" ++ "endobj\n" return rv writeLocations :: M () writeLocations = do locs <- IM.elems <$> readRef __LOC__ pr$ concatMap fmt locs where fmt x = pad ++ x ++ " 00000 n \r" where pad = replicate (10-l) '0' l = length x printObj :: String -> M Obj printObj str = markObj $ (pr str >>) . return writeHeader :: M (Obj, Obj, Obj, Obj) writeHeader = do info <- printObj $ "/CreationDate(D:20080707163949+08'00')" ++ "/Producer(line2pdf.hs)" ++ "/Title(Untitled)" encoding <- printObj strDefaultEncoding everyFont <- (`mapM` ([1..] `zip` baseFonts)) $ \(n, font) -> do markObj $ \obj -> do pr$ "/Type/Font" ++ "/Subtype/Type1" ++ "/Name/F" pr$ show (n :: Int) pr$ "/Encoding " ++ show encoding ++ " 0 R" pr$ font return $ "/F" ++ show n ++ " " ++ show obj ++ " 0 R" tradFont <- tradChineseFonts root <- incrObj tPages <- incrObj markObj $ \resources -> do pr$ "/Font<<" ++ concat everyFont ++ concat tradFont ++ ">>" ++ "/ProcSet[/PDF/Text]" ++ "/XObject<<>>" return (info, root, tPages, resources) baseFonts :: [String] baseFonts = [ "/BaseFont/Courier" , "/BaseFont/Courier-Oblique" , "/BaseFont/Courier-Bold" , "/BaseFont/Courier-BoldOblique" , "/BaseFont/Helvetica" , "/BaseFont/Helvetica-Oblique" , "/BaseFont/Helvetica-Bold" , "/BaseFont/Helvetica-BoldOblique" , "/BaseFont/Times-Roman" , "/BaseFont/Times-Italic" , "/BaseFont/Times-Bold" , "/BaseFont/Times-BoldItalic" , "/BaseFont/Symbol" , "/BaseFont/ZapfDingbats" ] pr :: String -> M () pr str = do putStr str modifyRef __POS__ (+ (length str)) currentLocation :: IO Int currentLocation = readRef __POS__ newRef :: a -> M (IORef a) newRef = newIORef readRef :: IORef a -> M a readRef = readIORef writeRef :: IORef a -> a -> M () writeRef = writeIORef modifyRef :: IORef a -> (a -> a) -> M () modifyRef = modifyIORef incrObj :: M Obj incrObj = do obj <- succ <$> readRef __OBJ__ writeRef __OBJ__ obj return obj markObj :: (Obj -> M a) -> M a markObj f = do obj <- incrObj markLocation obj writeObj obj (f obj) markLocation :: Obj -> M () markLocation obj = do loc <- currentLocation modifyRef __LOC__ $ IM.insert obj (show loc) startPage :: (?tPages :: Obj, ?resources :: Obj) => M Int startPage = do markObj $ \obj -> do modifyRef __PAGE__ (obj:) pr$ "/Type/Page" pr$ "/Parent " ++ show ?tPages ++ " 0 R" pr$ "/Resources " ++ show ?resources ++ " 0 R" pr$ "/Contents " ++ show (succ obj) ++ " 0 R" pr$ "/Rotate 0" obj <- incrObj markLocation obj pr$ show obj ++ " 0 obj" ++ "<<" pr$ "/Length " ++ show (succ obj) ++ " 0 R" pr$ ">>" ++ "stream\n" streamPos <- currentLocation pr$ "BT\n"; let fontN = 30 :: Int ptSize = 12 :: Int pr$ "/F" ++ show fontN ++ " " ++ show ptSize ++ " Tf\n" pr$ "1 0 0 1 50 " ++ show (pageHeight - 40) ++ " Tm\n" pr$ "12 TL\n" return streamPos endPage :: Int -> M () endPage streamStart = do pr$ "ET\n" streamEnd <- currentLocation pr$ "endstream\n" ++ "endobj\n" obj <- incrObj markLocation obj pr$ show obj ++ " 0 obj\n" ++ show (streamEnd - streamStart) ++ "\n" ++ "endobj\n" writePages :: (?tPages :: Obj, ?resources :: Obj) => [L.ByteString] -> M [Obj] writePages lns = do pos <- newRef =<< startPage (`mapM_` lns) $ \ln -> do case fromEnum (L.length ln) of 1 | L.head ln == '\f' -> do endPage =<< readRef pos writeRef pos =<< startPage len -> do pr$ "T*(" L.putStr ln modifyRef __POS__ (+ len) pr$ ")Tj\n" endPage =<< readRef pos reverse <$> readRef __PAGE__ strDefaultEncoding :: String strDefaultEncoding = "/Type/Encoding" ++ "/Differences[0 /.notdef/.notdef/.notdef/.notdef" ++ "/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef" ++ "/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef" ++ "/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef" ++ "/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef" ++ "/.notdef/.notdef/.notdef/.notdef/space/exclam" ++ "/quotedbl/numbersign/dollar/percent/ampersand" ++ "/quoteright/parenleft/parenright/asterisk/plus/comma" ++ "/hyphen/period/slash/zero/one/two/three/four/five" ++ "/six/seven/eight/nine/colon/semicolon/less/equal" ++ "/greater/question/at/A/B/C/D/E/F/G/H/I/J/K/L" ++ "/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft" ++ "/backslash/bracketright/asciicircum/underscore" ++ "/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p" ++ "/q/r/s/t/u/v/w/x/y/z/braceleft/bar/braceright" ++ "/asciitilde/.notdef/.notdef/.notdef/.notdef/.notdef" ++ "/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef" ++ "/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef" ++ "/dotlessi/grave/acute/circumflex/tilde/macron/breve" ++ "/dotaccent/dieresis/.notdef/ring/cedilla/.notdef" ++ "/hungarumlaut/ogonek/caron/space/exclamdown/cent" ++ "/sterling/currency/yen/brokenbar/section/dieresis" ++ "/copyright/ordfeminine/guillemotleft/logicalnot/hyphen" ++ "/registered/macron/degree/plusminus/twosuperior" ++ "/threesuperior/acute/mu/paragraph/periodcentered" ++ "/cedilla/onesuperior/ordmasculine/guillemotright" ++ "/onequarter/onehalf/threequarters/questiondown/Agrave" ++ "/Aacute/Acircumflex/Atilde/Adieresis/Aring/AE" ++ "/Ccedilla/Egrave/Eacute/Ecircumflex/Edieresis/Igrave" ++ "/Iacute/Icircumflex/Idieresis/Eth/Ntilde/Ograve" ++ "/Oacute/Ocircumflex/Otilde/Odieresis/multiply/Oslash" ++ "/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn" ++ "/germandbls/agrave/aacute/acircumflex/atilde/adieresis" ++ "/aring/ae/ccedilla/egrave/eacute/ecircumflex" ++ "/edieresis/igrave/iacute/icircumflex/idieresis/eth" ++ "/ntilde/ograve/oacute/ocircumflex/otilde/odieresis" ++ "/divide/oslash/ugrave/uacute/ucircumflex/udieresis" ++ "/yacute/thorn/ydieresis]" pageWidth :: Int pageWidth = 792 pageHeight :: Int pageHeight = 612 tradChineseFonts :: M [String] tradChineseFonts = do tradFont <- newRef [] let addFont o fn = modifyRef tradFont (("/F" ++ show (fn :: Int) ++ " " ++ show o ++ " 0 R"):) markFont fn suffix = do markObj $ \obj -> do addFont obj fn pr$ "/Type/Font" ++ "/Subtype/Type0" ++ "/Name/F" pr$ show fn ++ "/BaseFont/MingLiU" pr$ suffix ++ "/Encoding/ETen-B5-H" pr$ "/DescendantFonts[" ++ show (succ obj) ++ " 0 R]" markObj $ \obj -> do pr$ "/Type/Font" ++ "/Subtype/CIDFontType2" ++ "/BaseFont/MingLiU" pr$ suffix ++ "/FontDescriptor " ++ show (succ obj) ++ " 0 R" ++ "/CIDSystemInfo<<" ++ "/Registry(Adobe)" ++ "/Ordering(CNS1)" ++ "/Supplement 0" ++ ">>" ++ "/DW 1000" ++ "/W[13500 14000 500]" printObj $ "/Type/FontDescriptor" ++ "/FontName/MingLiU" ++ "/FontBBox[0 -199 1000 801]" ++ "/Flags 7" ++ "/CapHeight 0" ++ "/Ascent 800" ++ "/Descent -199" ++ "/StemV 0" ++ "/ItalicAngle 0" markFont 30 "" markFont 31 ",Italic" markFont 32 ",Bold" markFont 33 ",BoldItalic" readRef tradFont