module Holumbus.Utility where
import Control.Exception (bracket)
import Data.Binary
import qualified Data.ByteString.Lazy as B
import Data.Char
import qualified Data.List as L
import Numeric ( showHex )
import System.IO
import Text.XML.HXT.Core
split :: Eq a => [a] -> [a] -> [[a]]
split _ [] = [[]]
split at w@(x:xs) = maybe ((x:r):rs) ((:) [] . split at) (L.stripPrefix at w)
where (r:rs) = split at xs
join :: Eq a => [a] -> [[a]] -> [a]
join = L.intercalate
strip :: String -> String
strip = stripWith isSpace
stripl :: String -> String
stripl = dropWhile isSpace
stripr :: String -> String
stripr = reverse . dropWhile isSpace . reverse
stripWith :: (a -> Bool) -> [a] -> [a]
stripWith f = reverse . dropWhile f . reverse . dropWhile f
strictDecodeFile :: Binary a => FilePath -> IO a
strictDecodeFile f =
bracket (openBinaryFile f ReadMode)
hClose
$ \h -> do c <- B.hGetContents h
return $! decode c
partitionListByLength :: Int -> [a] -> [[a]]
partitionListByLength _ [] = []
partitionListByLength count l = [take count l] ++ (partitionListByLength count (drop count l))
partitionListByCount :: Int -> [a] -> [[a]]
partitionListByCount sublistCount list = partition sublistCount list
where
partition 0 _ = []
partition sublists l
= let next = ((length l) `div` sublists)
in if next == 0 then [l]
else [take next l] ++ partition (sublists 1) (drop next l)
escape :: String -> String
escape [] = []
escape (c:cs)
= if isAlphaNum c || isSpace c
then c : escape cs
else '%' : showHex (fromEnum c) "" ++ escape cs
computeDocBase :: ArrowXml a => a XmlTree String
computeDocBase
= ( ( ( this
/> hasName "html"
/> hasName "head"
/> hasName "base"
>>> getAttrValue "href"
)
&&&
getAttrValue "transfer-URI"
)
>>> expandURI
)
`orElse`
getAttrValue "transfer-URI"
traceOffset :: Int
traceOffset = 3
trcMsg :: String -> IO ()
trcMsg m = hPutStrLn stderr ('-':"- (0) " ++ m)
getByPath :: ArrowXml a => [String] -> a XmlTree XmlTree
getByPath = seqA . map (\ n -> getChildren >>> hasName n)
robotsNo :: String -> LA XmlTree XmlTree
robotsNo what = none
`when`
( getByPath ["html", "head", "meta"]
>>>
hasAttrValue "name" ( map toUpper
>>>
(== "ROBOTS")
)
>>>
getAttrValue0 "content"
>>>
isA ( map (toUpper >>> (\ x -> if isLetter x then x else ' '))
>>>
words
>>>
(what `elem`)
)
)
robotsNoIndex :: ArrowXml a => a XmlTree XmlTree
robotsNoIndex = fromLA $ robotsNo "NOINDEX"
robotsNoFollow :: ArrowXml a => a XmlTree XmlTree
robotsNoFollow = fromLA $ robotsNo "NOFOLLOW"