module Text.HTML.DirectoryListing.Parser (parseDirectoryListing)
where
import Text.HTML.DirectoryListing.Type
import Text.HTML.TagSoup
import Data.Time.LocalTime
import Data.Time.Format
import Data.List
import Data.Maybe
import qualified Data.Text as T
-- | Extract the file entries from the HTML text of a directory-listing page.
--
-- The input is handled one line at a time. Each line is tokenised with
-- 'parseTags', layout-only tags and blank text nodes are discarded, and the
-- remaining tags must form a single @\<a href=...\>name\</a\>@ link followed
-- by the modification time and the file size. Lines of any other shape
-- produce no entry.
parseDirectoryListing :: T.Text -> [Entry]
parseDirectoryListing = mapMaybe lineToEntry . T.lines
  where
    -- Tokenise one line, drop the noise, and try to read an entry from it.
    lineToEntry :: T.Text -> Maybe Entry
    lineToEntry = toEntry . filter (not . isNoise) . parseTags

    toEntry :: [Tag T.Text] -> Maybe Entry
    -- Timestamp and size arrive as two separate text nodes.
    toEntry [ TagOpen "a" [("href", link)]
            , TagText label
            , TagClose "a"
            , TagText stamp
            , TagText size
            ] = Just (mkEntry link label stamp size)
    -- Timestamp and size share one text node: it must consist of exactly
    -- three whitespace-separated words (two for the timestamp, one for the
    -- size).
    toEntry [TagOpen "a" [("href", link)], TagText label, TagClose "a", TagText stampAndSize] =
        case T.words stampAndSize of
            [day, clock, size] -> Just (mkEntry link label (T.unwords [day, clock]) size)
            _                  -> Nothing
    toEntry _ = Nothing

    -- Assemble the record from the raw link target, link text, timestamp
    -- text and size text.
    mkEntry :: T.Text -> T.Text -> T.Text -> T.Text -> Entry
    mkEntry link label stamp size =
        Entry
            { visibleName  = label
            , href         = link
            , lastModified = parseLastModified stamp
            , fileSize     = parseFileSize size
            }

    -- Tags that carry no entry data: table row/cell markup, images, and
    -- text nodes that are blank once stripped.
    isNoise :: Tag T.Text -> Bool
    isNoise (TagOpen tag _) = tag `elem` ["tr", "td", "img"]
    isNoise (TagClose tag)  = tag `elem` ["tr", "td"]
    isNoise (TagText txt)   = T.strip txt `elem` [" ", "\160", ""]
    isNoise _               = False
-- | Parse a listing timestamp of the form @01-Jan-2020 12:34@
-- (format @%d-%b-%Y %R@) into a 'LocalTime'.
--
-- Leading and trailing whitespace is accepted. This function is partial:
-- text that does not match the format makes 'parseTimeOrError' raise an
-- error when the result is evaluated.
parseLastModified :: T.Text -> LocalTime
parseLastModified = parseTimeOrError True abbrevLocale "%d-%b-%Y %R" . T.unpack
  where
    -- The default locale, but with the three-letter English abbreviation
    -- stored in both slots of every month pair.
    abbrevLocale :: TimeLocale
    abbrevLocale = defaultTimeLocale { months = [ (m, m) | m <- monthAbbrevs ] }

    monthAbbrevs :: [String]
    monthAbbrevs =
        [ "Jan", "Feb", "Mar", "Apr"
        , "May", "Jun", "Jul", "Aug"
        , "Sep", "Oct", "Nov", "Dec"
        ]
-- | Parse a human-readable file size into a number of bytes.
--
-- The text is stripped of surrounding whitespace and must then be a number,
-- optionally followed directly by exactly one unit suffix: @B@ (bytes), @K@,
-- @M@, @G@ or @T@ (powers of 1024). Fractional values are rounded to the
-- nearest byte.
--
-- Returns 'Nothing' when the text is not a number (for example the @-@
-- shown for directories), when the suffix is not recognised, or when the
-- number is negative, @NaN@ or @Infinity@.
parseFileSize :: T.Text -> Maybe Integer
parseFileSize t =
    case reads (T.unpack (T.strip t)) :: [(Double, String)] of
        [(value, suffix)]
            -- 'reads' at 'Double' also accepts "NaN", "Infinity" and
            -- negative numbers; rounding those would yield a meaningless
            -- size, so reject them explicitly.
            | isNaN value || isInfinite value || value < 0 -> Nothing
            | otherwise -> toBytes value <$> lookup suffix multipliers
        _ -> Nothing
  where
    toBytes :: Double -> Double -> Integer
    toBytes value factor = round (value * factor)

    -- Bytes per unit for every accepted suffix; the factors are powers of
    -- two, so the multiplication itself introduces no rounding error.
    multipliers :: [(String, Double)]
    multipliers =
        [ ("" , 1)
        , ("B", 1)
        , ("K", 1024)
        , ("M", 1024 * 1024)
        , ("G", 1024 * 1024 * 1024)
        , ("T", 1024 * 1024 * 1024 * 1024)
        ]