{-# LANGUAGE
    ScopedTypeVariables
  , ViewPatterns
  , PatternGuards
  , TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- | Parsing GTFS files.
--
-- Besides these functions, this module provides many orphan
-- instances of @'Field'@ and @'ParseRow'@.
module Data.GTFS.Parse
  ( parseFile
  , parseFeed
  ) where

import Data.GTFS.Types

import Text.RowRecord
import Text.RowRecord.TH

import Data.List.Split
import Control.Applicative
import System.Directory
import System.IO.Unsafe ( unsafeInterleaveIO )

import qualified Text.CSV        as CSV
import qualified System.FilePath as Path

-- Decode a field as an integer, then convert it to a bounded enumeration.
--
-- The integer must lie within [fromEnum minBound, fromEnum maxBound].
-- Both ends are checked here because 'toEnum' is typically partial outside
-- that range (e.g. for derived instances); a bad value is reported as a
-- 'Failure' instead of reaching 'toEnum'.
enumDecode :: forall a. (Enum a, Bounded a) => Maybe String -> Result a
enumDecode xs = decode xs >>= conv where
  lo = fromEnum (minBound :: a)
  hi = fromEnum (maxBound :: a)
  conv n
    | n < lo || n > hi = Failure $ NoParse "" ("out of range: " ++ show n)
    | otherwise        = Success $ toEnum n

-- Enumerated GTFS fields are all decoded through 'enumDecode'.
instance Field LocationType  where decode = enumDecode
instance Field RouteType     where decode = enumDecode
instance Field DirectionID   where decode = enumDecode
instance Field OnOffType     where decode = enumDecode
instance Field ServiceFlag   where decode = enumDecode
instance Field ExceptionType where decode = enumDecode
instance Field PaymentMethod where decode = enumDecode
instance Field TransferType  where decode = enumDecode

-- A date field is split positionally: the first 4 characters, the next 2,
-- and the remainder, which must be exactly 2 characters long (so the whole
-- field is 8 characters). Each piece must be accepted by 'safeRead'.
instance Field Date where
  decode = require f where
    f (splitAt 4 -> (ys, splitAt 2 -> (ms, ds)))
      | length ds == 2
      , Just [y,m,d] <- mapM safeRead [ys,ms,ds]
      = Just $ Date y m d
    f _ = Nothing

-- A time field must consist of exactly three ':'-separated components,
-- each of which must be accepted by 'safeRead'.
instance Field Time where
  decode = require f where
    f (mapM safeRead . splitOn ":" -> Just [h,m,s]) = Just $ Time h m s
    f _ = Nothing

-- Generate the row-record instances for every GTFS record type.
-- This splice must stay below the 'Field' instances above, since
-- declarations after a top-level splice are not visible before it.
$(rowRecords
  [ ''Agency
  , ''Stop
  , ''Route
  , ''Trip
  , ''StopTime
  , ''Calendar
  , ''CalendarDate
  , ''FareAttribute
  , ''FareRule
  , ''Shape
  , ''Frequency
  , ''Transfer ])

-- Drop some bad rows from a CSV file: rows with no fields at all, and
-- rows consisting of a single empty field.
cleanup :: [[String]] -> [[String]]
cleanup = filter ok where
  ok []   = False
  ok [""] = False
  ok _    = True

-- Read a CSV file and return its rows after 'cleanup'.
--
-- A CSV-level parse error is raised via 'error', with the file path and
-- the parser's message included.
getCSV :: FilePath -> IO [[String]]
getCSV p = do
  x <- CSV.parseCSVFromFile p
  case x of
    Left  e -> error ("CSV parse failed on " ++ p ++ ": " ++ show e)
    Right v -> return . cleanup $ v

-- | Parse a single GTFS data file.
--
-- Since some files are optional, this produces an empty list
-- if the file does not exist.
--
-- A file that exists but cannot be parsed (at the CSV level or at the
-- field level) raises an exception via 'error'.
parseFile :: (ParseRow a) => FilePath -> IO [a]
parseFile p = do
  ex <- doesFileExist p
  if not ex then return [] else go
  where
    go = do
      x <- getCSV p
      case fromStrings x >>= parseTable of
        Failure e -> error ("field parse failure: " ++ show e)
        Success y -> return y

-- | Parse an entire feed directory.
--
-- Each individual file is read and parsed only when its field in @'Feed'@
-- is forced.  The usual caveats of lazy I\/O apply.  Parsing within a file
-- is not lazy.
--
-- Alternatives to this function include @'parseFile'@ and @'parseRow'@.
parseFeed :: FilePath -> IO Feed
parseFeed d = Feed
  <$> f "agency.txt"
  <*> f "stops.txt"
  <*> f "routes.txt"
  <*> f "trips.txt"
  <*> f "stop_times.txt"
  <*> f "calendar.txt"
  <*> f "calendar_dates.txt"
  <*> f "fare_attributes.txt"
  <*> f "fare_rules.txt"
  <*> f "shapes.txt"
  <*> f "frequencies.txt"
  <*> f "transfers.txt"
  where
    -- Defer reading each file until its result is demanded.
    f :: (ParseRow a) => String -> IO [a]
    f x = unsafeInterleaveIO . parseFile $ Path.combine d x