module Bio.PDB.Reader
  ( fromTextPDB
  , fromFilePDB
  , PDBWarnings(..)
  ) where
import           Bio.PDB.Parser         (pdbP)
import           Bio.PDB.Type           (PDB (..))
import           Control.Monad.IO.Class (MonadIO, liftIO)
import           Data.Attoparsec.Text   (parseOnly)
import           Data.Bifunctor         (first)
import           Data.List              as L (findIndices, length)
import           Data.Maybe             (catMaybes)
import           Data.Text              as T (Text, length, lines, pack,
                                              replicate, take, unlines)
import qualified Data.Text.IO           as TIO (readFile)
-- | Index of a line within the input text. The numbering used by this
-- module when emitting warnings starts at 0 for the first line.
type LineNumber = Int
-- | Non-fatal irregularities noticed while bringing the input text to the
-- fixed line width expected by the parser.
data PDBWarnings
  = LineTooLong LineNumber
    -- ^ The line exceeded the fixed width and was truncated.
  | LineTooShort LineNumber
    -- ^ The line was narrower than the fixed width and was padded with spaces.
  deriving (Show, Eq)
-- | Bring every line of the input to exactly 80 characters.
--
-- Lines shorter than 80 characters are right-padded with spaces and lines
-- longer than 80 characters are cut after the 80th character. Each adjusted
-- line yields one warning tagged with the zero-based index of that line;
-- warnings are returned in line order. The adjusted lines are re-joined with
-- 'T.unlines', so every line of the result is newline-terminated.
standardizeText :: Text -> ([PDBWarnings], Text)
standardizeText text = (catMaybes maybeWarnings, T.unlines fixedLines)
  where
    -- Width every output line is forced to.
    targetWidth = 80
    -- One (possible warning, adjusted line) pair per input line, split into
    -- two parallel lists.
    (maybeWarnings, fixedLines) = unzip (zipWith fitLine [0 ..] (T.lines text))
    -- Pad, truncate or keep a single line, reporting what was done to it.
    fitLine :: LineNumber -> Text -> (Maybe PDBWarnings, Text)
    fitLine idx row =
      case compare width targetWidth of
        LT -> (Just (LineTooShort idx), row <> T.replicate (targetWidth - width) " ")
        GT -> (Just (LineTooLong idx), T.take targetWidth row)
        EQ -> (Nothing, row)
      where
        width = T.length row
-- | Tell whether a line starts with one of the record prefixes that this
-- module treats as belonging to a model section.
--
-- The prefixes are compared verbatim, including their trailing spaces, so a
-- bare @TER@ without padding does not match while @"TER   "@ does.
isMdlLine :: Text -> Bool
isMdlLine line = any opensLine modelPrefixes
  where
    modelPrefixes = ["MODEL ", "ENDMDL", "ATOM ", "TER   ", "HETATM", "ANISOU", "CONECT"]
    -- Compare the prefix with the equally long beginning of the line.
    opensLine prefix = T.take (T.length prefix) line == prefix
-- | Check that the given indices form one unbroken run of consecutive
-- integers, i.e. that the list equals @[x, x + 1, x + 2, ...]@ for its
-- first element @x@.
--
-- An empty list and a singleton list both count as unbroken runs. The check
-- compares the list element by element against the expected run, so it does
-- not rely on the input being strictly increasing (a span-versus-length test
-- would wrongly accept lists such as @[1, 2, 2, 4]@) and needs no partial
-- functions.
checkRow :: [Int] -> Bool
checkRow []             = True
checkRow xs@(start : _) = and (zipWith (==) xs [start ..])
-- | Check that all model-section lines of the text sit next to each other.
--
-- Only the text component of the pair is inspected; the warnings are
-- ignored. The positions of the lines accepted by 'isMdlLine' are collected
-- and handed to 'checkRow'.
checkMdlLines :: ([PDBWarnings], Text) -> Bool
checkMdlLines (_, body) = checkRow (findIndices isMdlLine (T.lines body))
-- | Prepare raw text for parsing.
--
-- The text is first normalised with 'standardizeText'. If 'checkMdlLines'
-- accepts the normalised text, it is returned together with the collected
-- warnings; otherwise an error message is returned in 'Left'.
preprocess :: Text -> Either Text ([PDBWarnings], Text)
preprocess text
  | checkMdlLines standardized = Right standardized
  | otherwise                  = Left "There are trash strings between model strings"
  where
    standardized = standardizeText text
-- | Read the file at the given path and parse its contents with
-- 'fromTextPDB'.
--
-- The result has the same shape as that of 'fromTextPDB'. Exceptions raised
-- while reading the file are not caught here.
fromFilePDB :: MonadIO m => FilePath -> m (Either Text ([PDBWarnings], PDB))
fromFilePDB path = liftIO (fromTextPDB <$> TIO.readFile path)
-- | Parse PDB text.
--
-- The input is run through 'preprocess' and the resulting text is parsed
-- with 'pdbP'. On success the warnings gathered during preprocessing are
-- returned alongside the parsed value. A preprocessing failure or a parser
-- failure is reported in 'Left'; the parser's message is converted to 'Text'.
fromTextPDB :: Text -> Either Text ([PDBWarnings], PDB)
fromTextPDB text =
  preprocess text >>= \(warnings, cleanText) ->
    (,) warnings <$> first pack (parseOnly pdbP cleanText)