module Language.Fortran.Lexer.FixedFormSpec where import Language.Fortran.ParserMonad import Language.Fortran.Lexer.FixedForm import Test.Hspec import Test.Hspec.QuickCheck import TestUtil import Data.List (isPrefixOf) import qualified Data.ByteString.Char8 as B lex66 :: String -> Maybe Token lex66 = collectToLex Fortran66 safeLex66 :: String -> Maybe Token safeLex66 = collectToLexSafe Fortran66 lex77 :: String -> Maybe Token lex77 = collectToLex Fortran77 collectToLex :: FortranVersion -> String -> Maybe Token collectToLex version srcInput = dropUntil2 $ collectFixedTokens version (B.pack srcInput) where dropUntil2 [] = Nothing dropUntil2 [_] = Nothing dropUntil2 [a,_] = Just a dropUntil2 (_:xs) = dropUntil2 xs collectToLexSafe :: FortranVersion -> String -> Maybe Token collectToLexSafe version srcInput = dropUntil2 $ collectFixedTokensSafe version (B.pack srcInput) where dropUntil2 (Just [a,_]) = Just a dropUntil2 (Just (_:xs)) = dropUntil2 $ Just xs dropUntil2 _ = Nothing collectFixedTokens' :: FortranVersion -> String -> [Token] collectFixedTokens' v = collectFixedTokens v . B.pack spec :: Spec spec = describe "Fortran Fixed Form Lexer" $ do describe "Fortran 77" $ describe "String" $ do it "lexes 'hello'" $ resetSrcSpan (lex77 " c = 'hello'") `shouldBe` resetSrcSpan (Just $ TString u "hello") it "lexes 'he''llo'" $ resetSrcSpan (lex77 " c = 'he''llo'") `shouldBe` resetSrcSpan (Just $ TString u "he'llo") it "lexes 'he''''ll''o'" $ resetSrcSpan (lex77 " c = 'he''''ll''o'") `shouldBe` resetSrcSpan (Just $ TString u "he''ll'o") it "lexes '''hello'''" $ resetSrcSpan (lex77 " c = '''hello'''") `shouldBe` resetSrcSpan (Just $ TString u "'hello'") it "lexes 'hello world'" $ resetSrcSpan (lex77 " c = 'hello world'") `shouldBe` resetSrcSpan (Just $ TString u "hello world") it "lexes 'hello world'" $ resetSrcSpan (collectFixedTokens' Fortran77 " c = 'x' // 'o'") `shouldBe` resetSrcSpan [TId u "c", TOpAssign u, TString u "x", TSlash u, TSlash u, TString u "o", TEOF u] describe "Fortran 66" $ do prop "lexes Label, Comment, Newline or EOF in the first six columns or returns Nothing " $ \x -> isPrefixOf " " x || case safeLex66 x of Nothing -> True Just (TLabel _ _) -> True Just (TComment _ _) -> True Just (TEOF _) -> True Just (TNewline _) -> True _ -> False it "lexes alphanumeric identifier" $ resetSrcSpan (collectFixedTokens' Fortran66 " e42 =") `shouldBe` resetSrcSpan [TId u "e42", TOpAssign u, TEOF u] it "lexes exponent" $ resetSrcSpan (collectFixedTokens' Fortran66 " a = 42 e42") `shouldBe` resetSrcSpan [TId u "a", TOpAssign u, TInt u "42", TExponent u "e42", TEOF u] it "lexes 'function foo()'" $ resetSrcSpan (collectFixedTokens' Fortran66 " function foo()") `shouldBe` resetSrcSpan [TFunction u, TId u "foo", TLeftPar u, TRightPar u, TEOF u] it "lexes 'end'" $ resetSrcSpan (lex66 " end") `shouldBe` resetSrcSpan (Just $ TEnd u) it "lexes identifier" $ resetSrcSpan (lex66 " a = mistr") `shouldBe` resetSrcSpan (Just $ TId u "mistr") it "lexes comment if first column is C" $ resetSrcSpan (lex66 "c this is a comment") `shouldBe` resetSrcSpan (Just $ TComment u " this is a comment") it "lexes empty comment" $ resetSrcSpan (lex66 "c") `shouldBe` resetSrcSpan (Just $ TComment u "") it "lexes comment with one char" $ resetSrcSpan (lex66 "ca") `shouldBe` resetSrcSpan (Just $ TComment u "a") it "should not lex from the next line" $ resetSrcSpan (safeLex66 "cxxx\nselam") `shouldNotBe` resetSrcSpan (Just $ TComment u "xxxselam") -- This is commented out as identifiers are longer than what the standard says. it "lexes three tokens" $ do pending resetSrcSpan (collectFixedTokens' Fortran66 " function end format") `shouldBe` resetSrcSpan [TFunction u, TId u "endfor", TId u "mat", TEOF u] it "lexes multiple comments in a line" $ resetSrcSpan (collectFixedTokens' Fortran66 "csomething\ncsomething else\n\nc\ncc\n") `shouldBe` resetSrcSpan [TComment u "something", TNewline u, TComment u "something else", TNewline u, TNewline u, TComment u "", TNewline u, TComment u "c", TNewline u, TEOF u] it "lexes example1" $ resetSrcSpan (collectFixedTokens' Fortran66 example1) `shouldBe` resetSrcSpan example1Expectation it "lexes end of file" $ resetSrcSpan (lex66 "") `shouldBe` Nothing it "lexes '3 + 2'" $ resetSrcSpan (collectFixedTokens' Fortran66 " a = 3 + 2") `shouldBe` resetSrcSpan [TId u "a", TOpAssign u, TInt u "3", TOpPlus u , TInt u "2", TEOF u] it "should lex continuation lines properly" $ resetSrcSpan (collectFixedTokens' Fortran66 continuationExample) `shouldBe` resetSrcSpan [ TType u "integer", TId u "ix", TNewline u, TId u "ix", TOpAssign u, TInt u "42", TNewline u, TEnd u, TNewline u, TEOF u ] it "lexes 'ASSIGN 100 TO FOO'" $ resetSrcSpan (collectFixedTokens' Fortran66 " ASSIGN 100 TO FOO") `shouldBe` resetSrcSpan [TAssign u, TInt u "100", TTo u, TId u "foo", TEOF u] it "lexes 'DO 100 dovar = 1, 10'" $ resetSrcSpan (collectFixedTokens' Fortran66 " DO 100 dovar = 1, 10") `shouldBe` resetSrcSpan [TDo u, TInt u "100", TId u "dovar", TOpAssign u, TInt u "1", TComma u, TInt u "10", TEOF u] describe "lexN" $ it "`lexN 5` parses lexes next five characters" $ (lexemeMatch . aiLexeme) (evalParse (lexN 5 >> getAlex) (initParseState (B.pack "helloWorld") Fortran66 "")) `shouldBe` reverse "hello" describe "lexHollerith" $ do it "lexes Hollerith '7hmistral'" $ resetSrcSpan (lex66 " x = 7hmistral") `shouldBe` resetSrcSpan (Just $ THollerith u "mistral") it "becomes case sensitive" $ resetSrcSpan (collectFixedTokens' Fortran66 " format (5h a= 1)") `shouldBe` resetSrcSpan [ TFormat u, TBlob u "(5ha=1)", TEOF u ] it "lexes if statement ' IF (IY) 5,6,6'" $ resetSrcSpan (collectFixedTokens' Fortran66 " IF (IY) 5,6,6") `shouldBe` resetSrcSpan [TIf u, TLeftPar u, TId u "iy", TRightPar u, TInt u "5", TComma u, TInt u "6", TComma u, TInt u "6", TEOF u] it "lexes if then statement ' if (x) then'" $ resetSrcSpan (collectFixedTokens' Fortran77 " if (x) then") `shouldBe` resetSrcSpan [TIf u, TLeftPar u, TId u "x", TRightPar u, TThen u, TEOF u] it "lexes if variable decl ' INTEGER IF'" $ -- yes, really.. resetSrcSpan (collectFixedTokens' Fortran77 " INTEGER IF") `shouldBe` resetSrcSpan [TType u "integer", TId u "if", TEOF u] describe "Fortran 77 Legacy" $ do it "lexes inline comments" $ resetSrcSpan (collectFixedTokens' Fortran77Legacy " integer foo ! bar") `shouldBe` resetSrcSpan [TType u "integer", TId u "foo", TEOF u] it "lexes continuation lines separated by comments" $ do let src = unlines [ " integer foo," , "C hello" , " + bar" ] in resetSrcSpan (collectFixedTokens' Fortran77Legacy src) `shouldBe` resetSrcSpan [TType u "integer", TId u "foo", TComma u, TId u "bar", TNewline u, TEOF u] let src = unlines [ " integer foo, ! hello" , " + bar" ] in resetSrcSpan (collectFixedTokens' Fortran77Legacy src) `shouldBe` resetSrcSpan [TType u "integer", TId u "foo", TComma u, TId u "bar", TNewline u, TEOF u] let src = unlines [ " integer foo," , "" , " + bar" ] in resetSrcSpan (collectFixedTokens' Fortran77Legacy src) `shouldBe` resetSrcSpan [TType u "integer", TId u "foo", TComma u, TId u "bar", TNewline u, TEOF u] let src = unlines [ " integer foo," , " " -- the space is intentional , " + bar" ] in resetSrcSpan (collectFixedTokens' Fortran77Legacy src) `shouldBe` resetSrcSpan [TType u "integer", TId u "foo", TComma u, TId u "bar", TNewline u, TEOF u] it "lexes the older TYPE statement" $ resetSrcSpan (collectFixedTokens' Fortran77Legacy " type *, 'hello'") `shouldBe` resetSrcSpan [TTypePrint u, TStar u, TComma u, TString u "hello", TEOF u] it "lexes width-specific type declarations" $ do resetSrcSpan (collectFixedTokens' Fortran77Legacy " integer*4 i") `shouldBe` resetSrcSpan [TType u "integer", TStar u, TInt u "4", TId u "i", TEOF u] resetSrcSpan (collectFixedTokens' Fortran77Legacy " integer*4 function foo()") `shouldBe` resetSrcSpan [TType u "integer", TStar u, TInt u "4", TFunction u, TId u "foo", TLeftPar u, TRightPar u, TEOF u] resetSrcSpan (collectFixedTokens' Fortran77Legacy " character*4 s") `shouldBe` resetSrcSpan [TType u "character", TStar u, TInt u "4", TId u "s", TEOF u] resetSrcSpan (collectFixedTokens' Fortran77Legacy " character*(*) s") `shouldBe` resetSrcSpan [TType u "character", TStar u, TLeftPar u, TStar u, TRightPar u, TId u "s", TEOF u] resetSrcSpan (collectFixedTokens' Fortran77Legacy " character s*(*)") `shouldBe` resetSrcSpan [TType u "character", TId u "s", TStar u, TLeftPar u, TStar u, TRightPar u, TEOF u] it "lexes strings case-sensitively" $ resetSrcSpan (collectFixedTokens' Fortran77Legacy " c = 'Hello'") `shouldBe` resetSrcSpan [TId u "c", TOpAssign u, TString u "Hello", TEOF u] it "lexes strings delimited by '\"'" $ resetSrcSpan (collectFixedTokens' Fortran77Legacy " c = \"hello\"") `shouldBe` resetSrcSpan [TId u "c", TOpAssign u, TString u "hello", TEOF u] it "lexes Hollerith constants" $ do resetSrcSpan (collectFixedTokens' Fortran77Legacy " x = 7hmistral") `shouldBe` resetSrcSpan [TId u "x", TOpAssign u, THollerith u "mistral", TEOF u] resetSrcSpan (collectFixedTokens' Fortran77Legacy " x = 7hshort\n") `shouldBe` resetSrcSpan [TId u "x", TOpAssign u, THollerith u "short ", TNewline u, TEOF u] it "lexes BOZ constants" $ resetSrcSpan (collectFixedTokens' Fortran77Legacy " integer i, j, k / b'0101', o'0755', z'ab01' /") `shouldBe` resetSrcSpan [ TType u "integer", TId u "i", TComma u, TId u "j", TComma u, TId u"k" , TSlash u, TBozInt u "b'0101'", TComma u, TBozInt u "o'0755'", TComma u, TBozInt u "z'ab01'", TSlash u , TEOF u ] it "lexes non-standard identifiers" $ resetSrcSpan (collectFixedTokens' Fortran77Legacy " integer _this_is_a_long_identifier$") `shouldBe` resetSrcSpan [TType u "integer", TId u "_this_is_a_long_identifier$", TEOF u] it "lexes ';' as a line-terminator" $ resetSrcSpan (collectFixedTokens' Fortran77Legacy " integer i; integer j") `shouldBe` resetSrcSpan [TType u "integer", TId u "i", TNewline u, TType u "integer", TId u "j", TEOF u] it "lexes subscripts in assignments" $ resetSrcSpan (collectFixedTokens' Fortran77Legacy " x(0,0) = 0") `shouldBe` resetSrcSpan [TId u "x", TLeftPar u, TInt u "0", TComma u, TInt u "0", TRightPar u, TOpAssign u, TInt u "0", TEOF u] it "lexes labeled DO WHILE blocks" $ resetSrcSpan (collectFixedTokens' Fortran77Legacy " do 10 while (.true.)") `shouldBe` resetSrcSpan [TDo u, TInt u "10", TWhile u, TLeftPar u, TBool u ".true.", TRightPar u, TEOF u] it "lexes structure/union/map blocks" $ do let src = unlines [ " structure /foo/" , " union" , " map" , " integer i" , " real r" , " end map" , " end union" , " end structure"] resetSrcSpan (collectFixedTokens' Fortran77Legacy src) `shouldBe` resetSrcSpan [ TStructure u, TSlash u, TId u "foo", TSlash u, TNewline u , TUnion u, TNewline u , TMap u, TNewline u , TType u "integer", TId u "i", TNewline u , TType u "real", TId u "r", TNewline u , TEndMap u, TNewline u , TEndUnion u, TNewline u , TEndStructure u, TNewline u , TEOF u ] example1 :: String example1 = unlines [ " intEGerix", "1 iX= 42", " 200 ix =IX* ix", " 10 wrITe (*,*), ix", " EnD" ] continuationExample :: String continuationExample = unlines [ " inte", " .ger i", " .x", " ix = 4", " .2", " end"] example1Expectation :: [Token] example1Expectation = [ TType u "integer", TId u "ix", TNewline u, TLabel u "1", TId u "ix", TOpAssign u, TInt u "42", TNewline u, TLabel u "200", TId u "ix", TOpAssign u, TId u "ix", TStar u, TId u "ix", TNewline u, TLabel u "10", TWrite u, TLeftPar u, TStar u, TComma u, TStar u, TRightPar u, TComma u, TId u "ix", TNewline u, TEnd u, TNewline u, TEOF u]