{-# LANGUAGE OverloadedStrings, UnicodeSyntax #-}
module LexerTest (lexerSuite) where

import Test.Framework (testGroup, Test)
import Test.Framework.Providers.HUnit
import Test.HUnit hiding (Test)

import Language.Rust.Parser.Lexer
import Language.Rust.Parser.ParseMonad
import Language.Rust.Syntax
import Language.Rust.Data.Ident
import Language.Rust.Data.Position
import Language.Rust.Data.InputStream

-- | Top-level group bundling every lexer test defined in this module.
lexerSuite :: Test
lexerSuite =
  testGroup "lexer suite"
    [ commonCode
    , literals
    ]

-- | Black-box tests over assorted real-life code fragments: each fragment is
-- lexed and the full resulting token list (whitespace and comments included)
-- is compared against the expected one.
commonCode :: Test
commonCode = testGroup "lexing common code fragments"
  [ testCode "let span = $p.span;"
      [ identTok "let", blank
      , identTok "span", blank
      , Equal, blank
      , Dollar, identTok "p", Dot, identTok "span"
      , Semicolon
      ]
  , testCode "$(p.span),+"
      [ Dollar
      , OpenDelim Paren
      , identTok "p", Dot, identTok "span"
      , CloseDelim Paren
      , Comma
      , Plus
      ]
  , testCode "pub s: pp::Printer<'a>,"
      [ identTok "pub", blank
      , identTok "s", Colon, blank
      , identTok "pp", ModSep, identTok "Printer"
      , Less, lifetimeTok "a", Greater
      , Comma
      ]
  , testCode "impl<'a,T> Tr for &'a T {}"
      [ identTok "impl"
      , Less, lifetimeTok "a", Comma, identTok "T", Greater, blank
      , identTok "Tr", blank
      , identTok "for", blank
      , Ampersand, lifetimeTok "a", blank
      , identTok "T", blank
      , OpenDelim Brace
      , CloseDelim Brace
      ]
  , testCode "x /* some comment */ y"
      [ identTok "x", blank
      , blockComment " some comment ", blank
      , identTok "y"
      ]
  , testCode "x /* some /* nested */ comment */ y"
      [ identTok "x", blank
      , blockComment " some /* nested */ comment ", blank
      , identTok "y"
      ]
  , testCode "fn ܐ_ܐ() { println!(\"Hello, čušpajž日本語\"); }"
      [ identTok "fn", blank
      , identTok "ܐ_ܐ"
      , OpenDelim Paren
      , CloseDelim Paren, blank
      , OpenDelim Brace, blank
      , identTok "println", Exclamation
      , OpenDelim Paren
      , LiteralTok (StrTok "Hello, čušpajž日本語") Nothing
      , CloseDelim Paren
      , Semicolon, blank
      , CloseDelim Brace
      ]
  , testCode "123.f32"
      [ LiteralTok (IntegerTok "123") Nothing
      , Dot
      , identTok "f32"
      ]
  , testCode "0e+10"
      [ LiteralTok (FloatTok "0e+10") Nothing
      ]
  , testCode "123.+1"
      [ LiteralTok (FloatTok "123.") Nothing
      , Plus
      , LiteralTok (IntegerTok "1") Nothing
      ]
  ]
  where
    -- Shorthand for the token shapes that recur in almost every fragment.
    identTok s     = IdentTok (mkIdent s)
    lifetimeTok s  = LifetimeTok (mkIdent s)
    blank          = Space Whitespace " "
    blockComment s = Space Comment s

-- | Tests for literal tokens. Note that a literal may carry any suffix (even
-- though almost all of those suffixes end up being invalid).
literals :: Test
literals = testGroup "literals (numbers, characters, strings, etc.)"
  -- bytes
  [ testCode "b'a'"       [ bare (ByteTok "a") ]
  , testCode "b'\\n'"     [ bare (ByteTok "\\n") ]
  , testCode "b'a'suffix" [ suffixed (ByteTok "a") "suffix" ]

  -- chars
  , testCode "'a'"       [ bare (CharTok "a") ]
  , testCode "'\\n'"     [ bare (CharTok "\\n") ]
  , testCode "'a'suffix" [ suffixed (CharTok "a") "suffix" ]

  -- integers
  , testCode "123"              [ bare (IntegerTok "123") ]
  , testCode "123i32"           [ suffixed (IntegerTok "123") "i32" ]
  , testCode "0b1100_1101"      [ bare (IntegerTok "0b1100_1101") ]
  , testCode "0b1100_1101isize" [ suffixed (IntegerTok "0b1100_1101") "isize" ]
  , testCode "0o3170"           [ bare (IntegerTok "0o3170") ]
  , testCode "0o3170i64"        [ suffixed (IntegerTok "0o3170") "i64" ]
  , testCode "0xAFAC"           [ bare (IntegerTok "0xAFAC") ]
  , testCode "0xAFACu32"        [ suffixed (IntegerTok "0xAFAC") "u32" ]

  -- floats
  , testCode "123."      [ bare (FloatTok "123.") ]
  , testCode "123.1"     [ bare (FloatTok "123.1") ]
  , testCode "123.1f32"  [ suffixed (FloatTok "123.1") "f32" ]
  , testCode "123e-9f32" [ suffixed (FloatTok "123e-9") "f32" ]
  , testCode "9e+10"     [ bare (FloatTok "9e+10") ]

  -- strings
  , testCode "\"hello \\n world!\""
      [ bare (StrTok "hello \\n world!") ]
  , testCode "\"hello \\n world!\"suffix"
      [ suffixed (StrTok "hello \\n world!") "suffix" ]

  -- raw strings
  , testCode "r\"hello \n world!\""
      [ bare (StrRawTok "hello \n world!" 0) ]
  , testCode "r\"hello \n world!\"suffix"
      [ suffixed (StrRawTok "hello \n world!" 0) "suffix" ]
  , testCode "r##\"hello \"#\n world!\"##suffix"
      [ suffixed (StrRawTok "hello \"#\n world!" 2) "suffix" ]

  -- bytestrings
  , testCode "b\"hello \\n world!\""
      [ bare (ByteStrTok "hello \\n world!") ]
  , testCode "b\"hello \\n world!\"suffix"
      [ suffixed (ByteStrTok "hello \\n world!") "suffix" ]

  -- raw bytestrings
  , testCode "br\"hello \n world!\""
      [ bare (ByteStrRawTok "hello \n world!" 0) ]
  , testCode "br\"hello \n world!\"suffix"
      [ suffixed (ByteStrRawTok "hello \n world!" 0) "suffix" ]
  , testCode "br##\"hello \"#\n world!\"##suffix"
      [ suffixed (ByteStrRawTok "hello \"#\n world!" 2) "suffix" ]

  -- multiline strings
  , testCode "\"hello \\\n world!\""
      [ bare (StrTok "hello \\\n world!") ]
  , testCode "b\"hello \\\n world!\""
      [ bare (ByteStrTok "hello \\\n world!") ]
  ]
  where
    -- A literal token without a suffix.
    bare tok = LiteralTok tok Nothing
    -- A literal token immediately followed by the given suffix.
    suffixed tok suf = LiteralTok tok (Just suf)

-- | Build a test case, named after the code fragment itself, asserting that
-- lexing the fragment succeeds with exactly the given tokens.
testCode :: String -> [Token] -> Test
testCode source expected = testCase source (Right expected @=? actual)
  where
    actual = lexTokensNoSpans (inputStreamFromString source)

-- | Run the lexer over an 'InputStream' starting from 'initPos', yielding
-- either the failure or the tokens with 'unspan' applied to each one.
lexTokensNoSpans :: InputStream -> Either ParseFail [Token]
lexTokensNoSpans stream =
  fmap (map unspan) (execParser (lexTokens lexToken) stream initPos)