{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Main where

import Data.Maybe
import Data.Aeson (decode, encode)
import Data.Algorithm.Diff
import qualified Data.ByteString.Lazy as BL
import qualified Data.Map as Map
import Data.Monoid ((<>))
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.IO as Text
import qualified Data.Text.Encoding as TE
import System.Directory
import System.Environment (getArgs)
import System.FilePath
import System.Exit (exitFailure)
import System.IO (hSetEncoding, utf8, openFile, IOMode(..))
import Test.QuickCheck
import Test.Tasty
import Test.Tasty.Golden.Advanced (goldenTest)
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck (testProperty)
import Text.Show.Pretty
import GHC.IO.Encoding (setLocaleEncoding)
import Skylighting.Core

-- Read a file as UTF-8, regardless of the system locale.
readTextFile :: FilePath -> IO Text
readTextFile fp = do
  h <- openFile fp ReadMode
  hSetEncoding h utf8
  Text.hGetContents h

tokToText :: Token -> Text
tokToText (_, s) = s

-- Directory containing the KDE XML syntax definitions.
xmlPath :: FilePath
xmlPath = "xml/"

main :: IO ()
main = do
  setLocaleEncoding utf8
  sMap <- do
    result <- loadSyntaxesFromDir xmlPath
    case result of
      Left e -> do
        putStrLn $ "Error loading syntax definitions from " <> xmlPath <>
                   ": " <> e
        exitFailure
      Right m -> return m
  let syntaxes = Map.elems sMap
      defConfig = TokenizerConfig{ traceOutput = False
                                 , syntaxMap = sMap }
  let getMatchers = map rMatcher . concatMap cRules . sContexts
  let getRegexFromMatcher (RegExpr re) = Just $ reString re
      getRegexFromMatcher _            = Nothing
  let getRegexesFromSyntax = mapMaybe getRegexFromMatcher . getMatchers
  inputs <- filter (\fp -> take 1 fp /= ".") <$>
              getDirectoryContents ("test" </> "cases")
  allcases <- mapM (fmap (Text.take 240) . readTextFile .
                     (("test" </> "cases") </>)) inputs
  args <- getArgs
  let regen = "--accept" `elem` args
  defaultTheme <- BL.readFile ("test" </> "default.theme")
  defaultMain $ testGroup "skylighting tests" $
    [ testGroup "tokenizer tests" $
        map (tokenizerTest defConfig sMap regen) inputs
    , testGroup "FromJSON instance tests"
      [ testCase "decode simple color" $
          Just (RGB 0x15 0xff 0xa0) @=? decode "\"#15ffa0\""
      , testCase "decode TokenStyle" $
          Just (TokenStyle{ tokenColor = Just (RGB 0x1f 0x1c 0x1b)
                          , tokenBackground = Nothing
                          , tokenBold = True
                          , tokenItalic = False
                          , tokenUnderline = False }) @=?
            decode "{ \"text-color\": \"#1f1c1b\", \"bold\": true }"
      , testCase "decode KDE theme to Style" $
          Just kate @=? decode defaultTheme
      , testCase "round trip style -> theme -> style" $
          Just kate @=? decode (encode kate)
      ]
    , testGroup "Skylighting" $
      [ testCase "syntaxesByFilename" $ ["Perl"] @=?
          map sName (syntaxesByFilename sMap "foo/bar.pl")
      ]
    , testGroup "Doesn't hang or drop text on a mixed syntax sample" $
        map (noDropTest defConfig allcases) syntaxes
    , testGroup "Doesn't hang or drop text on fuzz" $
        map (\syn -> testProperty (Text.unpack (sName syn))
                       (p_no_drop defConfig syn)) syntaxes
    , testGroup "All regexes compile" $
        map (\syn -> testGroup ("syntax " <> sFilename syn)
              (map (\regex -> testCase
                      ("regex " <> (Text.unpack $ TE.decodeUtf8 regex) <>
                       " in " <> sFilename syn) $
                      assertBool "regex does not compile" $
                        case compileRegex True regex of
                          Right _ -> True
                          Left _  -> False)
                 $ getRegexesFromSyntax syn)) syntaxes
    , testGroup "Regex module" $ map regexTest regexTests
    , testGroup "Regression tests" $
        let perl = maybe (error "could not find Perl syntax") id
                     (lookupSyntax "Perl" sMap)
            html = maybe (error "could not find HTML syntax") id
                     (lookupSyntax "html" sMap)
            cpp  = maybe (error "could not find CPP syntax") id
                     (lookupSyntax "cpp" sMap)
            c    = maybe (error "could not find C syntax") id
                     (lookupSyntax "c" sMap)
        in
        [ testCase "perl NUL case" $
            Right [[(OtherTok,"s\NULb\NUL")
                   ,(StringTok,"c")
                   ,(OtherTok,"\NUL")]]
            @=? tokenize defConfig perl "s\0b\0c\0"
        , testCase "perl backslash case 1" $
            Right [[(OtherTok,"m\\'")]]
            @=? tokenize defConfig perl "m\\'"
        , testCase "perl backslash case 2" $
            Right [[(OtherTok,"m\\a\\")]]
            @=? tokenize defConfig perl "m\\a\\"
        , testCase "perl quoting case" $
            Right [[(KeywordTok,"my")
                   ,(NormalTok," ")
                   ,(DataTypeTok,"$foo")
                   ,(NormalTok," = ")
                   ,(OtherTok,"q/")
                   ,(SpecialStringTok,"bar")
                   ,(OtherTok,"/")
                   ,(NormalTok,";")]
                  ,[(KeywordTok,"my")
                   ,(NormalTok," ")
                   ,(DataTypeTok,"$baz")
                   ,(NormalTok," = ")
                   ,(OtherTok,"'")
                   ,(SpecialStringTok,"quux")
                   ,(OtherTok,"'")
                   ,(NormalTok,";")]]
            @=? tokenize defConfig perl "my $foo = q/bar/;\nmy $baz = 'quux';\n"
        , testCase "cpp floats" $
            Right [ [ (FloatTok,"0.1"), (BuiltInTok,"f") ]
                  , [ (FloatTok,"1.0"), (BuiltInTok,"f") ]
                  , [ (OperatorTok,"-"), (FloatTok,"0.1"), (BuiltInTok,"f") ]
                  , [ (OperatorTok,"-"), (FloatTok,"1.0"), (BuiltInTok,"F") ]
                  , [ (OperatorTok,"-"), (FloatTok,"1.0"), (BuiltInTok,"L") ]
                  , [ (FloatTok,"1e3") ]
                  , [ (OperatorTok,"-"), (FloatTok,"15e+3") ]
                  , [ (FloatTok,"0."), (BuiltInTok,"f") ]
                  , [ (FloatTok,"1."), (BuiltInTok,"F") ]
                  , [ (FloatTok,"1.E3") ]
                  ]
            @=? tokenize defConfig cpp
                  "0.1f\n1.0f\n-0.1f\n-1.0F\n-1.0L\n1e3\n-15e+3\n0.f\n1.F\n1.E3"
        , testCase "cpp identifier (#76)" $
            Right [ [ (NormalTok,"ng_or") ] ]
            @=? tokenize defConfig cpp "ng_or"
        , testCase "c '\\0' (#82)" $
            Right [ [ (CharTok,"'\\0'") ] ]
            @=? tokenize defConfig c "'\\0'"
        , testCase "c very long integer (#81)" $
            Right [ [ (DecValTok, "1111111111111111111111") ] ]
            @=? tokenize defConfig c "1111111111111111111111"
        , testCase "Chinese characters in HTML (#110)" $
            Right [ [ (NormalTok, "\35797\65306")
                    , (KeywordTok, "") ] ]
            @=? tokenize defConfig html "\35797\65306"
        ]
    ]

compareValues :: FilePath -> Text -> Text -> IO (Maybe String)
compareValues referenceFile expected actual =
  if expected == actual
     then return Nothing
     else return $ Just $ makeDiff referenceFile
                            (Text.lines expected) (Text.lines actual)

makeDiff :: FilePath -> [Text] -> [Text] -> String
makeDiff referenceFile expected actual = unlines $
  [ "--- " ++ referenceFile
  , "+++ actual" ] ++
  map (Text.unpack . vividize) (filter notBoth (getDiff expected actual))
    where notBoth (Both _ _) = False
          notBoth _          = True

instance Arbitrary Text where
  arbitrary = Text.pack <$> arbitrary
  shrink xs = Text.pack <$> shrink (Text.unpack xs)

-- Tokenizing must not drop or alter any input text: joining the tokens of
-- each line must reproduce the original lines.
p_no_drop :: TokenizerConfig -> Syntax -> Text -> Bool
p_no_drop cfg syntax t =
  case tokenize cfg syntax t of
    Right ts -> Text.lines t == map (mconcat . map tokToText) ts
    Left _   -> False

noDropTest :: TokenizerConfig -> [Text] -> Syntax -> TestTree
noDropTest cfg inps syntax =
  localOption (mkTimeout 25000000) $
    testCase (Text.unpack (sName syntax)) $ mapM_ go inps
  where
    go inp =
      case tokenize cfg syntax inp of
        Right ts -> assertBool ("Text has been dropped:\n" ++ diffs)
                               (inplines == toklines)
          where inplines = Text.lines inp
                toklines = map (mconcat . map tokToText) ts
                diffs    = makeDiff "expected" inplines toklines
        Left e -> assertFailure ("Unexpected error: " ++ e ++
                                 "\ninput = " ++ show inp)

-- Golden test: tokenize a file from test/cases and compare the pretty-shown
-- token stream against the corresponding .native file in test/expected.
tokenizerTest :: TokenizerConfig -> SyntaxMap -> Bool -> FilePath -> TestTree
tokenizerTest cfg sMap regen inpFile =
  localOption (mkTimeout 25000000) $
    goldenTest testname getExpected getActual
      (compareValues referenceFile) updateGolden
  where
    testname = lang ++ " tokenizing of " ++ inpFile
    getExpected = readTextFile referenceFile
    getActual = do
      code <- readTextFile (casesdir </> inpFile)
      syntax <- case lookupSyntax (Text.pack lang) sMap of
                  Just s  -> return s
                  Nothing -> fail $
                    "Could not find syntax definition for " ++ lang
      case tokenize cfg syntax $! code of
        Left e   -> fail e
        Right ls -> return $ Text.pack $ ppShow ls ++ "\n"
    updateGolden = if regen
                      then Text.writeFile referenceFile
                      else \_ -> return ()
    expecteddir = "test" </> "expected"
    casesdir = "test" </> "cases"
    referenceFile = expecteddir </> inpFile <.> "native"
    lang = drop 1 $ takeExtension inpFile

regexTest :: (String, String, Maybe (String, [(Int,String)])) -> TestTree
regexTest (re, inp, expected) =
  testCase ("/" ++ re ++ "/ " ++ inp) $ expected @=?
    testRegex True re inp

-- (regex, input, expected match and numbered captures)
regexTests :: [(String, String, Maybe (String, [(Int,String)]))]
regexTests =
  [ (".", "aab", Just ("a", []))
  , ("ab", "aab", Nothing)
  , ("ab", "abb", Just ("ab", []))
  , ("a(b)", "abb", Just ("ab", [(1,"b")]))
  , ("a(b.)*", "abbbcb", Just ("abbbc", [(1,"bc")]))
  , ("a(?:b.)*", "abbbcb", Just ("abbbc", []))
  , ("a(?=b)", "abb", Just ("a", []))
  , ("a(?=b)", "acb", Nothing)
  , ("a(?!b)", "abb", Nothing)
  , ("a(?!b)", "acb", Just ("a", []))
  , ("a?b+", "bbb", Just ("bbb", []))
  , ("a?b+", "abbb", Just ("abbb", []))
  , ("a?b+", "ac", Nothing)
  , ("a*", "bbb", Just ("", []))
  , ("abc|ab$", "ab", Just ("ab", []))
  , ("abc|ab$", "abcd", Just ("abc", []))
  , ("abc|ab$", "abd", Nothing)
  , ("(?:ab)*|a.*", "abababa", Just ("abababa", []))
  , ("a[b-e]*", "abcdefg", Just ("abcde", []))
  , ("a[b-e\\n-]*", "abcde\nb-bcfg", Just ("abcde\nb-bc", []))
  , ("^\\s+\\S+\\s+$", " abc ", Just (" abc ", []))
  , ("\\$", "$$", Just ("$", []))
  , ("[\\z12bb]", "\x12bb", Just ("\x12bb", []))
  , ("\\bhello\\b|hell", "hello there", Just ("hello", []))
  , ("\\bhello\\b|hell", "hellothere", Just ("hell", []))
  , ("[[:space:]]{2,4}.", "    abc", Just ("    a", []))
  , ("[[:space:]]{2,4}.", " abc", Nothing)
  , ("[[:space:]]{2,4}.", "     abc", Just ("     ", []))
  , ("((..)\\+\\2)", "aa+aabb+bbbc+cb", Just ("aa+aa", [(1,"aa+aa"), (2,"aa")]))
  , ("(\\d+)/(\\d+) == \\{1}", "22/2 == 22",
       Just ("22/2 == 22", [(1,"22"), (2,"2")]))
  , ("([a-z]+){2}", "htabc", Just ("htabc", [(1,"c")]))
  , ("((.+)(.+)(.+))*", replicate 400 'a',
       Just (replicate 400 'a', [(1, replicate 400 'a')
                                ,(2, replicate 398 'a')
                                ,(3, "a")
                                ,(4, "a")]))
  , ("a++a", "aaaaa", Nothing)
  , ("\\w+e", "aaaeeee", Just ("aaaeeee", []))
  , ("\\w+?e", "aaaeeee", Just ("aaae", []))
  , ("a+b??", "aaab", Just ("aaa", []))
  , ("\\([a-z]+(?R)*\\)", "(aa(b(c)(d)))", Just ("(aa(b(c)(d)))", []))
  , ("a{}", "aaa", Nothing)
  , ("a{}", "a{}", Just ("a{}", []))
  , ("a{3", "a{3", Just ("a{3", []))
  , ("(?|(abc)|(def))", "abc", Just ("abc", [(1,"abc")]))
  , ("(?|(abc)|(def))", "def", Just ("def", [(1,"def")]))
  , ("(?:(abc)|(def))", "def", Just ("def", [(2,"def")]))
  ]

-- Render one diff line in unified-diff style.
vividize :: Diff Text -> Text
vividize (Both s _) = "  " <> s
vividize (First s)  = "- " <> s
vividize (Second s) = "+ " <> s