-- palindromesAroundCentres -- palindromeWordsAroundCentres -- approximatePalindromesAroundCentres -- are the central functions of this module -- approximatePalindromesAroundCentres is easily defined in the style of -- the others. -- Use the text-based palindrome finding, and check for each addition whether or not it is a wordpalindrome ----------------------------------------------------------------------------- -- | -- Module : Data.Algorithms.Palindromes.Palindromes -- Copyright : (c) 2007 - 2012 Johan Jeuring -- License : BSD3 -- -- Maintainer : johan@jeuring.net -- Stability : experimental -- Portability : portable -- ----------------------------------------------------------------------------- module Data.Algorithms.Palindromes.Palindromes (longestPalindrome ,longestPalindromes ,lengthLongestPalindrome ,lengthLongestPalindromes ,longestTextPalindrome ,longestTextPalindromes ,longestWordPalindrome ,longestWordPalindromes ,palindromesAroundCentres ) where import Data.List (maximumBy,intersperse) import Data.Char(toLower,isPunctuation,isSpace,isControl) import Data.Array(Array(),bounds,listArray,(!)) import Control.Arrow ----------------------------------------------------------------------------- -- longestPalindrome ----------------------------------------------------------------------------- -- | longestPalindrome returns the longest palindrome in a string. longestPalindrome :: (Eq a,Show a) => [a] -> String longestPalindrome input = let inputArray = listArrayl0 input (maxLength,pos) = maximumBy (\(l,_) (l',_) -> compare l l') (zip (palindromesAroundCentres inputArray) [0..]) in showPalindrome inputArray (maxLength,pos) ----------------------------------------------------------------------------- -- longestPalindromes ----------------------------------------------------------------------------- -- | longestPalindromes returns the longest palindrome around each position -- in a string. The integer argument is used to only show palindromes -- of length at least this integer. longestPalindromes :: (Eq a,Show a) => Int -> [a] -> String longestPalindromes m input = let inputArray = listArrayl0 input in concat $ intersperse "\n" $ map (showPalindrome inputArray) $ filter ((m<=) . fst) $ zip (palindromesAroundCentres inputArray) [0..] ----------------------------------------------------------------------------- -- lengthLongestPalindrome ----------------------------------------------------------------------------- -- | lengthLongestPalindrome returns the length of the longest palindrome in -- a string. lengthLongestPalindrome :: Eq a => [a] -> String lengthLongestPalindrome = show . maximum . palindromesAroundCentres . listArrayl0 ----------------------------------------------------------------------------- -- lengthLongestPalindromes ----------------------------------------------------------------------------- -- | lengthLongestPalindromes returns the lengths of the longest palindrome -- around each position in a string. lengthLongestPalindromes :: Eq a => [a] -> String lengthLongestPalindromes = show . palindromesAroundCentres . listArrayl0 ----------------------------------------------------------------------------- -- longestTextPalindrome ----------------------------------------------------------------------------- -- | longestTextPalindrome returns the longest text palindrome in a string, -- ignoring spacing, punctuation symbols, and case of letters. longestTextPalindrome :: String -> String longestTextPalindrome input = let inputArray = listArrayl0 input ips = zip input [0..] textinput = map (first toLower) (filter (myIsLetter.fst) ips) textInputArray = listArrayl0 (map fst textinput) positionTextInputArray = listArrayl0 (map snd textinput) in longestTextPalindromeArray textInputArray positionTextInputArray inputArray longestTextPalindromeArray :: Array Int Char -> Array Int Int -> Array Int Char -> String longestTextPalindromeArray a positionArray inputArray = let (len,pos) = maximumBy (\(l,_) (l',_) -> compare l l') (zip (palindromesAroundCentres a) [0..]) in showTextPalindrome positionArray inputArray (len,pos) ----------------------------------------------------------------------------- -- longestTextPalindromes ----------------------------------------------------------------------------- -- | longestTextPalindromes returns the longest text palindrome around each -- position in a string. The integer argument is used to only show palindromes -- of length at least this integer. longestTextPalindromes :: Int -> String -> String longestTextPalindromes m input = let inputArray = listArrayl0 input ips = zip input [0..] textinput = map (first toLower) (filter (myIsLetter.fst) ips) textInputArray = listArrayl0 (map fst textinput) positionTextInputArray = listArrayl0 (map snd textinput) in concat $ intersperse "\n" $ longestTextPalindromesArray m textInputArray positionTextInputArray inputArray longestTextPalindromesArray :: Int -> Array Int Char -> Array Int Int -> Array Int Char -> [String] longestTextPalindromesArray m a positionArray inputArray = map (showTextPalindrome positionArray inputArray) $ filter ((m<=) . fst) $ zip (palindromesAroundCentres a) [0..] -- introduce == only here (not in longestTextPalindromes)? ----------------------------------------------------------------------------- -- palindromesAroundCentres -- -- The function that implements the palindrome finding algorithm. -- Used in all the above interface functions. ----------------------------------------------------------------------------- -- | palindromesAroundCentres is the central function of the module. It returns -- the list of lenghths of the longest palindrome around each position in a -- string. palindromesAroundCentres :: Eq a => Array Int a -> [Int] palindromesAroundCentres a = let (afirst,_) = bounds a in reverse $ extendTail a afirst 0 [] extendTail :: Eq a => Array Int a -> Int -> Int -> [Int] -> [Int] extendTail a n currentTail centres | n > alast = -- reached the end of the array finalCentres currentTail centres (currentTail:centres) | n-currentTail == afirst = -- the current longest tail palindrome -- extends to the start of the array extendCentres a n (currentTail:centres) centres currentTail | (a!n) == (a!(n-currentTail-1)) = -- the current longest tail palindrome -- can be extended extendTail a (n+1) (currentTail+2) centres | otherwise = -- the current longest tail palindrome -- cannot be extended extendCentres a n (currentTail:centres) centres currentTail where (afirst,alast) = bounds a extendCentres :: Eq a => Array Int a -> Int -> [Int] -> [Int] -> Int -> [Int] extendCentres a n centres tcentres centreDistance | centreDistance == 0 = -- the last centre is on the last element: -- try to extend the tail of length 1 extendTail a (n+1) 1 centres | centreDistance-1 == head tcentres = -- the previous element in the centre list -- reaches exactly to the end of the last -- tail palindrome use the mirror property -- of palindromes to find the longest tail -- palindrome extendTail a n (head tcentres) centres | otherwise = -- move the centres one step -- add the length of the longest palindrome -- to the centres extendCentres a n (min (head tcentres) (centreDistance-1):centres) (tail tcentres) (centreDistance-1) finalCentres :: Int -> [Int] -> [Int] -> [Int] finalCentres n tcentres centres | n == 0 = centres | n > 0 = finalCentres (n-1) (tail tcentres) (min (head tcentres) (n-1):centres) | otherwise = error "finalCentres: input < 0" ----------------------------------------------------------------------------- -- longestWordPalindromes ----------------------------------------------------------------------------- -- | longestWordPalindromes returns the longest word palindrome around each -- position in a string. The integer argument is used to only show -- palindromes of length at least this integer. longestWordPalindromes :: Int -> String -> String longestWordPalindromes m input = let inputArray = listArrayl0 input ips = zip input [0..] textinput = map (first toLower) (filter (myIsLetter.fst) ips) textInputArray = listArrayl0 (map fst textinput) positionTextInputArray = listArrayl0 (map snd textinput) in concat $ intersperse "\n" $ longestWordPalindromesArray m textInputArray positionTextInputArray inputArray longestWordPalindromesArray :: Int -> Array Int Char -> Array Int Int -> Array Int Char -> [String] longestWordPalindromesArray m textInputArray positionArray inputArray = map (showTextPalindrome positionArray inputArray) $ filter ((m<=) . fst) $ zip (wordPalindromesAroundCentres textInputArray positionArray inputArray) [0..] -- | longestWordPalindrome returns the longest text palindrome preceded and -- followed by non-letter symbols (if any). longestWordPalindrome :: String -> String longestWordPalindrome input = let inputArray = listArrayl0 input ips = zip input [0..] textinput = map (first toLower) (filter (myIsLetter.fst) ips) textInputArray = listArrayl0 (map fst textinput) positionArray = listArrayl0 (map snd textinput) (maxLength,pos) = maximumBy (\(w,_) (w',_) -> compare w w') (zip (wordPalindromesAroundCentres textInputArray positionArray inputArray) [0..]) in showTextPalindrome positionArray inputArray (maxLength,pos) ----------------------------------------------------------------------------- -- wordPalindromesAroundCentres -- -- This is the function palindromesAroundCentres, extended with the longest -- word palindromes around each centre. ----------------------------------------------------------------------------- -- | wordPalindromesAroundCentres returns the same lengths of palindromes as -- palindromesAroundCentres, but at the same time also the length of the -- longest word palindromes around the centres. wordPalindromesAroundCentres :: Array Int Char -> Array Int Int -> Array Int Char -> [Int] wordPalindromesAroundCentres textInputArray positionArray inputArray = let (afirst,_) = bounds textInputArray in reverse $ map (head . snd) $ extendTailWord textInputArray positionArray inputArray afirst (0,[0]) [] extendTailWord :: Array Int Char -> Array Int Int -> Array Int Char -> Int -> (Int,[Int]) -> [(Int,[Int])] -> [(Int,[Int])] extendTailWord textInputArray positionArray inputArray n current@(currentTail,currentTailWords) centres | n > alast = -- reached the end of the text input array finalWordCentres textInputArray positionArray inputArray currentTail centres (current:centres) | n-currentTail == afirst = -- the current longest tail palindrome extends to the start of the text input array extendWordCentres textInputArray positionArray inputArray n (current:centres) centres currentTail | (textInputArray!!!n) == (textInputArray!!!(n-currentTail-1)) = -- the current longest tail palindrome can be extended -- check whether or not the extended palindrome is a wordpalindrome if surroundedByPunctuation (positionArray!!!(n-currentTail-1)) (positionArray!!!n) inputArray then extendTailWord textInputArray positionArray inputArray (n+1) (currentTail+2,currentTail+2:currentTailWords) centres else extendTailWord textInputArray positionArray inputArray (n+1) (currentTail+2,currentTailWords) centres | otherwise = -- the current longest tail palindrome cannot be extended extendWordCentres textInputArray positionArray inputArray n (current:centres) centres currentTail where (afirst,alast) = bounds textInputArray extendWordCentres :: Array Int Char -> Array Int Int -> Array Int Char -> Int -> [(Int,[Int])] -> [(Int,[Int])] -> Int -> [(Int,[Int])] extendWordCentres textInputArray positionArray inputArray n centres tcentres centreDistance | centreDistance == 0 = -- the last centre is on the last element: -- try to extend the tail of length 1 if surroundedByPunctuation (positionArray!n) (positionArray!n) inputArray then extendTailWord textInputArray positionArray inputArray (n+1) (1,[1,0]) centres else extendTailWord textInputArray positionArray inputArray (n+1) (1,[0]) centres | centreDistance-1 == fst (head tcentres) = -- the previous element in the centre list -- reaches exactly to the end of the last -- tail palindrome use the mirror property -- of palindromes to find the longest tail -- palindrome let (currentTail,oldWord:oldWords) = head tcentres in if surroundedByPunctuation (positionArray!(n-currentTail)) (positionArray!(n-1)) inputArray then if oldWord == currentTail then extendTailWord textInputArray positionArray inputArray n (head tcentres) centres else extendTailWord textInputArray positionArray inputArray n (currentTail,currentTail:oldWord:oldWords) centres else if oldWord == currentTail && oldWord > 0 then extendTailWord textInputArray positionArray inputArray n (currentTail, tail (snd (head tcentres))) centres else extendTailWord textInputArray positionArray inputArray n (head tcentres) centres | otherwise = -- move the centres one step -- add the length of the longest palindrome -- to the centres let newTail = min (fst (head tcentres)) (centreDistance-1) oldWord = head (snd (head tcentres)) newWords = if oldWord < newTail then if surroundedByPunctuation (positionArray!(n-newTail+1)) (positionArray!n) inputArray then newTail:snd (head tcentres) else snd (head tcentres) else if null (tail (snd (head tcentres))) then snd (head tcentres) else tail (snd (head tcentres)) in extendWordCentres textInputArray positionArray inputArray n ((newTail,newWords):centres) (tail tcentres) (centreDistance-1) finalWordCentres :: Array Int Char -> Array Int Int -> Array Int Char -> Int -> [(Int,[Int])] -> [(Int,[Int])] -> [(Int,[Int])] finalWordCentres textInputArray positionArray inputArray n tcentres centres | n == 0 = centres | n > 0 = let (_,tlast) = bounds textInputArray (oldTail,oldWord:oldWords) = head tcentres newTail = min oldTail (n-1) diff = if oldTail < n-1 then n - 1 - oldTail else 0 firstMirror = min tlast (tlast-diff-newTail+1) lastMirror = tlast-diff newWords = if oldWord < newTail && surroundedByPunctuation (positionArray!firstMirror) (positionArray!lastMirror) inputArray then newTail:oldWord:oldWords else if null oldWords then oldWord:oldWords else oldWords in finalWordCentres textInputArray positionArray inputArray (n-1) (tail tcentres) ((newTail,newWords):centres) | otherwise = error "finalWordCentres: input < 0" {- Outcommented for release 0.2.2.1; gives away what is going to appear in 0.3 ----------------------------------------------------------------------------- -- longestApproximatePalindromes ----------------------------------------------------------------------------- -- | longestApproximatePalindromes returns the longest approximate -- palindrome around each position in a string. An approximate palindrome -- is a palindrome with at most a specified number of errors. longestApproximatePalindromes :: (Eq a,Show a) => Int -> [a] -> String longestApproximatePalindromes nrOfErrors input = let inputArray = listArrayl0 input (maxLength,pos) = (\(((l,nrOfErrors):xs),p) -> (l,p)) $ maximumBy (\((l,_):_,_) ((l',_):_,_) -> compare l l') (zip (approximatePalindromesAroundCentres nrOfErrors inputArray) [0..]) in showPalindrome inputArray (maxLength,pos) ----------------------------------------------------------------------------- -- approximatePalindromesAroundCentres -- -- This is the function palindromesAroundCentres, but now the palindrome -- may contain a specified number of errors. ----------------------------------------------------------------------------- -- | for each centre, approximatePalindromesAroundCentres calculates a list -- palindromes with corresponding nrOfErrors, in decreasing length. approximatePalindromesAroundCentres :: Int -> Array Int a -> [[(Int,Int)]] approximatePalindromesAroundCentres nrOfErrors inputArray = let (afirst,_) = bounds inputArray in reverse $ extendTailWord inputArray afirst (0,[0]) [] wordPalindromesAroundCentres :: Array Int Char -> Array Int Int -> Array Int Char -> [Int] wordPalindromesAroundCentres textInputArray positionArray inputArray = let (afirst,_) = bounds textInputArray in reverse $ map (head . snd) $ extendTailWord textInputArray positionArray inputArray afirst (0,[0]) [] extendTailWord :: Array Int Char -> Array Int Int -> Array Int Char -> Int -> (Int,[Int]) -> [(Int,[Int])] -> [(Int,[Int])] extendTailWord textInputArray positionArray inputArray n current@(currentTail,currentTailWords) centres | n > alast = -- reached the end of the text input array finalWordCentres textInputArray positionArray inputArray currentTail centres (current:centres) | n-currentTail == afirst = -- the current longest tail palindrome extends to the start of the text input array extendWordCentres textInputArray positionArray inputArray n (current:centres) centres currentTail | (textInputArray!!!n) == (textInputArray!!!(n-currentTail-1)) = -- the current longest tail palindrome can be extended -- check whether or not the extended palindrome is a wordpalindrome if surroundedByPunctuation (positionArray!!!(n-currentTail-1)) (positionArray!!!n) inputArray then extendTailWord textInputArray positionArray inputArray (n+1) (currentTail+2,currentTail+2:currentTailWords) centres else extendTailWord textInputArray positionArray inputArray (n+1) (currentTail+2,currentTailWords) centres | otherwise = -- the current longest tail palindrome cannot be extended extendWordCentres textInputArray positionArray inputArray n (current:centres) centres currentTail where (afirst,alast) = bounds textInputArray extendWordCentres :: Array Int Char -> Array Int Int -> Array Int Char -> Int -> [(Int,[Int])] -> [(Int,[Int])] -> Int -> [(Int,[Int])] extendWordCentres textInputArray positionArray inputArray n centres tcentres centreDistance | centreDistance == 0 = -- the last centre is on the last element: -- try to extend the tail of length 1 if surroundedByPunctuation (positionArray!n) (positionArray!n) inputArray then extendTailWord textInputArray positionArray inputArray (n+1) (1,[1,0]) centres else extendTailWord textInputArray positionArray inputArray (n+1) (1,[0]) centres | centreDistance-1 == fst (head tcentres) = -- the previous element in the centre list -- reaches exactly to the end of the last -- tail palindrome use the mirror property -- of palindromes to find the longest tail -- palindrome let (currentTail,oldWord:oldWords) = head tcentres in if surroundedByPunctuation (positionArray!(n-currentTail)) (positionArray!(n-1)) inputArray then if oldWord == currentTail then extendTailWord textInputArray positionArray inputArray n (head tcentres) centres else extendTailWord textInputArray positionArray inputArray n (currentTail,currentTail:oldWord:oldWords) centres else if oldWord == currentTail && oldWord > 0 then extendTailWord textInputArray positionArray inputArray n (currentTail, tail (snd (head tcentres))) centres else extendTailWord textInputArray positionArray inputArray n (head tcentres) centres | otherwise = -- move the centres one step -- add the length of the longest palindrome -- to the centres let newTail = min (fst (head tcentres)) (centreDistance-1) oldWord = head (snd (head tcentres)) newWords = if oldWord < newTail then if surroundedByPunctuation (positionArray!(n-newTail+1)) (positionArray!n) inputArray then newTail:snd (head tcentres) else snd (head tcentres) else if null (tail (snd (head tcentres))) then snd (head tcentres) else tail (snd (head tcentres)) in extendWordCentres textInputArray positionArray inputArray n ((newTail,newWords):centres) (tail tcentres) (centreDistance-1) finalWordCentres :: Array Int Char -> Array Int Int -> Array Int Char -> Int -> [(Int,[Int])] -> [(Int,[Int])] -> [(Int,[Int])] finalWordCentres textInputArray positionArray inputArray n tcentres centres | n == 0 = centres | n > 0 = let (_,tlast) = bounds textInputArray (oldTail,oldWord:oldWords) = head tcentres newTail = min oldTail (n-1) diff = if oldTail < n-1 then n - 1 - oldTail else 0 firstMirror = min tlast (tlast-diff-newTail+1) lastMirror = tlast-diff newWords = if oldWord < newTail && surroundedByPunctuation (positionArray!firstMirror) (positionArray!lastMirror) inputArray then newTail:oldWord:oldWords else if null oldWords then oldWord:oldWords else oldWords in finalWordCentres textInputArray positionArray inputArray (n-1) (tail tcentres) ((newTail,newWords):centres) | otherwise = error "finalWordCentres: input < 0" -} ----------------------------------------------------------------------------- -- Showing palindromes and other text related functionality ----------------------------------------------------------------------------- showPalindrome :: (Show a) => Array Int a -> (Int,Int) -> String showPalindrome a (len,pos) = let startpos = pos `div` 2 - len `div` 2 endpos = if odd len then pos `div` 2 + len `div` 2 else pos `div` 2 + len `div` 2 - 1 in show [a!n|n <- [startpos .. endpos]] showTextPalindrome :: (Show a) => Array Int Int -> Array Int a -> (Int,Int) -> String showTextPalindrome positionArray inputArray (len,pos) = let startpos = pos `div` 2 - len `div` 2 endpos = if odd len then pos `div` 2 + len `div` 2 else pos `div` 2 + len `div` 2 - 1 (pfirst,plast) = bounds positionArray (ifirst,ilast) = bounds inputArray in if endpos < startpos then [] else let start = if startpos > pfirst then (positionArray!(startpos-1))+1 else ifirst end = if endpos < plast then (positionArray!(endpos+1))-1 else ilast in show [inputArray!n | n<- [start..end]] {- Using this code instead of the last else above shows text palindromes without all punctuation around it. Right now this punctuation is shown. else let start = positionArray!!!startpos end = positionArray!!!endpos -} -- For palindromes in strings, punctuation, spacing, and control characters -- are often ignored myIsLetter :: Char -> Bool myIsLetter c = (not $ isPunctuation c) && (not $ isControl c) && (not $ isSpace c) surroundedByPunctuation :: Int -> Int -> Array Int Char -> Bool surroundedByPunctuation begin end inputArray | begin > afirst && end < alast = not (myIsLetter (inputArray!(begin-1))) && not (myIsLetter (inputArray!(end+1))) | begin <= afirst && end < alast = not (myIsLetter (inputArray!(end+1))) | begin <= afirst && end >= alast = True | begin > afirst && end >= alast = not (myIsLetter (inputArray!(begin-1))) | otherwise = error "surroundedByPunctuation" where (afirst,alast) = bounds inputArray ----------------------------------------------------------------------------- -- Array utils ----------------------------------------------------------------------------- listArrayl0 :: [a] -> Array Int a listArrayl0 string = listArray (0,length string - 1) string -- (!!!) is a variant of (!), which prints out the problem in case of -- an index out of bounds. (!!!) :: Array Int a -> Int -> a a!!! n = -- trace (show n ++ " " ++ show (snd (bounds a))) $ if n >= fst (bounds a) && n <= snd (bounds a) then a!n else error (show (fst (bounds a)) ++ " " ++ show (snd (bounds a)) ++ " " ++ show n) {- -- Used for testing purposes. wpac = wordPalindromesAroundCentres lwp = longestWordPalindromes 0 s = "aaaab a" -- "what is non si, not?"-- "www www www www"-- "wwww w woow waw wwwwwww w" a = listArrayl0 s i = zip s [0..] t' = map (first toLower) (filter (myIsLetter.fst) i) t = listArrayl0 (map fst t') p::Array Int Int p = listArrayl0 (map snd t') sbp = surroundedByPunctuation te = sbp 2 2 a etw = extendTailWord et = etw t p a 0 (0,[0]) [] ret = reverse $ map (head . snd) $ et zret = zip ret [0..] mzret = maximumBy (\(w,_) (w',_) -> compare w w') zret -}