----------------------------------------------------------------------------- -- -- Module : Data.Algorithms.Palindromes.Palindromes -- Copyright : (c) 2007 - 2012 Johan Jeuring -- License : BSD3 -- -- Maintainer : johan@jeuring.net -- Stability : experimental -- Portability : portable -- ----------------------------------------------------------------------------- module Data.Algorithms.Palindromes.Palindromes (longestPalindrome ,maximalPalindromes ,maximalPalindromesLengthAtLeast ,maximalPalindromesLengthBetween ,maximalPalindromesLengthAtMost ,palindromesLengthExact ,lengthLongestPalindrome ,lengthMaximalPalindromes ,longestTextPalindrome ,maximalTextPalindromesLengthAtLeast ,longestWordPalindrome ,maximalWordPalindromesLengthAtLeast ,palindromesAroundCentres ,myIsLetterC ) where import Data.List (maximumBy,intercalate) import Data.Word import Data.Char (toLower,isPunctuation,isSpace,isControl) import Data.Array (Array(),bounds,listArray,(!)) import qualified Data.ByteString as B import Data.ByteString.Internal ----------------------------------------------------------------------------- -- longestPalindrome ----------------------------------------------------------------------------- -- | longestPalindrome returns the longest palindrome in a string. longestPalindrome :: B.ByteString -> String longestPalindrome input = let (maxLength,pos) = maximumBy (\(l,_) (l',_) -> compare l l') (zip (palindromesAroundCentres input) [0..]) in showPalindrome input (maxLength,pos) ----------------------------------------------------------------------------- -- maximalPalindromes ----------------------------------------------------------------------------- -- | maximalPalindromes returns the maximal palindrome around each position -- in a string. maximalPalindromes :: B.ByteString -> String maximalPalindromes input = intercalate "\n" $ map (showPalindrome input) $ zip (palindromesAroundCentres input) [0..] ----------------------------------------------------------------------------- -- maximalPalindromesLengthAtLeast ----------------------------------------------------------------------------- -- | maximalPalindromesLengthAtLeast returns the longest palindrome around -- each position in a string. The integer argument is used to only show -- palindromes of length at least this integer. maximalPalindromesLengthAtLeast :: Int -> B.ByteString -> String maximalPalindromesLengthAtLeast m input = intercalate "\n" $ map (showPalindrome input) $ filter ((m<=) . fst) $ zip (palindromesAroundCentres input) [0..] ----------------------------------------------------------------------------- -- maximalPalindromesLengthBetween ----------------------------------------------------------------------------- -- | maximalPalindromesLengthBetween returns the longest palindrome around each -- position in a string. The integer arguments are used to only show palindromes -- of length in between the specified lengths. maximalPalindromesLengthBetween :: Int -> Int -> B.ByteString -> String maximalPalindromesLengthBetween m n input = intercalate "\n" $ map (showPalindrome input) $ filter (\(pl,_) -> pl >= m && pl <= n) $ zip (palindromesAroundCentres input) [0..] ----------------------------------------------------------------------------- -- maximalPalindromesLengthAtMost ----------------------------------------------------------------------------- -- | maximalPalindromesLengthAtMost returns the longest palindrome around each -- position in a string. The integer arguments are used to only show palindromes -- of length in between the specified lengths. maximalPalindromesLengthAtMost :: Int -> B.ByteString -> String maximalPalindromesLengthAtMost m input = intercalate "\n" $ map (showPalindrome input) $ filter ((<=m) . fst) $ zip (palindromesAroundCentres input) [0..] ----------------------------------------------------------------------------- -- palindromesLengthExact ----------------------------------------------------------------------------- -- | palindromesLengthExact returns the longest palindrome around each -- position in a string. The integer arguments are used to only show palindromes -- of length in between the specified lengths. palindromesLengthExact :: Int -> B.ByteString -> String palindromesLengthExact m input = intercalate "\n" $ map (showPalindrome input . \(_,p) -> (m,p)) $ filter (\(l,_) -> m<=l && (odd l == odd m)) $ zip (palindromesAroundCentres input) [0..] ----------------------------------------------------------------------------- -- lengthLongestPalindrome ----------------------------------------------------------------------------- -- | lengthLongestPalindrome returns the length of the longest palindrome in -- a string. lengthLongestPalindrome :: B.ByteString -> String lengthLongestPalindrome = show . maximum . palindromesAroundCentres ----------------------------------------------------------------------------- -- lengthLongestPalindromes ----------------------------------------------------------------------------- -- | lengthMaximalPalindromes returns the lengths of the longest palindrome -- around each position in a string. lengthMaximalPalindromes :: B.ByteString -> String lengthMaximalPalindromes = show . palindromesAroundCentres ----------------------------------------------------------------------------- -- longestTextPalindrome ----------------------------------------------------------------------------- -- | longestTextPalindrome returns the longest text palindrome in a string, -- ignoring spacing, punctuation symbols, and case of letters. longestTextPalindrome :: B.ByteString -> String longestTextPalindrome input = let textInput = B.map myToLower (B.filter myIsLetterW input) positionTextInput = listArrayl0 (B.findIndices myIsLetterW input) in longestTextPalindromeBS input textInput positionTextInput longestTextPalindromeBS :: B.ByteString -> B.ByteString -> Array Int Int -> String longestTextPalindromeBS input textInput positionTextInput = let (len,pos) = maximumBy (\(l,_) (l',_) -> compare l l') (zip (palindromesAroundCentres textInput) [0..]) in showTextPalindrome input positionTextInput (len,pos) ----------------------------------------------------------------------------- -- longestTextPalindromes ----------------------------------------------------------------------------- -- | longestTextPalindromes returns the longest text palindrome around each -- position in a string. The integer argument is used to only show palindromes -- of length at least this integer. maximalTextPalindromesLengthAtLeast :: Int -> B.ByteString -> String maximalTextPalindromesLengthAtLeast m input = let textInput = B.map myToLower (B.filter myIsLetterW input) positionTextInput = listArrayl0 (B.findIndices myIsLetterW input) in intercalate "\n" $ maximalTextPalindromesLengthAtLeastBS m textInput positionTextInput input maximalTextPalindromesLengthAtLeastBS :: Int -> B.ByteString -> Array Int Int -> B.ByteString -> [String] maximalTextPalindromesLengthAtLeastBS m textInput positionTextInput input = map (showTextPalindrome input positionTextInput) $ filter ((m<=) . fst) $ zip (palindromesAroundCentres textInput) [0..] ----------------------------------------------------------------------------- -- palindromesAroundCentres -- -- The function that implements the palindrome finding algorithm. -- Used in all the above interface functions. ----------------------------------------------------------------------------- -- | palindromesAroundCentres is the central function of the module. It returns -- the list of lenghths of the longest palindrome around each position in a -- string. palindromesAroundCentres :: B.ByteString -> [Int] palindromesAroundCentres input = reverse $ extendPalindrome input 0 0 [] extendPalindrome :: B.ByteString -> Int -> Int -> [Int] -> [Int] extendPalindrome input rightmost currentPalindrome currentMaximalPalindromes | rightmost > last -- reached the end of the array = finalPalindromes currentPalindrome currentMaximalPalindromes (currentPalindrome:currentMaximalPalindromes) | rightmost-currentPalindrome == first || B.index input rightmost /= B.index input (rightmost-currentPalindrome-1) -- the current palindrome extends to the start of the array, or -- it cannot be extended = moveCenter input rightmost (currentPalindrome:currentMaximalPalindromes) currentMaximalPalindromes currentPalindrome | otherwise -- the current palindrome can be extended = extendPalindrome input (rightmost+1) (currentPalindrome+2) currentMaximalPalindromes where first = 0 last = B.length input - 1 moveCenter :: B.ByteString -> Int -> [Int] -> [Int] -> Int -> [Int] moveCenter input rightmost currentMaximalPalindromes previousMaximalPalindromes centreDistance | centreDistance == 0 -- the last centre is on the last element: try to extend the tail of length 1 = extendPalindrome input (rightmost+1) 1 currentMaximalPalindromes | centreDistance-1 == head previousMaximalPalindromes -- the previous element in the centre list reaches exactly to the end of the last -- tail palindrome use the mirror property of palindromes to find the longest tail palindrome = extendPalindrome input rightmost (head previousMaximalPalindromes) currentMaximalPalindromes | otherwise -- move the centres one step add the length of the longest palindrome to the centres = moveCenter input rightmost (min (head previousMaximalPalindromes) (centreDistance-1):currentMaximalPalindromes) (tail previousMaximalPalindromes) (centreDistance-1) finalPalindromes :: Int -> [Int] -> [Int] -> [Int] finalPalindromes nrOfCenters previousMaximalPalindromes currentMaximalPalindromes | nrOfCenters == 0 = currentMaximalPalindromes | nrOfCenters > 0 = finalPalindromes (nrOfCenters-1) (tail previousMaximalPalindromes) (min (head previousMaximalPalindromes) (nrOfCenters-1):currentMaximalPalindromes) | otherwise = error "finalCentres: input < 0" ----------------------------------------------------------------------------- -- longestWordPalindromes ----------------------------------------------------------------------------- -- | longestWordPalindromes returns the longest word palindrome around each -- position in a string. The integer argument is used to only show -- palindromes of length at least this integer. maximalWordPalindromesLengthAtLeast :: Int -> B.ByteString -> String maximalWordPalindromesLengthAtLeast m input = let textInput = B.map myToLower (B.filter myIsLetterW input) positionTextInput = listArrayl0 (B.findIndices myIsLetterW input) in intercalate "\n" $ maximalWordPalindromesLengthAtLeastBS m input textInput positionTextInput maximalWordPalindromesLengthAtLeastBS :: Int -> B.ByteString -> B.ByteString -> Array Int Int -> [String] maximalWordPalindromesLengthAtLeastBS m input textInput positionTextInput = map (showTextPalindrome input positionTextInput) $ filter ((m<=) . fst) $ zip (wordPalindromesAroundCentres input textInput positionTextInput) [0..] -- | longestWordPalindrome returns the longest text palindrome preceded and -- followed by non-letter symbols (if any). longestWordPalindrome :: B.ByteString -> String longestWordPalindrome input = let textInput = B.map myToLower (B.filter myIsLetterW input) positionTextInput = listArrayl0 (B.findIndices myIsLetterW input) (len,pos) = maximumBy (\(w,_) (w',_) -> compare w w') (zip (wordPalindromesAroundCentres input textInput positionTextInput) [0..]) in showTextPalindrome input positionTextInput (len,pos) ----------------------------------------------------------------------------- -- wordPalindromesAroundCentres -- -- This is the function palindromesAroundCentres, extended with the longest -- word palindromes around each centre. ----------------------------------------------------------------------------- -- | wordPalindromesAroundCentres returns the same lengths of palindromes as -- palindromesAroundCentres, but at the same time also the length of the -- longest word palindromes around the centres. wordPalindromesAroundCentres :: B.ByteString -> B.ByteString -> Array Int Int -> [Int] wordPalindromesAroundCentres input textInput positionTextInput = let tfirst = 0 in reverse $ map (head . snd) $ extendTailWord input textInput positionTextInput [] tfirst (0,[0]) -- extendTailWordold textInput positionTextInput input n current centres = extendTailWord input textInput positionTextInput centres n current extendTailWord :: B.ByteString -> B.ByteString -> Array Int Int -> [(Int,[Int])] -> Int -> (Int,[Int]) -> [(Int,[Int])] extendTailWord input textInput positionTextInput centres n current@(currentTail,currentTailWords) | n > alast = -- reached the end of the text input array finalWordCentres input textInput positionTextInput (current:centres) currentTail centres (1+length centres) | n-currentTail == afirst = -- the current longest tail palindrome extends to the start of the text input array extendWordCentres input textInput positionTextInput (current:centres) n centres currentTail | B.index textInput n == B.index textInput (n-currentTail-1) = -- the current longest tail palindrome can be extended -- check whether or not the extended palindrome is a wordpalindrome if surroundedByPunctuation (positionTextInput!(n-currentTail-1)) (positionTextInput!n) input then extendTailWord input textInput positionTextInput centres (n+1) (currentTail+2,currentTail+2:currentTailWords) else extendTailWord input textInput positionTextInput centres (n+1) (currentTail+2,currentTailWords) | otherwise = -- the current longest tail palindrome cannot be extended extendWordCentres input textInput positionTextInput (current:centres) n centres currentTail where (afirst,alast) = (0,B.length textInput -1) extendWordCentres :: B.ByteString -> B.ByteString -> Array Int Int -> [(Int,[Int])] -> Int -> [(Int,[Int])] -> Int -> [(Int,[Int])] extendWordCentres input textInput positionTextInput centres n tcentres centreDistance | centreDistance == 0 = -- the last centre is on the last element: -- try to extend the tail of length 1 if surroundedByPunctuation (positionTextInput!n) (positionTextInput!n) input then extendTailWord input textInput positionTextInput centres (n+1) (1,[1,0]) else extendTailWord input textInput positionTextInput centres (n+1) (1,[0]) | centreDistance-1 == fst (head tcentres) = -- the previous element in the centre list -- reaches exactly to the end of the last -- tail palindrome use the mirror property -- of palindromes to find the longest tail -- palindrome let (currentTail,oldWord:oldWords) = head tcentres in if surroundedByPunctuation (positionTextInput!(n-currentTail)) (positionTextInput!(n-1)) input then if oldWord == currentTail then extendTailWord input textInput positionTextInput centres n (head tcentres) else extendTailWord input textInput positionTextInput centres n (currentTail,currentTail:oldWord:oldWords) else if oldWord == currentTail && oldWord > 0 then extendTailWord input textInput positionTextInput centres n (currentTail, tail (snd (head tcentres))) else extendTailWord input textInput positionTextInput centres n (head tcentres) | otherwise = -- move the centres one step -- add the length of the longest palindrome -- to the centres let newTail = min (fst (head tcentres)) (centreDistance-1) oldWord = head (snd (head tcentres)) newWords | oldWord < newTail = if surroundedByPunctuation (positionTextInput!(n-newTail+1)) (positionTextInput!n) input then newTail:snd (head tcentres) else snd (head tcentres) | null (tail (snd (head tcentres))) = snd (head tcentres) | otherwise = tail (snd (head tcentres)) in extendWordCentres input textInput positionTextInput ((newTail,newWords):centres) n (tail tcentres) (centreDistance-1) finalWordCentres :: B.ByteString -> B.ByteString -> Array Int Int -> [(Int,[Int])] -> Int -> [(Int,[Int])] -> Int -> [(Int,[Int])] finalWordCentres input textInput positionTextInput centres n tcentres mirrorPoint | n == 0 = centres | n > 0 = let tlast = B.length textInput - 1 (oldTail,oldWord:oldWords) = head tcentres newTail = min oldTail (n-1) newWord = min oldWord (n-1) tailFirstMirror = min tlast (div (mirrorPoint - newTail) 2) tailLastMirror = min tlast (if odd newTail then div (mirrorPoint + newTail) 2 else div (mirrorPoint + newTail) 2 - 1) wordFirstMirror = min tlast (div (mirrorPoint - newWord) 2) wordLastMirror = min tlast (if odd newWord then div (mirrorPoint + newTail) 2 else div (mirrorPoint + newTail) 2 - 1) newWords | surroundedByPunctuation (positionTextInput!tailFirstMirror) (positionTextInput!tailLastMirror) input = if newWord == newTail then newTail:oldWords else newTail:oldWord:oldWords | surroundedByPunctuation (positionTextInput!wordFirstMirror) (positionTextInput!wordLastMirror) input || null oldWords = newWord:oldWords | otherwise = oldWords in finalWordCentres input textInput positionTextInput ((newTail,newWords):centres) (n-1) (tail tcentres) (mirrorPoint+1) | otherwise = error "finalWordCentres: input < 0" ----------------------------------------------------------------------------- -- Showing palindromes and other text related functionality ----------------------------------------------------------------------------- showPalindrome :: B.ByteString -> (Int,Int) -> String showPalindrome input (len,pos) = let startpos = pos `div` 2 - len `div` 2 in show $ B.take len $ B.drop startpos input showTextPalindrome :: B.ByteString -> Array Int Int -> (Int,Int) -> String showTextPalindrome input positionTextInput (len,pos) = let startpos = pos `div` 2 - len `div` 2 endpos = if odd len then pos `div` 2 + len `div` 2 else pos `div` 2 + len `div` 2 - 1 (pfirst,plast) = bounds positionTextInput (ifirst,ilast) = (0,1 + B.length input) in if endpos < startpos then [] else let start = if startpos > pfirst then (positionTextInput!(startpos-1))+1 else ifirst end = if endpos < plast then (positionTextInput!(endpos+1))-1 else ilast in show (B.take (end-start+1) (B.drop start input)) {- Using this code instead of the last else above shows text palindromes without all punctuation around it. Right now this punctuation is shown. else let start = positionArray!!!startpos end = positionArray!!!endpos -} -- For palindromes in strings, punctuation, spacing, and control characters -- are often ignored myIsLetterW :: Word8 -> Bool myIsLetterW c' = not (isPunctuation c) && not (isControl c) && not (isSpace c) where c = w2c c' myIsLetterC :: Char -> Bool myIsLetterC c = not (isPunctuation c) && not (isControl c) && not (isSpace c) myToLower :: Word8 -> Word8 myToLower = c2w . toLower . w2c surroundedByPunctuation :: Int -> Int -> B.ByteString -> Bool surroundedByPunctuation begin end input | begin > afirst && end < alast = not (myIsLetterW (B.index input (begin-1))) && not (myIsLetterW (B.index input (end+1))) | begin <= afirst && end < alast = not (myIsLetterW (B.index input (end+1))) | begin <= afirst && end >= alast = True | begin > afirst && end >= alast = not (myIsLetterW (B.index input (begin-1))) | otherwise = error "surroundedByPunctuation" where (afirst,alast) = (0,B.length input - 1) ----------------------------------------------------------------------------- -- Array utils ----------------------------------------------------------------------------- listArrayl0 :: [a] -> Array Int a listArrayl0 string = listArray (0,length string - 1) string