Create  desumasuのWiki  Index  Search  Changes  RSS  wikifarm  Login

desumasuのWiki - Rubyの文字列操作関数をHaskellで定義する Diff

  • Added parts are displayed like this.
  • Deleted parts are displayed like this.

!はじめに
Rubyの文字列操作関数の一部をHaskellで実装してみました。

ここに掲載されているソースコードは[[cinnamon|http://sourceforge.jp/projects/cinnamon/]]に取り込まれております。ライセンスはcinnamonに従います。

バグ等ありましたら、ページ最後のコメント欄に書き込んでいただけると非常に助かります。

!文字列操作関数一覧
以下は文字列操作関数の一覧です。HUnitで記述したテストも実装コードと一緒に掲載しますので、関数を利用する際の参考にしてください。

!!String#capitalize
<<<
-- | Upper-case the first character and lower-case every remaining one.
-- The empty string is returned unchanged.
capitalize :: String -> String
capitalize str = case str of
    []         -> []
    (c : rest) -> toUpper c : [toLower r | r <- rest]
>>>
<<<
-- HUnit cases for 'capitalize'.
testCapitalize = test
    [ "test1" ~: "Hello" ~=? capitalize "hello"
    , "test2" ~: "Hello" ~=? capitalize "HELLO"
    , "test3" ~: ",hello" ~=? capitalize ",Hello"
    , "oneChar" ~: "H" ~=? capitalize "h"
    , "empty" ~: "" ~=? capitalize ""
    ]
>>>

!!String#center
<<<
-- | Pad @str@ with spaces on both sides so that it is @width@ characters
-- wide. When the padding cannot be split evenly, the extra space goes on
-- the right. A string that is already at least @width@ wide is returned
-- unchanged.
center :: String -> Int -> String
center str width
    | padding <= 0 = str
    | otherwise    = concat [replicate before ' ', str, replicate after ' ']
    where
        padding = width - length str
        before  = padding `div` 2
        after   = padding - before
>>>
<<<
-- HUnit cases for 'center'.
testCenter = test
    [ "test1" ~: "  hoge  " ~=? center "hoge" 8
    , "test2" ~: "hoge" ~=? center "hoge" 1
    , "test3" ~: " hoge  " ~=? center "hoge" 7
    , "empty1" ~: "" ~=? center "" 0
    , "empty2" ~: "  " ~=? center "" 2
    ]
>>>

!!String#chomp
引数を指定した場合と指定しない場合と2種類を定義しました。

!!!chomp'
こちらは引数を指定した場合。
<<<
-- | Remove the suffix @rs@ from @str@ when @str@ ends with it; otherwise
-- return @str@ untouched. Works on any list with comparable elements.
chomp' :: (Eq a) => [a] -> [a] -> [a]
chomp' str rs = if trailer == rs then body else str
    where
        -- Position at which a suffix as long as @rs@ would start.
        cut = length str - length rs
        (body, trailer) = splitAt cut str
>>>
<<<
-- HUnit cases for chomp'.
testChomp' = test
    [ "test1" ~: "hoge" ~=? chomp' "hoge\n" "\n"
    , "test2" ~: "hoge" ~=? chomp' "hoge" "\n"
    , "test3" ~: "hoge" ~=? chomp' "hoge\r\n" "\r\n"
    , "empty1" ~: "" ~=? chomp' "\n" "\n"
    , "empty2" ~: "" ~=? chomp' "" "\n"
    ]
>>>

!!!chomp
こちらは引数を指定しない場合。Rubyのchompの仕様とは少し違って、末尾が"\r", "\n", "\r\n"のいずれであっても、その改行を1つだけ削除するようにしています。
<<<
-- | Remove one trailing line terminator: @"\r\n"@, @"\n"@ or @"\r"@.
-- Only the last terminator is removed; a string without one is returned
-- unchanged.
chomp :: String -> String
chomp str
    | endsWith "\r\n"                = dropLast 2
    | endsWith "\n" || endsWith "\r" = dropLast 1
    | otherwise                      = str
    where
        endsWith suffix = suffix `isSuffixOf` str
        dropLast n = take (length str - n) str
>>>
<<<
-- HUnit cases for 'chomp'.
testChomp = test
    [ "test1" ~: "hoge" ~=? chomp "hoge\n"
    , "test2" ~: "hoge" ~=? chomp "hoge\r\n"
    , "test3" ~: "hoge\n" ~=? chomp "hoge\n\r"
    , "test4" ~: "hoge" ~=? chomp "hoge\r"
    , "empty1" ~: "" ~=? chomp ""
    , "empty2" ~: "" ~=? chomp "\r\n"
    ]
>>>

!!String#chop
<<<
-- | Drop the last character of the string; a trailing @"\r\n"@ counts as
-- a single unit and is dropped as a whole. The empty string stays empty.
chop :: String -> String
chop str = take (length str - cut) str
    where
        -- Number of characters to remove from the end.
        cut = if "\r\n" `isSuffixOf` str then 2 else 1
>>>
<<<
-- HUnit cases for 'chop'.
testChop = test
    [ "test1" ~: "hoge" ~=? chop "hoge\n"
    , "test2" ~: "hoge" ~=? chop "hoge\r"
    , "test3" ~: "hoge" ~=? chop "hoge\r\n"
    , "test4" ~: "hoge\n" ~=? chop "hoge\n\r"
    , "test5" ~: "hog" ~=? chop "hoge"
    , "empty" ~: "" ~=? chop ""
    ]
>>>

!!String#concat
あまり利用価値のない関数なのですが、一応。
<<<
-- | Append the 'show' rendering of a value to a string.
rbConcat :: (Show a) => String -> a -> String
rbConcat str value = str ++ show value
>>>
テストは省略します。

!!String#count
省略します。

!!String#crypt
省略します。

!!String#delete
省略します。

!!String#downcase
<<<
-- | Convert every character of the string to lower case.
downcase :: String -> String
downcase str = [toLower ch | ch <- str]
>>>
<<<
-- HUnit cases for 'downcase'.
testDowncase = test
    [ "test1" ~: "hoge" ~=? downcase "HOGE"
    , "test2" ~: "hoge" ~=? downcase "Hoge"
    , "empty1" ~: "" ~=? downcase ""
    ]
>>>

!!String#dump
<<<
-- | Replace every non-printable character with its Haskell escape
-- sequence (as produced by 'showLitChar'); printable characters,
-- including quotes and backslashes, are copied through unchanged.
dump :: String -> String
dump [] = []
dump (ch : rest)
    | isPrint ch = ch : dump rest
    | otherwise  = showLitChar ch (dump rest)
>>>
<<<
-- HUnit cases for 'dump'.
testDump = test
    [ "test1" ~: "hoge\\n" ~=? dump "hoge\n"
    , "test2" ~: "hoge" ~=? dump "hoge"
    , "test3" ~: "hoge\"" ~=? dump "hoge\""
    , "test4" ~: "hoge\\" ~=? dump "hoge\\"
    , "empty1" ~: "" ~=? dump ""
    ]
>>>

!!String#each
この関数では、ここで定義するindexという関数も使用しています。
<<<
-- | Split the input into separator-terminated pieces with 'toLines' and
-- run the action @f@ on each piece in order, collecting the results.
each :: (Monad m, Eq a) => [a] -> ([a] -> m b) -> [a] -> m [b]
each rs f = mapM f . toLines rs

-- | Split @str@ into pieces, each ending with the separator @rs@ (the
-- separator is kept at the end of its piece). A final piece without a
-- trailing separator is returned as is, and an input that does not
-- contain the separator yields a single-element list.
--
-- An empty separator yields @[str]@: it matches at offset 0 without
-- consuming any input, so splitting on it would never make progress and
-- would produce an endless list of empty pieces.
toLines :: (Eq a) => [a] -> [a] -> [[a]]
toLines rs str
    | null rs = [str]
    | otherwise = case index str rs of
        Just idx -> h : if null t then [] else toLines rs t
            where (h, t) = splitAt (idx + length rs) str
        Nothing -> [str]
>>>
<<<
-- HUnit cases for 'toLines'.
testToLines = test
    [ "test1" ~: ["hoge\n", "huga\n"] ~=? toLines "\n" "hoge\nhuga\n"
    , "test2" ~: ["hoge\n", "\n"] ~=? toLines "\n" "hoge\n\n"
    , "test3" ~: ["hoge\r\n", "huga\r\n"] ~=? toLines "\r\n" "hoge\r\nhuga\r\n"
    , "test4" ~: ["hoge\r\n", "huga"] ~=? toLines "\r\n" "hoge\r\nhuga"
    , "empty1" ~: [""] ~=? toLines "\n" ""
    ]
>>>

!!String#hex
Rubyの定義がよく分からなかったので、微妙なところで動作が異なると思います。
<<<
-- | Parse the leading hexadecimal digits of a string.
--
-- Underscores are removed first. A leading @-@ negates the result and an
-- optional @0x@ / @0X@ prefix is skipped. Parsing stops at the first
-- character that is not a hexadecimal digit; if there are no digits the
-- result is 0.
hex :: String -> Int
hex str = case filter (/= '_') str of
    []         -> 0
    '-' : rest -> negate (magnitude rest)
    cleaned    -> magnitude cleaned
    where
        -- Value of an unsigned literal, with or without the 0x prefix.
        magnitude s
            | hasPrefix s = accumulate 0 (drop 2 s)
            | otherwise   = accumulate 0 s
        hasPrefix s = "0x" `isPrefixOf` s || "0X" `isPrefixOf` s
        -- Fold digits into the accumulator until a non-hex character.
        accumulate acc (d : ds)
            | isHexDigit d = accumulate (acc * 16 + digitToInt d) ds
        accumulate acc _ = acc
>>>
<<<
-- HUnit cases for 'hex'.
testHex = test
    [ "test1" ~: 16 ~=? hex "10"
    , "test2" ~: 255 ~=? hex "ff"
    , "test3" ~: 16 ~=? hex "0x10"
    , "test4" ~: -16 ~=? hex "-0x10"
    , "test5" ~: 0 ~=? hex "xyz"
    , "test6" ~: 16 ~=? hex "10z"
    , "test7" ~: 16 ~=? hex "1_0"
    , "empty1" ~: 0 ~=? hex ""
    ]
>>>

!!String#include
この関数では、ここで定義するindexという関数も使用しています。
<<<
-- | 'True' when @substr@ occurs somewhere inside @str@ (as decided by
-- 'index'); the empty @substr@ is contained in every list.
include :: (Eq a) => [a] -> [a] -> Bool
include str substr = case index str substr :: Maybe Int of
    Nothing -> False
    Just _  -> True
>>>
<<<
-- HUnit cases for 'include'.
testInclude = test
    [ "test1" ~: True ~=? include "hoge" "ge"
    , "test2" ~: False ~=? include "hoge" "ha"
    , "test3" ~: True ~=? include "hoge" ""
    , "empty1" ~: True ~=? include "" ""
    , "empty2" ~: False ~=? include "" "ho"
    ]
>>>

!!String#index
文字列を想定して書いたのですが、リストすべてに対して使えます。
<<<
-- | Offset of the first occurrence of @substr@ in @str@, or 'Nothing'
-- when it does not occur. Works on any list with comparable elements,
-- not only strings. An empty @substr@ matches at offset 0.
--
-- The search advances one element at a time and stops when @str@ is
-- exhausted, so the list lengths are never recomputed and no partial
-- pattern match is needed.
index :: (Eq a, Num b) => [a] -> [a] -> Maybe b
index str substr
    | substr `isPrefixOf` str = Just 0
    | otherwise = case str of
        []       -> Nothing
        _ : rest -> fmap (+ 1) (index rest substr)
>>>
<<<
-- HUnit cases for 'index'.
testIndex = test
    [ "test1" ~: Just 3 ~=? index "hoge" "e"
    , "test2" ~: Just 2 ~=? index "hoge" "ge"
    , "test3" ~: Nothing ~=? index "hoge" "ga"
    , "test4" ~: Just 0 ~=? index "hoge" ""
    , "empty1" ~: Just 0 ~=? index "" ""
    , "empty2" ~: Nothing ~=? index "" "e"
    ]
>>>

!!String#intern
省略します。

!!String#length
length関数そのまま。

!!String#ljust
<<<
-- | Left-justify @str@ in a field of @width@ characters by appending
-- spaces. A string that is already at least @width@ wide is returned
-- unchanged.
ljust :: String -> Int -> String
ljust str width = str ++ replicate missing ' '
    where
        -- 'replicate' yields the empty list for a non-positive count.
        missing = width - length str
>>>
<<<
-- HUnit cases for 'ljust'.
testLjust = test
    [ "test1" ~: "hoge  " ~=? ljust "hoge" 6
    , "test2" ~: "hoge" ~=? ljust "hoge" 1
    , "empty1" ~: "  " ~=? ljust "" 2
    ]
>>>

!!String#oct
16進数のパースにはここで定義したhexを用いています。
実装が汚いのでそのうち修正します(笑
<<<
-- | Parse the leading octal digits of a string.
--
-- Underscores are removed first and a leading @-@ negates the result.
-- A @0x@ / @0X@ prefix delegates to 'hex', a @0b@ / @0B@ prefix to bin';
-- anything else is read as octal until the first non-octal character.
-- With no digits the result is 0.
oct :: String -> Int
oct str = case filter (/= '_') str of
    []         -> 0
    '-' : rest -> negate (magnitude rest)
    cleaned    -> magnitude cleaned
    where
        -- Value of an unsigned literal, dispatching on its radix prefix.
        magnitude s
            | startsWith "0x" "0X" s = hex s
            | startsWith "0b" "0B" s = bin' (drop 2 s)
            | otherwise              = octal 0 s
        startsWith lower upper s = lower `isPrefixOf` s || upper `isPrefixOf` s
        -- Fold digits into the accumulator until a non-octal character.
        octal acc (d : ds)
            | isOctDigit d = octal (acc * 8 + digitToInt d) ds
        octal acc _ = acc

-- | Parse the leading binary digits (@0@ / @1@) of a string, stopping at
-- the first other character. With no digits the result is 0.
bin' :: String -> Int
bin' = go 0
    where
        go acc (bit : bits)
            | bit == '0' || bit == '1' = go (acc * 2 + digitToInt bit) bits
        go acc _ = acc
>>>
<<<
-- HUnit cases for 'oct'.
testOct = test
    [ "test1" ~: 8 ~=? oct "10"
    , "test2" ~: 8 ~=? oct "010"
    , "test3" ~: 0 ~=? oct "8"
    , "test4" ~: 2 ~=? oct "0b10"
    , "test6" ~: 16 ~=? oct "0x10"
    , "test7" ~: 65 ~=? oct "1_0_1x"
    , "empty1" ~: 0 ~=? oct ""
    ]
>>>

!!String#replace
省略します。

!!String#reverse
reverse関数そのまま。

!!String#rindex
ここで定義したindex関数を用いています。
<<<
-- | Offset of the last occurrence of @substr@ in @str@, or 'Nothing'
-- when it does not occur. Implemented by searching the reversed needle
-- in the reversed haystack with 'index' and converting the offset back.
rindex :: (Eq a) => [a] -> [a] -> Maybe Int
rindex str substr = fmap fromEnd (index (reverse str) (reverse substr))
    where
        -- Translate an offset measured from the end into one from the start.
        fromEnd off = length str - off - length substr
>>>
<<<
-- HUnit cases for 'rindex'.
testRindex = test
    [ "test1" ~: Just 1 ~=? rindex "hello" "e"
    , "test2" ~: Just 3 ~=? rindex "hello" "l"
    , "test3" ~: Nothing ~=? rindex "hello" "a"
    , "test4" ~: Just 4 ~=? rindex "hogehoge" "ho"
    , "test5" ~: Just 4 ~=? rindex "hogeee" "ee"
    , "test6" ~: Just 4 ~=? rindex "hoge" ""
    , "empty1" ~: Nothing ~=? rindex "" "ho"
    , "empty2" ~: Just 0 ~=? rindex "" ""
    ]
>>>

!!String#rjust
<<<
-- | Right-justify @str@ in a field of @width@ characters by prepending
-- spaces. A string that is already at least @width@ wide is returned
-- unchanged.
rjust :: String -> Int -> String
rjust str width = replicate missing ' ' ++ str
    where
        -- 'replicate' yields the empty list for a non-positive count.
        missing = width - length str
>>>
<<<
-- HUnit cases for 'rjust'.
testRjust = test
    [ "test1" ~: "  hoge" ~=? rjust "hoge" 6
    , "test2" ~: "hoge" ~=? rjust "hoge" 1
    , "empty1" ~: "  " ~=? rjust "" 2
    ]
>>>

!!String#scan
省略します。

!!String#slice
引数によって異なった動作をするので、複数の関数に分割しました。

!!!sliceFromTo
<<<
-- | Elements of @str@ from index @from@ to index @to@, both inclusive.
--
-- Negative indices count from the end of the list (@-1@ is the last
-- element). An end index past the end of the list is clamped, and a
-- range whose end is directly before its start yields the empty list.
--
-- Calls 'error' when the normalised start index is negative or lies
-- more than one position past the normalised end index.
sliceFromTo :: Int -> Int -> [a] -> [a]
sliceFromTo from to str
    | 0 <= from' && from' <= to' + 1 = drop from' $ take (to' + 1) str
    | otherwise =
        error $ "sliceFromTo: invalid range " ++ show from ++ ".." ++ show to
    where
        -- Resolve a possibly negative index against the list length; the
        -- length is only computed when a negative index actually needs it.
        absolute i = if i < 0 then length str + i else i
        from' = absolute from
        to' = absolute to
>>>
<<<
-- HUnit cases for 'sliceFromTo'.
testSliceFromTo = test
    [ "test1" ~: "ell" ~=? sliceFromTo 1 3 "hello"
    , "test2" ~: "e" ~=? sliceFromTo 1 1 "hello"
    , "test3" ~: "lo" ~=? sliceFromTo 3 5 "hello"
    , "test4" ~: "llo" ~=? sliceFromTo (-3) (-1) "hello"
    , "test5" ~: "" ~=? sliceFromTo 1 0 "hello"
    , "test6" ~: "" ~=? sliceFromTo 0 (-1) ""
    ]
>>>

!!!sliceFromLen
<<<
-- | At most @len@ elements of the list, starting at index @from@.
--
-- A negative @from@ counts from the end of the list (@-1@ is the last
-- element). A range that runs past the end of the list is clamped.
--
-- The slice is computed directly rather than by converting to an
-- inclusive end index: @from + len - 1@ can change sign (for example
-- @from = 0, len = 0@ gives @-1@), and an end index with the wrong sign
-- would be interpreted relative to the wrong end of the list.
--
-- Calls 'error' when @len@ is negative or when a negative @from@ reaches
-- before the start of the list.
sliceFromLen :: Int -> Int -> [a] -> [a]
sliceFromLen from len str
    | len < 0 =
        error $ "sliceFromLen: negative length " ++ show len
    | start < 0 =
        error $ "sliceFromLen: start index " ++ show from ++ " is out of range"
    | otherwise = take len $ drop start str
    where
        start = if from < 0 then length str + from else from
>>>
<<<
-- HUnit cases for 'sliceFromLen'.
testSliceFromLen = test
    [ "test1" ~: "ell" ~=? sliceFromLen 1 3 "hello"
    , "test2" ~: "e" ~=? sliceFromLen 1 1 "hello"
    , "test3" ~: "llo" ~=? sliceFromLen (-3) 3 "hello"
    , "test4" ~: "lo" ~=? sliceFromLen 3 3 "hello"
    , "test5" ~: "" ~=? sliceFromLen 3 0 "hello"
    , "empty1" ~: "" ~=? sliceFromLen 0 0 ""
    ]
>>>

!!String#split
省略します。

!!String#squeeze
省略します。

!!String#strip
<<<
-- | Remove leading and trailing whitespace (as classified by 'isSpace').
strip :: String -> String
strip str = dropTrailing (dropLeading str)
    where
        dropLeading = dropWhile isSpace
        -- Trailing whitespace is leading whitespace of the reversed string.
        dropTrailing = reverse . dropLeading . reverse
>>>
<<<
-- HUnit cases for 'strip'.
testStrip = test
    [ "test1" ~: "hello" ~=? strip "\r hello \n"
    , "test2" ~: "hello" ~=? strip "hello "
    , "test3" ~: "hello" ~=? strip " hello"
    , "test4" ~: "hello" ~=? strip "hello"
    , "empty1" ~: "" ~=? strip ""
    ]
>>>

!!String#sub
省略します。

!!String#succ
Rubyのsuccでは、文字列中にアルファベットや数字が含まれていなかった場合、単に文字コードを一つずつ増分するような仕様になっていますが、その機能は実装していません。
<<<
-- | Successor of a string, counting the way an odometer does.
--
-- Working from the right, a digit, lower-case letter or upper-case
-- letter is incremented within its own class; @9@, @z@ and @Z@ wrap to
-- @0@, @a@ and @A@ and carry into the next character to the left. Any
-- other character is left unchanged and lets the carry pass through.
-- If a carry is still pending at the left edge, a new leading @1@, @a@
-- or @A@ is inserted in front of the first digit/letter.
-- Strings without any digit or letter are returned unchanged.
succStr :: String -> String
succStr str
    | overflow  = widen bumped
    | otherwise = bumped
    where
        -- Right-to-left pass; the initial carry is the increment itself.
        (overflow, bumped) = foldr step (True, []) str

        step ch (pending, rest)
            | pending   = (wraps ch, bump ch : rest)
            | otherwise = (False, ch : rest)

        -- Does incrementing this character carry into its left neighbour?
        wraps ch
            | isDigit ch = ch == '9'
            | isLower ch = ch == 'z'
            | isUpper ch = ch == 'Z'
            | otherwise  = True

        -- The character after incrementing, wrapping within its class.
        bump ch
            | isDigit ch = if ch == '9' then '0' else succ ch
            | isLower ch = if ch == 'z' then 'a' else succ ch
            | isUpper ch = if ch == 'Z' then 'A' else succ ch
            | otherwise  = ch

        -- Insert the new leading figure before the first digit/letter.
        widen [] = []
        widen whole@(c : cs)
            | isDigit c = '1' : whole
            | isLower c = 'a' : whole
            | isUpper c = 'A' : whole
            | otherwise = c : widen cs
>>>
<<<
-- HUnit cases for 'succStr'.
testSuccStr = test
    [ "test1" ~: "1" ~=? succStr "0"
    , "test2" ~: "ab" ~=? succStr "aa"
    , "test3" ~: "b0" ~=? succStr "a9"
    , "test4" ~: "Ba" ~=? succStr "Az"
    , "test5" ~: "10" ~=? succStr "9"
    , "test6" ~: "-10" ~=? succStr "-9"
    , "test7" ~: "aaa" ~=? succStr "zz"
    , "test8" ~: "100" ~=? succStr "99"
    , "test9" ~: "2.0.0" ~=? succStr "1.9.9"
    , "empty1" ~: "" ~=? succStr ""
    ]
>>>

!!String#sum
省略します。

!!String#swapcase
<<<
-- | Swap the case of every character: lower-case characters become
-- upper-case and all others are passed through 'toLower'.
swapcase :: String -> String
swapcase = map flipCase
    where
        flipCase c
            | isLower c = toUpper c
            | otherwise = toLower c
>>>
<<<
-- HUnit cases for 'swapcase'.
testSwapcase = test
    [ "test1" ~: "hELLO_" ~=? swapcase "Hello_"
    , "empty1" ~: "" ~=? swapcase ""
    ]
>>>

!!String#tr
省略します。

!!String#unpack
省略します。

!!String#upcase
<<<
-- | Convert every character of the string to upper case.
upcase :: String -> String
upcase str = [toUpper ch | ch <- str]
>>>
<<<
-- HUnit cases for 'upcase'. The empty-string case exercises 'upcase'
-- itself (it previously called 'downcase' by mistake).
testUpcase = test
    [ "test1" ~: "HOGE" ~=? upcase "Hoge"
    , "empty1" ~: "" ~=? upcase ""
    ]
>>>

!!String#upto
<<<
-- | Run the action @f@ on @from@ and on each of its successors (as
-- produced by 'succStr') up to and including @to@.
--
-- @f@ is always run on @from@ itself. The iteration then stops as soon
-- as one of the following holds:
--
-- * the current string equals @to@;
-- * 'succStr' returns the current string unchanged, so no further
--   progress is possible;
-- * the successor is longer than @to@. 'succStr' never shortens a
--   string, so @to@ can no longer be reached.
--
-- The last two conditions only affect calls that could never reach
-- @to@ and would otherwise loop forever.
upto :: (Monad m) => String -> String -> (String -> m a) -> m ()
upto from to f = f from >> rest
    where
        next = succStr from
        rest
            | from == to              = return ()
            | next == from            = return ()
            | length next > length to = return ()
            | otherwise               = upto next to f
>>>
テストは省略します。

{{comment}}