Advertisement
banovski

Transliteration

Jan 22nd, 2025 (edited)
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Haskell 1.69 KB | Source Code | 0 0
-- The utility reads text from stdin and processes it line by line: it
-- lowercases the text, replaces every character that is not a space, a
-- Latin letter or a Cyrillic letter with a hyphen, replaces spaces with
-- underscores, and replaces Cyrillic letters with the corresponding Latin
-- letters or their combinations. Then it sends the result to stdout.
  5.  
  6. import Data.Char (toLower, ord)
  7. import System.Exit (die)
  8. import qualified Data.Set as Set
  9.  
  10. main :: IO ()
  11. main = do
  12.   input <- getContents
  13.   if null input
  14.     then die "No input!"
  15.     else
  16.         mapM_ (putStrLn .
  17.                cyrillicToLatin .
  18.                spacesToUnderscores .
  19.                extraCharsToHyphens .
  20.                stringToLower) $
  21.           lines input
  22.  
  23. stringToLower :: String -> String
  24. stringToLower = map toLower
  25.  
  26. requiredChars :: String
  27. requiredChars = ' ' : ['A' .. 'Z'] ++ ['a' .. 'z'] ++ ['А' .. 'Ё'] ++ ['а' .. 'ё']
  28.  
  29. cyrillicReplacements :: [String]
  30. cyrillicReplacements = ["a", "b", "v", "g", "d", "ye", "zh", "z", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "f", "h", "ts", "ch", "sh", "shch", "-", "y", "-", "e", "yu", "ya", "-", "yo"]
  31.  
  32. requiredCharsSet :: Set.Set Char
  33. requiredCharsSet = Set.fromList requiredChars
  34.  
  35. extraCharsToHyphens :: String -> String
  36. extraCharsToHyphens = map (\x -> if Set.member x requiredCharsSet then x else '-')
  37.  
  38. spacesToUnderscores :: String -> String
  39. spacesToUnderscores [] = []
  40. spacesToUnderscores (x:xs)
  41.   | x == ' ' = '_' : spacesToUnderscores xs
  42.   | otherwise = x : spacesToUnderscores xs
  43.  
  44.  
  45. cyrillicToLatin :: String -> String
  46. cyrillicToLatin [] = []
  47. cyrillicToLatin (x:xs) = aux x ++ cyrillicToLatin xs
  48.   where
  49.     aux char
  50.       | char `elem` ['а' .. 'ё'] = cyrillicReplacements !! (ord char - 1072)
  51.       | otherwise = [char]
  52.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement