Advertisement
treyhunner

regex exercise answers

May 19th, 2016
445
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.88 KB | None | 0 0
  1. import re
  2.  
  3.  
  4. # Decimal Numbers
  5.  
  6. def is_number(num_string):
  7.     return bool(re.search(r'^[-+]?(\*\.?\d+|\d+\.)$', num_string))
  8.  
  9.  
  10. # Hex Colors
  11.  
  12. def is_hex_color(string):
  13.     return bool(re.search(r'^#([\da-f]{3}){1,2}$', string, re.IGNORECASE))
  14.  
  15. def is_hex_color(string):
  16.     return bool(re.search(r'^#[\da-f]{3}([\da-f]{3})?$', string, re.IGNORECASE))
  17.  
  18. def is_hex_color(string):
  19.     return bool(re.search(r'^#([\da-f]{3}|[\da-f]{6})$', string, re.IGNORECASE))
  20.  
  21.  
  22. # Palindromes
  23.  
  24. [m.group(0) for m in re.finditer(r'\b(.)(.).\2\1\b', dictionary)]
  25.  
  26.  
  27. # Double Double
  28.  
  29. [m.group(0) for m in re.finditer(r'\b.*(.)\1.\1\1.*\b', dictionary)]
  30.  
  31.  
  32. # Repetitive Words
  33.  
  34. [m.group(0) for m in re.finditer(r'\b(.)(.)\1\2\b', dictionary)]
  35.  
  36.  
  37. # Get File Extension
  38.  
  39. # Works with examples given
  40.  
  41. def get_extension(filename):
  42.     return re.search(r'([^.]*)$', filename).group()
  43.  
  44. # Works with no extension
  45.  
  46. def get_extension(filename):
  47.     match = re.search(r'\.([^.]*)$', filename)
  48.     return match.group(1) if match else ''
  49.  
  50. # Works with only word-based extensions (try ``a.b/c``)
  51.  
  52. def get_extension(filename):
  53.     match = re.search(r'\.(?!.*\W)([^.]*)$', filename)
  54.     return match.group(1) if match else ''
  55.  
  56.  
  57. # Normalize JPEG Extension
  58.  
  59. def normalize_jpeg(filename):
  60.     return re.sub(r'\.jpe?g$', r'.jpg', filename, flags=re.IGNORECASE)
  61.  
  62.  
  63. # Normalize Whitespace
  64.  
  65. def normalize_whitespace(string):
  66.     return re.sub(r'\s+', r' ', string)
  67.  
  68.  
  69. # Compress blank lines
  70.  
  71. def compress_blank_lines(string, max_blanks):
  72.     regex = r'\n{{{n},}}'.format(n=max_blanks)
  73.     return re.sub(regex, '\n' * max_blanks, string)
  74.  
  75.  
  76. # Normalize URL
  77.  
  78. def normalize_domain(url):
  79.     return re.sub(r'^https?://(www.)?treyhunner.com', r'https://treyhunner.com', url)
  80.  
  81.  
  82. # Linebreaks
  83.  
  84. def convert_linebreaks(string):
  85.     string = re.sub(r'\n{2,}', '</p><p>', string)
  86.     string = re.sub(r'\n', '<br>', string)
  87.     return '<p>{}</p>'.format(string)
  88.  
  89. def convert_linebreaks(string):
  90.     return ''.join(
  91.         '<p>{}</p>'.format(p)
  92.         for p in re.split(r'\n{2,}', string)
  93.     ).replace('\n', '<br>')
  94.  
  95.  
  96. # All Vowels
  97.  
  98. re.findall(r'\b(?=.*a)(?=.*e)(?=.*i)(?=.*o)(?=.*u).{1,9}\b', dictionary)
  99.  
  100.  
  101. # Unique Letters
  102.  
  103. [m.group(0) for m in re.finditer(r'\b(?!.*(.).*\1).{10}\b', dictionary)]
  104.  
  105.  
  106. # HTML Encode Ampersands
  107.  
  108. def encode_ampersands(string):
  109.     return re.sub(r'&(?![#\w]+;)', '&amp;', string)
  110.  
  111.  
  112. # Broken Markdown Links
  113.  
  114. # With verbose regular expression
  115.  
  116. def find_broken_links(string):
  117.     BROKEN_LINKS_RE = re.compile(r'''
  118.        \[ (?P<text> .*?) \]
  119.        \[ (?P<ref> .+?) \]
  120.        (?!
  121.            [\s\S]+
  122.            \[ (?P=ref) \]: \s+
  123.        )
  124.    ''', re.VERBOSE | re.IGNORECASE)
  125.     return [
  126.         (m.group('text'), m.group('ref'))
  127.         for m in BROKEN_LINKS_RE.finditer(string)
  128.     ]
  129.  
  130. # Supporting implicit link names
  131.  
  132. BROKEN_RE1 = re.compile(r'''
  133.    \[ (?P<text> .*?) \]
  134.    \[ (?P<ref> .+?) \]
  135.    (?!
  136.        [\s\S]+
  137.        \[ (?P=ref) \]: \s+
  138.    )
  139. ''', re.VERBOSE | re.IGNORECASE)
  140. BROKEN_RE2 = re.compile(r'''
  141.    \[ (?P<ref> (?P<text> .+?)) \]
  142.    \[ \]
  143.    (?!
  144.        [\s\S]+
  145.        \[ (?P=text) \]: \s+
  146.    )
  147. ''', re.VERBOSE | re.IGNORECASE)
  148.  
  149. def find_broken_links(string):
  150.     return [
  151.         (m.group('text'), m.group('ref'))
  152.         for regex in (BROKEN_RE1, BROKEN_RE2)
  153.         for m in regex.finditer(string)
  154.     ]
  155.  
  156.  
  157. # Camel Case to Underscore
  158.  
  159. # This acts strangely with ``HTTPResponse``
  160.  
  161. def camel_to_underscore(string):
  162.     return re.sub(r'(.)([A-Z])', r'\1_\2', string).lower()
  163.  
  164. # This turns ``HTTPResponse`` into ``httpresponse``
  165.  
  166. def camel_to_underscore(string):
  167.     return re.sub(r'(?<=[a-z])([A-Z])', r'_\1', string).lower()
  168.  
  169. # This turns ``HTTPResponse`` into ``http_response``
  170.  
  171. def camel_to_underscore(string):
  172.     return re.sub(r'(?<=.)([A-Z])(?=[^A-Z])', r'_\1', string).lower()
  173.  
  174.  
  175. # Get Inline Markdown Links
  176.  
  177. INLINE_RE = re.compile(r'''
  178.    \[ (?P<text> .*?) \]
  179.    \( (?P<url> .+?) \)
  180. ''', re.VERBOSE)
  181.  
  182.  
  183. def get_inline_links(string):
  184.     return [
  185.         (m.group('text'), m.group('url'))
  186.         for m in INLINE_RE.finditer(string)
  187.     ]
  188.  
  189.  
  190. # Get All Markdown Links
  191.  
  192. REF1_RE = re.compile(r'''
  193.    \[ (?P<text> .*?) \]
  194.    \[ (?P<ref> .+?) \]
  195.    (?=
  196.        [\s\S]+
  197.        \[ (?P=ref) \]: \s+
  198.        (?P<url> .+)
  199.    )
  200. ''', re.VERBOSE | re.IGNORECASE)
  201. REF2_RE = re.compile(r'''
  202.    \[ (?P<text> .*?) \]
  203.    \[\]
  204.    (?=
  205.        [\s\S]+
  206.        \[ (?P=text) \]: \s+
  207.        (?P<url> .+)
  208.    )
  209. ''', re.VERBOSE | re.IGNORECASE)
  210. INLINE_RE = re.compile(r'''
  211.    \[ (?P<text> .*?) \]
  212.    \( (?P<url> .+?) \)
  213. ''', re.VERBOSE)
  214.  
  215.  
  216. def get_markdown_links(string):
  217.     results = (
  218.         r.finditer(string)
  219.         for r in (INLINE_RE, REF1_RE, REF2_RE)
  220.     )
  221.     return [
  222.         (m.group('text'), m.group('url'))
  223.         for matches in results
  224.         for m in matches
  225.     ]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement