Advertisement
here2share

# regex_to_get_images_etc.py ^ Apr 25th

Apr 22nd, 2015
412
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.15 KB | None | 0 0
  1. # regex_to_get_images_etc.py
  2.  
  3. '''
  4. This should filter out from any valid domain, including localhost, jpg, jpeg, gif and png strings which might not be what you want, but it could be modified.
  5.  
  6. A more traditional regexp for this might look like the below:
  7.  
  8. ^https?://(?:[a-z\-]+\.)+[a-z]{2,6}(?:/[^/#?]+)+\.(?:jpe?g|gif|png)$
  9.           |-------- domain --------|--- path ---|---- extension ---|
  10.  
  11. even...
  12.  
  13. print re.findall(r'\S.+jpg|jpeg|JPG|JPEG', url)
  14. '''
  15.  
  16. import re
  17.  
  18. '''
  19. Regex cookbook
  20. ^       - start of string
  21. [       - beginning of character group
  22. a-z     - any lowercase letter
  23. A-Z     - any uppercase letter
  24. 0-9     - any digit
  25. _       - underscore
  26. ]       - end of character group
  27. $       - end of string
  28. .       - Matches any character, except for line breaks if dotall is false.
  29. *       - Matches 0 or more of the preceding character.
  30. +       - Matches 1 or more of the preceding character.
  31. ?       - The preceding character is optional. Matches 0 or 1 occurrence.
  32. \d      - Matches any single digit
  33. \w      - Matches any word character (alphanumeric & underscore).
  34. [XYZ]   - Matches any single character from the character class.
  35. [XYZ]+  - Matches one or more of any of the characters in the set.
  36. (|)     - one string or the other, either side of the pipe
  37. i       - case-insensitive flag: matches upper- or lowercase alike
  38. All regexes below are my own thought-out regular expressions; if you feel the need to use them, then please do, by all means.
  39. '''
  40.  
  41. variable = [
  42.     '([a-zA-Z])+@([a-zA-Z0-9])+\.[a-zA-Z.]{2,5}', # email address check
  43.     '^(https?:\/\/)?(www.)?([\da-z\.-]+)\.([a-z\.]{2,6})\/?$', # check url
  44.     '^(AB|AC)\d{8,12}$', # Check if string starts with with AB or AC preceeded with 8-10 numbers for example AC12345678
  45.     '([a-zA-Z0-9])+(\.(?i)(gif|jpg|jpeg|tiff|png))+$', # does string contain image extentions
  46.     '^#([a-f0-9]{6}|[a-f0-9]{3})', # matches a colour hex value i.e. #a5a5a5 or #fff
  47.     '^((LD|AZ)\d{5})(\.)([a-zA-Z0-9]+\.[a-zA-Z.]{2,5})$', # Special
  48.     '^<([a-z]+)([^<]+)*(?:>(.*)<\/\1>|\s+\/>)$', # Matching an HTML Tag
  49.     '(.*[?=.*A-Z])([?=.*a-z])(?=.*\d).{6,15}$', # Username check
  50.     '(([A-Z]{2})\s?(\d{2})\s?([A-Z]){3})', # match english car registration
  51.     '(.*[?=.*A-Z])([?=.*a-z])(?=.*\d)(?=.*[^a-zA-Z0-9]).{6,15}$', # Special
  52.     '^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}?:\d{2,4})$', # IP Address Check
  53.     '^is_unique\[(.*)\]', # Get value between is_unique[]
  54.     '([<p>]*?{(\/)?(one_half|one_third|two_third|one_fourth|two_fourth|three_fourth|one_fifth|two_fifth|three_fifth|four_fifth|one_sixth|two_sixth|three_sixth|four_sixth|five_sixth)_?(last)?\}[<\/p>]*)' # column short code for example match {one_half}
  55. ]
  56.  
  57.  
  58. # # Checks if email address is valid
  59.  
  60. def do_check(regex, email):
  61.  
  62.     if not re.match(r'' + regex, email):
  63.  
  64.         print email + " does not match"
  65.  
  66.     else:
  67.  
  68.         print email + " matches"
  69.  
  70.  
  71. def replace_span_with_strong(string):
  72.  
  73.     output = re.sub(r'^(<(span)>)(.*)(</(span)>)$', r'<strong>\3</strong>', string)
  74.  
  75.     print output
  76.  
  77.  
  78. replace_span_with_strong('<span>Replace span with strong</span>')
  79.  
  80. # # Email address check
  81.  
  82. do_check(variable[0], 'simon@logicdesign.co.uk')
  83. do_check(variable[0], 'simon@logicdesign.com')
  84.  
  85. # # URL check
  86.  
  87. do_check(variable[1], 'https://www.google.co.uk/')
  88. do_check(variable[1], 'google.co.uk///')
  89.  
  90.  
  91. # # Check if string starts with with AB or AC preceeded with 8-10 numbers for example AC12345678
  92.  
  93. do_check(variable[2], 'AC12345678')
  94. do_check(variable[2], 'AC123456789999999')
  95.  
  96.  
  97. # # does string contain image extentions
  98.  
  99. do_check(variable[3], 'image.jpg')
  100. do_check(variable[3], 'image.JPG')
  101. do_check(variable[3], 'file.php')
  102.    
  103.  
  104. # # matches a colour hex value i.e. #a5a5a5 or #fff
  105.  
  106. do_check(variable[4], '#a5a5a5')
  107. do_check(variable[4], '#fff')
  108. do_check(variable[4], 'a7a7a7')
  109.  
  110.  
  111. # # does string contain 2 letters either LD or AZ at the start, followed by 5 numbers and a (.) then a domain name. For example LD89780.simonfletcher.co.uk
  112.  
  113. do_check(variable[5], 'LD89780.simonfletcher.co.uk')
  114. do_check(variable[5], 'LD89780.simonfletcher')
  115.  
  116.  
  117. # # Username must contain one uppercase and one lowercase charater and two digits
  118.  
  119. do_check(variable[7], 'Fletcher890')
  120. do_check(variable[7], 'Fletc0her')
  121.  
  122.  
  123. # # Match an generic english car registration
  124.  
  125. do_check(variable[8], 'AD 64 HGV')
  126. do_check(variable[8], 'AD64HGV')
  127. do_check(variable[8], '64ADHGV')
  128.  
  129.  
  130. # # Check password for one capital letter and one special character. The string can only be 6-15 characters long
  131.  
  132. do_check(variable[9], 'fLetch/er89')
  133. do_check(variable[9], 'fLetcher89')
  134.  
  135.  
  136. # # Matching an IP Address including optional port number
  137.  
  138. do_check(variable[10], '73.126.33.108:3000')
  139. do_check(variable[10], '73.126.33')
  140.  
  141. # # match the value within the "is_unique[]"
  142. do_check(variable[11], 'is_unique[user.email]')
  143. do_check(variable[11], 'is_unique[user.email')
  144.  
  145. # # match value between the {} and so you are able to change to the necessary HTML alternative"
  146. do_check(variable[12], '{one_half}')
  147. do_check(variable[12], '{/one_half}')
  148. do_check(variable[12], '{/one_half')
  149.  
  150. code = ''' re.findall(r"(?<!\d)\d\d(?!\d)", "abc123#d$45^&678>9xyz54321*25") '''
  151. print "to reveal all digits in a string that have the length of 2:"
  152. print ">>>", code
  153. print re.findall(r"(?<!\d)\d\d(?!\d)", "abc123#d$45^&678>9xyz54321*25")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement