happy-barney

perl regular expressions

May 4th, 2020
356
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.74 KB | None | 0 0
  1. use Test::More;
  2. use Test::Deep;
  3.  
  4. my $recursion_limit = $] < 5.030
  5. ? 32766
  6. : 65534
  7. ;
  8.  
  9. my $DEFINE_ESCAPE_SEQUENCE = qr/
  10. (?(DEFINE)
  11. (?<Escape_Sequence>(?>
  12. \\
  13. (?:
  14. (?<char_escape> (?: [btnrf\'\"\\] ))
  15. | (?<octal_escape> (?: (?= [0-7]) [0-3]? [0-7]{1,2} ))
  16. | (?: u+ (?<hex_escape> [[:xdigit:]]{4} ))
  17. )
  18. ))
  19. )/x;
  20.  
  21. sub regex;
  22.  
  23. subtest "naive regex" => sub {
  24. note 'Naive approach to match - match single element multiple time';
  25. note 'limit = $recursion limit / (number of branches)';
  26.  
  27. my $regex = qr/
  28. \"
  29. (?: [^\"\\] | (?&Escape_Sequence) )*
  30. \"
  31.  
  32. $DEFINE_ESCAPE_SEQUENCE
  33. /sx;
  34.  
  35. regex "should match string up to recursion limit characters" =>
  36. with_snippet => 'x',
  37. with_size => $recursion_limit,
  38. with_regex => $regex,
  39. expect_match => 1,
  40. expect_warning => 0,
  41. ;
  42.  
  43. regex "should match string longer than recursion limit characters but emits warning" =>
  44. with_snippet => 'x',
  45. with_size => $recursion_limit + 1,
  46. with_regex => $regex,
  47. expect_match => 1,
  48. expect_warning => 1,
  49. ;
  50.  
  51. regex "should match string up to recursion limit escape sequences" =>
  52. with_snippet => '\\\\',
  53. with_size => $recursion_limit,
  54. with_regex => $regex,
  55. expect_match => 1,
  56. expect_warning => 0,
  57. ;
  58.  
  59. regex "should match string with about recursion limit escape sequences but emits warning" =>
  60. with_snippet => '\\\\',
  61. with_size => $recursion_limit + 1,
  62. with_regex => $regex,
  63. expect_match => 1,
  64. expect_warning => 1,
  65. ;
  66.  
  67. regex "should match string with up to recursion limit alternatives" =>
  68. with_snippet => 'x\\\\',
  69. with_size => $recursion_limit / 2,
  70. with_regex => $regex,
  71. expect_match => 1,
  72. expect_warning => 0,
  73. ;
  74.  
  75. regex "should not match string above recursion limit alternatives" =>
  76. with_snippet => 'x\\\\',
  77. with_size => $recursion_limit / 2 + 1,
  78. with_regex => $regex,
  79. expect_match => 0,
  80. expect_warning => 1,
  81. ;
  82.  
  83. regex "should not timeout with 2 * max recursion (but not match)" =>
  84. with_snippet => 'x\\\\',
  85. with_size => 2 + $recursion_limit,
  86. with_regex => $regex,
  87. expect_warning => 1,
  88. expect_timeout => 0,
  89. expect_match => 0,
  90. ;
  91.  
  92. done_testing;
  93. };
  94.  
  95. subtest "grouping regex" => sub {
  96. note 'Match multiple times sequence of chars-escapes';
  97.  
  98. my $regex = qr/
  99. \"
  100. (?: [^\"\\]* (?&Escape_Sequence)* )*
  101. \"
  102.  
  103. $DEFINE_ESCAPE_SEQUENCE
  104. /sx;
  105.  
  106. regex "should match string up to recursion limit characters" =>
  107. with_snippet => 'x',
  108. with_size => $recursion_limit,
  109. with_regex => $regex,
  110. expect_match => 1,
  111. expect_warning => 0,
  112. ;
  113.  
  114. regex "should match string longer than recursion limit characters without warning" =>
  115. with_snippet => 'x',
  116. with_size => $recursion_limit + 1,
  117. with_regex => $regex,
  118. expect_match => 1,
  119. expect_warning => 0,
  120. ;
  121.  
  122. regex "should match string up to recursion limit escape sequences" =>
  123. with_snippet => '\\\\',
  124. with_size => $recursion_limit,
  125. with_regex => $regex,
  126. expect_match => 1,
  127. expect_warning => 0,
  128. ;
  129.  
  130. regex "should match string with about recursion limit escape sequences but emits warning" =>
  131. with_snippet => '\\\\',
  132. with_size => $recursion_limit + 1,
  133. with_regex => $regex,
  134. expect_match => 1,
  135. expect_warning => 1,
  136. ;
  137.  
  138. regex "should match string with up to recursion limit alternatives" =>
  139. with_snippet => 'x\\\\',
  140. with_size => $recursion_limit / 2,
  141. with_regex => $regex,
  142. expect_match => 1,
  143. expect_warning => 0,
  144. ;
  145.  
  146. regex "should match string above recursion limit alternatives" =>
  147. with_snippet => 'x\\\\',
  148. with_size => $recursion_limit / 2 + 1,
  149. with_regex => $regex,
  150. expect_match => 1,
  151. expect_warning => 0,
  152. ;
  153.  
  154. regex "should match string shorter that max recursion without timeout" =>
  155. with_snippet => 'x\\\\',
  156. with_size => 1 + $recursion_limit,
  157. with_regex => $regex,
  158. expect_match => 1,
  159. expect_warning => 1,
  160. expect_timeout => 0,
  161. ;
  162.  
  163. regex "should timeout with 2 * max recursion" =>
  164. with_snippet => 'x\\\\',
  165. with_size => 2 + $recursion_limit,
  166. with_regex => $regex,
  167. expect_warning => 1,
  168. expect_timeout => 1,
  169. ;
  170.  
  171. done_testing;
  172. };
  173.  
  174. subtest "grouping regex with branch reset" => sub {
  175. note 'Match multiple times sequence of chars-escapes';
  176.  
  177. my $regex = qr/
  178. \"
  179. (?> (?: (?> [^\"\\] )* (?> (?&Escape_Sequence) )* )* )
  180. \"
  181.  
  182. $DEFINE_ESCAPE_SEQUENCE
  183. /sx;
  184.  
  185. regex "should match string up to recursion limit characters" =>
  186. with_snippet => 'x',
  187. with_size => $recursion_limit,
  188. with_regex => $regex,
  189. expect_match => 1,
  190. expect_warning => 0,
  191. ;
  192.  
  193. regex "should match string longer than recursion limit characters without warning" =>
  194. with_snippet => 'x',
  195. with_size => $recursion_limit + 1,
  196. with_regex => $regex,
  197. expect_match => 1,
  198. expect_warning => 0,
  199. ;
  200.  
  201. regex "should match string up to recursion limit escape sequences" =>
  202. with_snippet => '\\\\',
  203. with_size => $recursion_limit,
  204. with_regex => $regex,
  205. expect_match => 1,
  206. expect_warning => 0,
  207. ;
  208.  
  209. regex "should match string with about recursion limit escape sequences but emits warning" =>
  210. with_snippet => '\\\\',
  211. with_size => $recursion_limit + 1,
  212. with_regex => $regex,
  213. expect_match => 1,
  214. expect_warning => 1,
  215. ;
  216.  
  217. regex "should match string with up to recursion limit alternatives" =>
  218. with_snippet => 'x\\\\',
  219. with_size => $recursion_limit / 2,
  220. with_regex => $regex,
  221. expect_match => 1,
  222. expect_warning => 0,
  223. ;
  224.  
  225. regex "should match string above recursion limit alternatives" =>
  226. with_snippet => 'x\\\\',
  227. with_size => $recursion_limit / 2 + 1,
  228. with_regex => $regex,
  229. expect_match => 1,
  230. expect_warning => 0,
  231. ;
  232.  
  233. regex "should match string shorter that max recursion without timeout" =>
  234. with_snippet => 'x\\\\',
  235. with_size => 1 + $recursion_limit,
  236. with_regex => $regex,
  237. expect_match => 1,
  238. expect_warning => 1,
  239. expect_timeout => 0,
  240. ;
  241.  
  242. regex "should not timeout with 2 * max recursion (but no match)" =>
  243. with_snippet => 'x\\\\',
  244. with_size => 2 + $recursion_limit,
  245. with_regex => $regex,
  246. expect_match => 0,
  247. expect_warning => 1,
  248. expect_timeout => 0,
  249. ;
  250.  
  251. done_testing;
  252. };
  253.  
  254. done_testing;
  255.  
  256. sub regex {
  257. my ($title, %params) = @_;
  258.  
  259. my @warnings;
  260. local $SIG{__WARN__} = sub { push @warnings, @_; diag @_ };
  261.  
  262. my $snippet = $params{with_snippet};
  263. my $string = join '', '"', ($snippet) x $params{with_size}, '"';
  264. my $regex = $params{with_regex};
  265. my $expect = $params{expect_match};
  266.  
  267. my $had_timeout;
  268. my $got;
  269.  
  270. eval {
  271. local $SIG{ALRM} = sub { $had_timeout = 1; die };
  272. alarm 5;
  273.  
  274. $got = $string =~ m/^$regex/;
  275. };
  276.  
  277. if ($params{expect_timeout}) {
  278. ok $had_timeout, $title
  279. or diag "expected timeout but got ${\ ($got ? 'match' : 'no match') }";
  280. } elsif ($had_timeout) {
  281. fail "unexpected timeout ($title)";
  282. } else {
  283. $expect
  284. ? ok $got, $title
  285. : ok ! $got, $title
  286. ;
  287. }
  288.  
  289. if ($params{expect_warning}) {
  290. # Unrelated - perl warnings and exceptions should be objects or contain warning/error code
  291. # Current test may not be valid
  292. # grep "French locale warning" =~ ...
  293. # Imagine possibility of having one of following tests instead
  294. # grep $_->isa ('CORE::W::Regex::Complex')
  295. # grep $_ =~ m/^PERL5-W-123:/
  296. # grep $_ == PERL5-W-123
  297. # grep $_ == -123
  298. my $got = grep $_ =~ m/Complex regular subexpression recursion limit/, @warnings;
  299. ok $got, "recursion limit warning should appear ($title)";
  300. } else {
  301. ok ! @warnings, "no warning should appear";
  302. }
  303. }
Add Comment
Please, Sign In to add comment