Advertisement
chrissharp123

Untitled

Mar 13th, 2018
315
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 0.87 KB | None | 0 0
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  4.  
  5. use XML::LibXML;
  6. use XML::LibXML::Reader;
  7. use HTML::TableExtract;
  8. use LWP::UserAgent;
  9. use Data::Dumper;
  10.  
  # Fetch the relator code page at $url and extract the two columns of its
  # code / relator-term table, then dump each extracted code for inspection.

  # Declared for later processing; nothing in the visible script reads or
  # writes these yet.
  my %relator;
  my $code = 0;
  my $fullname = 0;
  my $code_v;

  my $ua = LWP::UserAgent->new;
  my $url = "https://www.loc.gov/marc/relators/relacode.html";

  $ua->agent('Mozilla/5.0'); # the default 'libwww-perl/<version>' UA is not accepted

  # get() hands back a response object even when the request fails, so the
  # outcome has to be read from the response rather than from get()'s truth
  # value.
  my $response = $ua->get($url);
  die "Could not retrieve URL $url: " . $response->status_line . "\n"
      unless $response->is_success;

  my $content = $response->decoded_content;
  die "Could not decode the content of $url\n"
      unless defined $content;

  #print Dumper($content);

  # extract table data
  my $te = HTML::TableExtract->new( headers => [ 'code', 'relator term' ] );
  $te->parse($content);

  # No table matching the requested headers leaves nothing to call methods on.
  my $table = $te->first_table_found
      or die "No table with 'code' / 'relator term' headers found at $url\n";
  #my $table_tree = $table->tree;

  # columns() yields one array reference per extracted column.
  my @columns = $table->columns();
  die "Expected 2 columns in the relator table, found " . scalar(@columns) . "\n"
      if @columns < 2;

  # Dereference each column separately: assigning a list to
  # (@codes, @descriptions) would put everything into @codes and leave
  # @descriptions empty.
  my @codes        = @{ $columns[0] };
  my @descriptions = @{ $columns[1] };

  foreach my $col (@codes) {
      print Dumper($col) . "\n";
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement