Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- ##===============================================================================
- #
- # FILE: tags.pl
- #
- # USAGE: ./tags.pl
- #
- # DESCRIPTION: Helps with story tagging and organization, maybe more.
- #
- # AUTHOR: Azuhmier (aka taganon), azuhmier@gmail.com
- # ORGANIZATION: HMOFA
- # VERSION: 1.0
- # CREATED: September 2019
- #===============================================================================
- use strict;
- use warnings;
- use autodie;
- use Storable qw(dclone);
- use Data::Dumper;
- use List::MoreUtils 'uniq';
- use lib ($ENV{HOME}.'/.hmofa/lib/', $ENV{HOME}.'/Documents/hmofa/lib/');
- use Dir_Setup;
- use hmofa_dspt ':all';
- {
- #==================================|| GLOBAL ||===================================#
- # DIRECTORIES #{{{
- my $MASTER = $Dir_Setup::PATHS{MASTER};
- my $output_dir = $Dir_Setup::PATHS{output_dir};
- my $output_dir2 = $Dir_Setup::PATHS{output_dir_2};
- my $target = $Dir_Setup::PATHS{target_dir };
- #}}}
- # FILE PATHS #{{{
- my $Catalog = $MASTER.'/'.$Dir_Setup::PATHS{paste_dir}.$Dir_Setup::PATHS{source_file}; # Library
- my $Copy_Catalog = $target.'/'.$Dir_Setup::PATHS{output_file_copy}; #relative pathname to lib-copy
- my $Kosher_Catalog = $MASTER.'/'.$Dir_Setup::PATHS{paste_dir}.$Dir_Setup::PATHS{output_file_kosher};
- # goto_files/
- my $tag_file = 'tag_bin.txt';
- my $name_file = 'tag_names.txt';
- my $ops_file = 'ops.txt';
- my $ops_names_file = 'op_names.txt';
- my $ops_group_file = 'op_groups.txt';
- #}}}
- # GLOBAL VARS #{{{
- my $fh; # FileHandle of tag catalog
- my @fixed;
- #}}}
- # FORWARD DECLARATIONS #{{{
- sub Lib_analy;
- sub get_tags;
- sub libfix;
- #}}}
- #==================================|| MAIN ||===================================#
- {
- #-----| Catalog Modification |------{{{
- # Analysis of Catalog
- open my $fh_Catalog, '<', $Catalog
- or die "Cannot open '$Catalog' in read-write mode: $!";
- ( my $dspt_Catalog,
- my $original ) = Lib_analy($fh_Catalog, $output_dir);
- close $fh_Catalog
- or die "Cannot close $Catalog: $!";
- # Get Tags
- $dspt_Catalog = get_tags($dspt_Catalog, $tag_file, $name_file, 1, $output_dir);
- # Formating the Catalog
- lib_fmt($original);
- #}}}
- #-----| Catalog Copy Modification |------{{{
- # Clean up the Library
- @fixed = libfix($fh, $Copy_Catalog, $dspt_Catalog, $Catalog);
- # Analysis of Catalog Copy
- open my $fh_Copy_Catalog, '<', $Copy_Catalog
- or die "Cannot open '$Copy_Catalog' in read-write mode: $!";
- ( my $dspt_Copy_Catalog,
- my $modified ) = Lib_analy($fh_Copy_Catalog, $output_dir2);
- close $fh_Copy_Catalog
- or die "Cannot close $Copy_Catalog: $!";
- $dspt_Copy_Catalog = get_tags($dspt_Copy_Catalog, $tag_file, $name_file, 1, $output_dir2);
- #}}}
- #-----| Check |-----{{{
- # my @a = @{${$dspt->{tags}{analy}{tag_bin}}[0]};
- # my @b = @{${$dspt2->{tags}{analy}{tag_bin}}[0]};
- # my @mine;
- # NUM: for my $duck (@b)
- # {
- # for my $TAG (@a)
- # {
- # if ($duck eq $TAG)
- # {
- # shift @a;
- # next NUM;
- # }
- # }
- # push @mine, $duck;
- # }
- # print "$_\n" for @mine;
- # print "*******************\n";
- # my @mine2;
- # NUM: for my $duck (@a)
- # {
- # for my $TAG (@b)
- # {
- # if ($duck eq $TAG)
- # {
- # shift @b;
- # next NUM;
- # }
- # }
- # push @mine2, $duck;
- # }
- # print "$_\n" for @mine2;
- #}}}
- #-----| STDOUT |-----{{{
- # find length of longest key
- my $ub = 0;
- for my $key (keys %$dspt_Catalog) {
- if (length $key > $ub) {
- $ub = length $key
- }
- }
- my $ub2 = 0;
- for my $key (keys %$dspt_Copy_Catalog) {
- if (length scalar @{$dspt_Copy_Catalog->{$key}{LN}} > $ub2) {
- $ub2 = length scalar @{$dspt_Copy_Catalog->{$key}{LN}}
- }
- }
- # print element number of each key
- for my $key (sort keys %$dspt_Catalog) {
- my $bin = scalar @{$dspt_Catalog->{$key}{LN}};
- my $bin2 = scalar @{$dspt_Copy_Catalog->{$key}{LN}};
- printf "$key"." " x ( 2 + ($ub - length $key))."%s\n", '| '.$bin.' '.
- " " x ( $ub2 - length $bin2 ).'| '.$bin2;
- }
- #}}}
- }
- #==================================|| SUBROUTINES ||===================================#
- #-----|| Lib_analy() ||-------{{{
- # Lib_analy:
- sub Lib_analy {
- # FUN ARGS {{{
- my $fh = shift; # filehandle
- my $output_dir_in = shift;
- #}}}
- # FUN VARS #{{{
- my @ORIGINAL;
- my $dspt = gen_dspt();
- #}}}
- my $num_of_keys = scalar keys %$dspt; # number of keys
- #-----| BLOCK: get lines |-----{{{
- {
- while (my $line = <$fh>) {
- # WHILE: line at file handle pointer
- my $count; # number regexp match fails
- $line =~ s/
- //g; #removes carriage returns
- for my $key (keys %$dspt) {
- # FOR: every first level key in the dispatch table
- my $path = \$dspt->{$key}{file_path};
- my $key_reff = $dspt->{$key};
- $$path = $output_dir_in.'/'.$key.'.txt';
- if ($key_reff->{re} && $line =~ /$key_reff->{re}/) {
- push @{$key_reff->{LN}}, $.;
- push @{$key_reff->{match}}, $line;
- if ($key_reff->{group1}) {push @{$key_reff->{group1}} , $1;}
- if ($key_reff->{group2}) {push @{$key_reff->{group2}} , $2;}
- if ($key_reff->{group3}) {push @{$key_reff->{group3}} , $3;}
- if ($key_reff->{group4}) {push @{$key_reff->{group4}} , $4;}
- if ($key_reff->{group5}) {push @{$key_reff->{group5}} , $5;}
- if ($key_reff->{group6}) {push @{$key_reff->{group6}} , $6;}
- }
- else {
- # ELSE: no matches if count = the number of keys
- ++$count;
- }
- }
- if ($flag && $num_of_keys == $count) {
- push @{$dspt->{unkown}{LN}}, $.;
- push @{$dspt->{unkown}{match}}, $line;
- $flag = 0;
- }
- push @ORIGINAL, $line;
- }
- }
- #}}}
- return $dspt, \@ORIGINAL;
- }
- #}}}
- #-----|| get_tags ||-------{{{
- sub get_tags {
- # FUN ARGS #{{{
- my $dspt = shift;
- my $tag_file = shift;
- my $name_file = shift;
- my $output = shift;
- my $output_dir_in = shift;
- #}}}
- # FUN VARS #{{{
- my $tags_raw = ${$dspt->{tags}{analy}{raw}}[0];
- my $tags = ${$dspt->{tags}{analy}{tag_bin}}[0];
- my $tag_lnums = ${$dspt->{tags}{analy}{tag_bin}}[1];
- my $tag_names = ${$dspt->{tags}{analy}{tag_names}}[0];
- my $tag_name_lnums = ${$dspt->{tags}{analy}{tag_names}}[1];
- my $ops_raw = ${$dspt->{tags}{analy}{raw_ops}}[0]; # raw operator
- my $ops_raw_lnums = ${$dspt->{tags}{analy}{raw_ops}}[1];
- my $ops = ${$dspt->{tags}{analy}{ops_bin}}[0];
- my $ops_lnums = ${$dspt->{tags}{analy}{ops_bin}}[1];
- my $ops_names = ${$dspt->{tags}{analy}{ops_names}}[0];
- my $ops_name_lnums = ${$dspt->{tags}{analy}{ops_names}}[1];
- #}}}
- #-----| GET RAW TAGS FROM TAGLINES |-----{{{
- for my $tagln_LN ( @{$dspt->{tags}{LN}} ) {
- # FOR: tagline line numbers
- for my $key ( grep {m/group/} keys %{$dspt->{tags}} ) {
- # FOR: non-operater tag groups
- if ($key =~ /group[^356]/) {
- my $tag_group = shift @{$dspt->{tags}{$key}};
- # get tag group even if UNDEF
- if ($tag_group) {
- while ( $tag_group =~ /([^$d]+)/g ) {
- # WHILE: current tag group contains characters
- # that are not the delemiter: $d
- push( @$tags_raw, $1 );
- # push captured group onto the RAW TAGS array
- push( @$tag_lnums, $tagln_LN );
- # push current tagline line number onto the...
- # ...TAG LINE NUMBERS array.
- }
- }
- }
- else {
- my $op_group = shift @{$dspt->{tags}{$key}}; # get tag group even if UNDEF
- if ($op_group) {
- while ( $op_group =~ /(.+)/g ) {
- # WHILE: current tag group contains characters that
- # are not the delemiter: $d
- my $var = $op_group;
- push( @$ops_raw, $1 );
- # push captured group onto the RAW TAGS array
- push( @$ops_raw_lnums, $tagln_LN );
- # push current tagline line number onto the...
- while ( $var =~ /([^\s])\1*/g ) {
- push( @$ops, $& );
- # push captured group onto the RAW TAGS array
- push( @$ops_lnums, $tagln_LN );
- # push current tagline line number onto the...
- }
- }
- }
- }
- }
- }
- #}}}
- #-----| GETTING OP NAMES |-----{{{
- my @idx = sort {uc($$ops_raw[$a]) cmp uc($$ops_raw[$b])} 0 .. $#$ops_raw;
- @$ops_raw = @$ops_raw[@idx];
- @$ops_raw_lnums = @$ops_raw_lnums[@idx];
- @idx = sort {uc($$ops[$a]) cmp uc($$ops[$b])} 0 .. $#$ops;
- @$ops = @$ops[@idx];
- @$ops_lnums = @$ops_lnums[@idx];
- #}}}
- #-----| GETTING OP NAMES |-----{{{
- @$ops_names = uniq(@$ops);
- #}}}
- #-----| GET TAG NAME LINE NUMBERS IN THE TAG_BIN ARRAY |-----{{{
- my @ops_names_copy = @$ops_names; # make copy of tagnames
- my $count = 0; # set count that will act as the line numbers
- for my $op ( @$ops ) {
- # FOR: every tag in the tag bin
- ++$count;
- if ( $ops_names_copy[0] && $op =~ /\Q$ops_names_copy[0]\E/ ) {
- # IF: current tag matches the current tag name
- push( @$ops_name_lnums, $count );
- # push current count to tag_name line numbers
- shift @ops_names_copy; # get next tag name
- }
- }
- #}}}
- #-----| CLEANING TAGS |-----{{{
- for my $line ( @$tags_raw ) {
- # FOR: raw tags
- $line =~ s/^\s*([^\s])/$1/g; # removes spaces before tag
- $line =~ s/([^\s])\s*$/$1/g; # removes spaces after tag
- $line =~ s/\?//g; # removes "?" from tag
- push @$tags, $line; # push cleaned tag to tag_bin array
- }
- #}}}
- #-----| SORTING TAGS: CASE INSENSITIVE |-----{{{
- @idx = sort {uc($$tags[$a]) cmp uc($$tags[$b])} 0 .. $#$tags;
- @$tags = @$tags[@idx];
- @$tag_lnums = @$tag_lnums[@idx];
- #}}}
- #-----| GET TAG NAMES |-----{{{
- # get tag names and their linenumbers
- @$tag_names = uniq( sort {uc($a) cmp uc($b)} @$tags );
- #}}}
- #-----| GET TAG NAME LINE NUMBERS IN THE TAG_BIN ARRAY |-----{{{
- my @tag_names_copy = @$tag_names; # make copy of tagnames
- $count = 0; # set count that will act as the line numbers
- # FOR: every tag in the tag bin
- for my $tag ( @$tags ) {
- ++$count;
- if ( $tag_names_copy[0] && $tag =~ /\Q$tag_names_copy[0]\E/ ) {
- # IF: current tag matches the current tag name
- push( @$tag_name_lnums, $count );
- # push current count to tag_name line numbers
- shift @tag_names_copy; # get next tag name
- }
- }
- #}}}
- #-----| OUTPUT FILES |-----{{{
- if ( $output ) {
- # IF: output argument was provided and it's true
- # output fmtd_tgln
- open $fh, '>', $output_dir_in.'/'.$name_file;
- print $fh shift @$tag_name_lnums, " $_\n" for @$tag_names;
- close $fh;
- open $fh, '>', $output_dir_in.'/'.'tags_only.txt';
- print $fh "$_\n" for @$tag_names;
- close $fh;
- # output tags
- open $fh, '>', $output_dir_in.'/'.$tag_file;
- print $fh shift @$tag_lnums ," $_\n" for @$tags;
- close $fh;
- open $fh, '>', $output_dir_in.'/'.$ops_names_file;
- print $fh shift @$ops_name_lnums, " $_\n" for @$ops_names;
- close $fh;
- open $fh, '>', $output_dir_in.'/'.$ops_file;
- print $fh shift @$ops_lnums," $_\n" for @$ops;
- close $fh;
- open $fh, '>', $output_dir_in.'/'.$ops_group_file;
- print $fh shift @$ops_raw_lnums ," $_\n" for @$ops_raw;
- close $fh;
- open $fh, '>', $output_dir_in.'/url_only.txt';
- my @line = @{$dspt->{url}{match}};
- print $fh grep { /^https:\/\/pastebin.com\/\w[^\/]/ } @line;
- close $fh;
- for my $key ( keys %$dspt ) {
- # FOR: every key of dispatch table
- my $file_path = $dspt->{$key}{file_path};
- open $fh, '>', $file_path;
- my @line = @{$dspt->{$key}{LN}};
- print $fh shift @line," $_" for @{$dspt->{$key}{match}};
- close $fh;
- }
- }
- else {
- }
- #}}}
- return $dspt;
- }
- #}}}
- #-----|| libfix() ||-------{{{
- sub libfix {
- # FUN ARGS #{{{
- my $fh = shift; # FileHandle brah
- my $Copy_Catalog = shift; # Path to Library Copy
- my $dspt = shift; # Dispatch Table
- my $fname_in = shift; # Path to Library
- #}}}
- # FUN VARS #{{{
- my @COPY; # Array to Store Copy of Library
- my @FIXED; # Array to Store the Modified Copy of the Library
- #}}}
- #-----| MAKE ARRAY COPY OF LIBRARY |-----{{{
- open($fh, '<', $fname_in); # Open Library for Reading
- # WHILE: a Line Exist at the FileHandle Pointer
- while (my $line = <$fh>)
- {
- push @COPY, $line; # Push current line to @COPY
- }
- close $fh;
- #}}}
- #-----| LIBRARY COPY FILE OPENING/CREATION |-----{{{
- open($fh, '>', $Copy_Catalog); # Open or Create File for Library Copy
- close $fh;
- #}}}
- #-----| WRITE TO LIB_COPY FILE AND MODIFY IT |-----{{{
- open($fh, '+<', $Copy_Catalog); # Open Library Copy for Read/Write
- print $fh @COPY; # Write @COPY to Library Copy File
- truncate $fh, tell($fh); # Truncate File at Current Postion of the FileHandle...
- # ... Pointer
- #}}}
- #-----| READ FILE AND MAKE FIXES TO LINES |-----{{{
- seek $fh,0,0; # Put FileHandle Pointer at BOF
- # WHILE: a Line Exist at the FileHandle Pointer
- while (my $line = <$fh>)
- {
- # UNDER A TITLE
- # IF: Current Line is Under a Title
- if ($flag)
- {
- # TAGLINE
- # IF: Current Line is a TagLine
- if ($line =~ /$dspt->{tags}{re}/) # Also sets Regexp Capture Groups
- {
- # Regexp Capture Groups:
- # [atag][btag]$3 | COMPLETE
- # [halftag]$5 | INCOMPLETE
- # $6 | INCOMPLETE
- #-----| FIX INCOMPLETE TAGS |-----{{{
- my $atag = \$1; # Anthro Tags reff
- my $btag = \$2; # Story Tags reff
- my $halftag = \$4; # HalfTag reff
- my @ops = (\$3,\$5,\$6); # Operators reff
- # HALFTAG
- # IF: Current Line is HalfTag
- if ($kind eq "half")
- {
- $line =~ s/.*/\[\]$&/; # Insert Single Tag Bracket
- }
- # Only Operater(s)
- # ELSIF: Current Line only cosists of Operators
- elsif (${$ops[2]})
- {
- $line =~ s/.*/\[\]\[\]$&/; # Insert Empty Tag Brackets
- }
- #}}}
- #-----| TAGLINE CLEANING |-----{{{
- $line =~ /$dspt->{tags}{re}/; # Reset Capture Groups Now That...
- # ... All Taglines are Complete
- $line =~ s/$d+\s*($d)/$1/g; # Remove extra Commas to the Left
- $line =~ s/($d)\s*$d+/$1/g; # Remove extra Commas to the Right
- $line =~ s/\s*(\[)\s*/$1/g; # Remove extra Spaces around Left Brace
- $line =~ s/\s*(\])[ ]*([^ ])[ ]*/$1$2/g; # Remove extra Spaces around Right Brace
- $line =~ s/$d*(\[)$d/$1/g; # Remove extra Commas around Left Brace
- $line =~ s/$d*(\])$d*/$1/g; # Remove extra Commas around Right Brace
- $line =~ s/($d\s)\s*/$1/g; # Remove extra Spaces Right of Comma
- $line =~ s/\s*($d)/$1/g; # Remove extra Spaces Left of Comma
- $line =~ s/($d)([^ ])/$1 $2/g; # IF no space after comma, add one
- $line =~ s/\s*(\s)\s*/$1/g; # Remove Extra Spaces
- #}}}
- #-----| DUPLICATE TAGS |-----{{{
- $line =~ /$dspt->{tags}{re}/; # reset match variables now that...
- my @past_matches = '';
- my $duplicate_found = 0;
- my @dupe;
- while ($line =~ /[^\[\],\n]+/g)
- {
- my $match = $&;
- $match =~ s/^\s//;
- for my $ele (uniq(@past_matches))
- {
- if ($ele eq $match)
- {
- $duplicate_found=1;
- push @dupe, $match;
- }
- else
- {
- $duplicate_found=0;
- }
- }
- push @past_matches, $match;
- }
- no warnings 'uninitialized';
- for (@dupe)
- {
- $line =~ s/(?<!\w)\s$_\?*$d|(\[)$_\?*(\])|$d\s$_\?*(\])|(\[)$_$d\s/$1$2$3$4/;
- }
- use warnings;
- #}}}
- #-----| DUPLICATE OPS |-----{{{
- $line =~ /$dspt->{tags}{re}/; # reset match variables now that...
- @past_matches = '';
- $duplicate_found = 0;
- @dupe =();
- $line =~ /\]\[.*\]\K[^\]\[]+/;
- my $OPS = $&;
- while ($OPS =~ /./g)
- {
- my $match = $&;
- for my $ele (uniq(@past_matches))
- {
- if ($ele eq $match)
- {
- $duplicate_found=1;
- push @dupe, $match;
- }
- else
- {
- $duplicate_found=0;
- }
- }
- push @past_matches, $match;
- }
- for (@dupe)
- {
- $OPS =~ s/\Q$_\E//;
- $line =~ s/\][^\]\[]+/\]$OPS/;
- }#}}}
- #-----| SUBSTITUTING OPERATORS |-----{{{
- $line =~ /$dspt->{tags}{re}/; # reset match variables now that...
- # ...tagline in complete
- my %special = %{$dspt->{tags}{special}};
- my %tag_subs = %{$special{tag_subs}};
- my %OPS = %{$special{ops}};
- for my $key (keys %OPS)
- {
- my $ARRAY = $OPS{$key};
- my $NEW = @$ARRAY[1];
- my $op = @$ARRAY[0];
- my $old_list = @$ARRAY[2];
- if ($line =~ /\].*\Q$op\E/)
- {
- $line =~ s/\Q$op\E//g; # REMOVE $
- #-----| CREATE OLD |-----{{{
- for my $OLD (@$old_list)
- {
- my $EXPR = '';
- my $COUNT = 0;
- my $LEN = scalar @{$tag_subs{full_sub}};
- #-----| CREATE REGEXP |-----#
- for my $string (@{$tag_subs{full_sub}})
- {
- $COUNT++;
- $EXPR .= $string;
- if ($COUNT > $LEN)
- {
- next;
- }
- $EXPR .=$OLD;
- }
- my $regexp = qr/$EXPR/;
- $line =~ s/(?<!\w)\s$OLD\?*$d|(\[)$OLD\?*(\])|$d\s$OLD\?*(\])|(\[)$OLD$d\s/$1$2$3$4/ig;
- }
- #}}}
- #-----| INSERT NEW |-----#
- $line =~ /$dspt->{tags}{re}/;
- # IF:
- if (!$$btag)
- {
- $line =~ s/$regexp{tag_sub}/$&$NEW/;
- }
- # ELSE
- else
- {
- $line =~ s/$regexp{tag_sub}/$&$NEW$d /;
- }
- }
- }
- }
- #}}}
- #-----| MISSING TAGLINES |-----{{{
- # URL
- # ELSIF: current line matches url regexp
- elsif ($line =~ /$dspt->{url}{re}/)
- {
- $line =~ s/$dspt->{url}{re}/\[\]\[\]\n$&/;
- }
- # UNKOWN
- # ELSE: currlent line doesn't follow any regexp put forth
- else
- {
- $flag=0; # set flag to zero to let program know that it is done...
- # ... fixing line that was under title for there is no regexp...
- # ... embedded code for unkown
- }
- #}}}
- }
- #-----| TITLE FINDING AND FLAGGING |-----{{{
- # TITLE
- # ELSIF: current line matches title regexp
- elsif ($line =~ /$dspt->{title}{re}/)
- {
- }
- push @FIXED, $line; # push fixed line to the fixed array
- #}}}
- }# END of WHILE
- #}}}
- #-----| WRITE FIXES BACK |-----{{{
- seek $fh, 0, 0;
- # FOR: every $line of the fixed array
- for my $line (@FIXED)
- {
- $line =~ s/
- //g; # remove carriage returns
- print $fh $line;
- }
- truncate $fh, tell($fh);
- close $fh;
- return @FIXED;
- #}}}
- };
- #}}}
- #-----|| lib_fmt() ||-------{{{
- sub lib_fmt {
- my $lib_array_ref = shift;
- my @lib_array = @$lib_array_ref;
- my @fmt_lib;
- # Master | Julia | Markdown
- # By | by\s | ##
- # >title | "title" | [title](url)
- # # | # | >
- # , | \s | \s
- # | ;; | ;``;
- for my $line (@lib_array) {
- if ($line =~ /^>/) {
- # If: Title
- $line =~ s/>(.*)/"$1"/;
- $line =~ s/\s+(")/$1/;
- }
- elsif ($line =~ /^~/) {
- # ELSIf: Dscr
- $line =~ s/~(.*)/#$1/;
- }
- # IF: Author
- elsif ($line =~ /^By/) {
- $line =~ s/By(.*)/by$1/;
- }
- push @fmt_lib, $line;
- }
- open (my $fh, '>', $Kosher_Catalog);
- print $fh @fmt_lib;
- close $fh;
- }
- #}}}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement