tags.pl

#!/usr/bin/perl
##===============================================================================
#
#         FILE: tags.pl
#
#        USAGE: ./tags.pl
#
#  DESCRIPTION: Helps with story tagging and organization, maybe more.
#
#       AUTHOR: Azuhmier (aka taganon), [email protected]
# ORGANIZATION: HMOFA
#      VERSION: 1.0
#      CREATED: September 2019
#===============================================================================
use strict;
use warnings;
use autodie;
use Storable qw(dclone);
use Data::Dumper;
use List::MoreUtils 'uniq';
use lib ($ENV{HOME}.'/.hmofa/lib/', $ENV{HOME}.'/Documents/hmofa/lib/');
use Dir_Setup;
use hmofa_dspt ':all';
{
  #==================================|| GLOBAL ||===================================#
    # DIRECTORIES #{{{
    my $MASTER      = $Dir_Setup::PATHS{MASTER};

    my $output_dir  = $Dir_Setup::PATHS{output_dir};
    my $output_dir2 = $Dir_Setup::PATHS{output_dir_2};
    my $target      = $Dir_Setup::PATHS{target_dir };
    #}}}
    # FILE PATHS #{{{
    my $Catalog           = $MASTER.'/'.$Dir_Setup::PATHS{paste_dir}.$Dir_Setup::PATHS{source_file}; # Library
    my $Copy_Catalog        = $target.'/'.$Dir_Setup::PATHS{output_file_copy}; #relative pathname to lib-copy
    my $Kosher_Catalog   = $MASTER.'/'.$Dir_Setup::PATHS{paste_dir}.$Dir_Setup::PATHS{output_file_kosher};

    # goto_files/
    my $tag_file        = 'tag_bin.txt';
    my $name_file       = 'tag_names.txt';
    my $ops_file        = 'ops.txt';
    my $ops_names_file  = 'op_names.txt';
    my $ops_group_file  = 'op_groups.txt';
    #}}}
    # GLOBAL VARS #{{{
    my $fh; # FileHandle of tag catalog
    my @fixed;
    #}}}
    # FORWARD DECLARATIONS #{{{
    sub Lib_analy;
    sub get_tags;
    sub libfix;
    #}}}

  #==================================|| MAIN ||===================================#
  {
      #-----| Catalog Modification |------{{{

        # Analysis of Catalog
        open my $fh_Catalog, '<', $Catalog
          or die "Cannot open '$Catalog' in read-write mode: $!";

          ( my $dspt_Catalog,
            my $original ) =  Lib_analy($fh_Catalog, $output_dir);

        close $fh_Catalog
          or die "Cannot close $Catalog: $!";

        # Get Tags
        $dspt_Catalog = get_tags($dspt_Catalog, $tag_file, $name_file, 1, $output_dir);

        # Formating the Catalog
        lib_fmt($original);

      #}}}
      #-----| Catalog Copy Modification |------{{{
        # Clean up the Library
        @fixed = libfix($fh, $Copy_Catalog, $dspt_Catalog, $Catalog);

        # Analysis of Catalog Copy
        open my $fh_Copy_Catalog, '<', $Copy_Catalog
          or die "Cannot open '$Copy_Catalog' in read-write mode: $!";

          ( my $dspt_Copy_Catalog,
            my $modified ) =  Lib_analy($fh_Copy_Catalog, $output_dir2);

        close $fh_Copy_Catalog
          or die "Cannot close $Copy_Catalog: $!";

        $dspt_Copy_Catalog =  get_tags($dspt_Copy_Catalog, $tag_file, $name_file, 1, $output_dir2);

      #}}}
    #-----| Check |-----{{{
    #  my @a = @{${$dspt->{tags}{analy}{tag_bin}}[0]};
    #  my @b = @{${$dspt2->{tags}{analy}{tag_bin}}[0]};

    #  my @mine;
    #  NUM: for my $duck (@b)
    #  {
    #    for my $TAG (@a)
    #    {
    #      if ($duck eq $TAG)
    #      {
    #        shift @a;
    #        next NUM;
    #      }
    #    }
    #      push @mine, $duck;
    #  }
    #  print "$_\n" for @mine;
    #  print "*******************\n";

    #  my @mine2;
    #  NUM: for my $duck (@a)
    #  {
    #    for my $TAG (@b)
    #    {
    #      if ($duck eq $TAG)
    #      {
    #        shift @b;
    #        next NUM;
    #      }
    #    }
    #      push @mine2, $duck;
    #  }
    #  print "$_\n" for @mine2;
    #}}}
      #-----| STDOUT |-----{{{
      # find length of longest key
      my $ub = 0;
        for my $key (keys %$dspt_Catalog) {
          if (length $key > $ub) {
            $ub = length $key
          }
        }
      my $ub2 = 0;
        for my $key (keys %$dspt_Copy_Catalog) {
          if (length scalar @{$dspt_Copy_Catalog->{$key}{LN}} > $ub2) {
            $ub2 = length scalar @{$dspt_Copy_Catalog->{$key}{LN}}
          }
      }

      # print element number of each key
      for my $key (sort keys %$dspt_Catalog) {
        my $bin =  scalar @{$dspt_Catalog->{$key}{LN}};
        my $bin2 =  scalar @{$dspt_Copy_Catalog->{$key}{LN}};
        printf "$key"." " x ( 2 + ($ub - length $key))."%s\n", '| '.$bin.' '.
               " " x ( $ub2 - length $bin2 ).'| '.$bin2;
      }
      #}}}
  }

  #==================================|| SUBROUTINES ||===================================#
    #-----|| Lib_analy() ||-------{{{
    # Lib_analy:

      sub Lib_analy {
        # FUN ARGS {{{
        my $fh = shift; # filehandle
        my $output_dir_in = shift;
        #}}}
        # FUN VARS #{{{

        my @ORIGINAL;
        my $dspt = gen_dspt();

        #}}}

        my $num_of_keys  = scalar keys %$dspt; # number of keys

        #-----| BLOCK: get lines |-----{{{
          {

            while (my $line = <$fh>) {
            # WHILE: line at file handle pointer

              my $count; # number regexp match fails
              $line =~ s/
//g; #removes carriage returns

              for my $key (keys %$dspt) {
              # FOR: every first level key in the dispatch table

                my $path = \$dspt->{$key}{file_path};
                my $key_reff = $dspt->{$key};
                $$path = $output_dir_in.'/'.$key.'.txt';

                if ($key_reff->{re} && $line =~ /$key_reff->{re}/) {

                  push  @{$key_reff->{LN}}, $.;
                  push  @{$key_reff->{match}}, $line;

                    if ($key_reff->{group1}) {push @{$key_reff->{group1}} , $1;}
                    if ($key_reff->{group2}) {push @{$key_reff->{group2}} , $2;}
                    if ($key_reff->{group3}) {push @{$key_reff->{group3}} , $3;}
                    if ($key_reff->{group4}) {push @{$key_reff->{group4}} , $4;}
                    if ($key_reff->{group5}) {push @{$key_reff->{group5}} , $5;}
                    if ($key_reff->{group6}) {push @{$key_reff->{group6}} , $6;}
                }

                else {
                # ELSE: no matches if count = the number of keys

                  ++$count;

                }
              }

              if ($flag && $num_of_keys == $count) {

                push  @{$dspt->{unkown}{LN}}, $.;
                push  @{$dspt->{unkown}{match}}, $line;
                $flag = 0;

              }

              push @ORIGINAL, $line;

            }
          }

        #}}}

        return $dspt, \@ORIGINAL;
      }

      #}}}
    #-----|| get_tags ||-------{{{
      sub get_tags {

        # FUN ARGS #{{{

          my $dspt = shift;
          my $tag_file = shift;
          my $name_file = shift;
          my $output = shift;
          my $output_dir_in = shift;

        #}}}
        # FUN VARS #{{{

          my $tags_raw       = ${$dspt->{tags}{analy}{raw}}[0];
          my $tags           = ${$dspt->{tags}{analy}{tag_bin}}[0];
          my $tag_lnums      = ${$dspt->{tags}{analy}{tag_bin}}[1];
          my $tag_names      = ${$dspt->{tags}{analy}{tag_names}}[0];
          my $tag_name_lnums = ${$dspt->{tags}{analy}{tag_names}}[1];

          my $ops_raw              = ${$dspt->{tags}{analy}{raw_ops}}[0]; # raw operator
          my $ops_raw_lnums        = ${$dspt->{tags}{analy}{raw_ops}}[1];
          my $ops                  = ${$dspt->{tags}{analy}{ops_bin}}[0];
          my $ops_lnums            = ${$dspt->{tags}{analy}{ops_bin}}[1];
          my $ops_names            = ${$dspt->{tags}{analy}{ops_names}}[0];
          my $ops_name_lnums       = ${$dspt->{tags}{analy}{ops_names}}[1];

        #}}}
        #-----| GET RAW TAGS FROM TAGLINES |-----{{{

          for my $tagln_LN ( @{$dspt->{tags}{LN}} ) {
          # FOR: tagline line numbers

            for my $key ( grep {m/group/} keys %{$dspt->{tags}} ) {
            # FOR:  non-operater tag groups

              if ($key =~ /group[^356]/) {

                my $tag_group = shift @{$dspt->{tags}{$key}};
                # get tag group even if UNDEF

                if ($tag_group) {

                  while ( $tag_group =~ /([^$d]+)/g ) {
                  # WHILE: current tag group contains characters
                  # that are not the delemiter: $d

                    push( @$tags_raw, $1 );
                    # push captured group onto the RAW TAGS array
                    push( @$tag_lnums, $tagln_LN );
                    # push current tagline line number onto the...
                    # ...TAG LINE NUMBERS array.
                  }
                }
              }

              else {

                my $op_group = shift @{$dspt->{tags}{$key}}; # get tag group even if UNDEF
                if ($op_group) {

                  while ( $op_group =~ /(.+)/g ) {
                  # WHILE: current tag group contains characters that
                  # are not the delemiter: $d

                    my $var = $op_group;
                    push( @$ops_raw, $1 );
                    # push captured group onto the RAW TAGS array
                    push( @$ops_raw_lnums, $tagln_LN );
                    # push current tagline line number onto the...
                    while ( $var =~ /([^\s])\1*/g ) {

                      push( @$ops, $& );
                      # push captured group onto the RAW TAGS array
                      push( @$ops_lnums, $tagln_LN );
                      # push current tagline line number onto the...
                    }
                  }
                }
              }
            }
          }

          #}}}
        #-----| GETTING OP NAMES |-----{{{

          my @idx = sort {uc($$ops_raw[$a]) cmp uc($$ops_raw[$b])} 0 .. $#$ops_raw;
          @$ops_raw      = @$ops_raw[@idx];
          @$ops_raw_lnums = @$ops_raw_lnums[@idx];

          @idx = sort {uc($$ops[$a]) cmp uc($$ops[$b])} 0 .. $#$ops;
          @$ops      = @$ops[@idx];
          @$ops_lnums = @$ops_lnums[@idx];
          #}}}
        #-----| GETTING OP NAMES |-----{{{

          @$ops_names = uniq(@$ops);
          #}}}
        #-----| GET TAG NAME LINE NUMBERS IN THE TAG_BIN ARRAY |-----{{{

          my @ops_names_copy = @$ops_names; # make copy of tagnames
          my $count = 0;                    # set count that will act as the line numbers

          for my $op ( @$ops ) {
          # FOR: every tag in the tag bin

            ++$count;

            if ( $ops_names_copy[0] && $op =~ /\Q$ops_names_copy[0]\E/ ) {
            # IF: current tag matches the current tag name

              push( @$ops_name_lnums, $count );
              # push current count to tag_name line numbers

              shift @ops_names_copy;            # get next tag name
            }
          }
          #}}}
        #-----| CLEANING TAGS |-----{{{

          for my $line ( @$tags_raw ) {
          # FOR: raw tags

            $line =~ s/^\s*([^\s])/$1/g; # removes spaces before tag
            $line =~ s/([^\s])\s*$/$1/g; # removes spaces after tag
            $line =~ s/\?//g;            # removes "?" from tag
            push @$tags, $line;          # push cleaned tag to tag_bin array
          }
          #}}}
        #-----| SORTING TAGS: CASE INSENSITIVE |-----{{{

          @idx = sort {uc($$tags[$a]) cmp uc($$tags[$b])} 0 .. $#$tags;
          @$tags      = @$tags[@idx];
          @$tag_lnums = @$tag_lnums[@idx];
          #}}}
        #-----| GET TAG NAMES |-----{{{

          # get tag names and their linenumbers
          @$tag_names = uniq( sort {uc($a) cmp uc($b)} @$tags );
          #}}}
        #-----| GET TAG NAME LINE NUMBERS IN THE TAG_BIN ARRAY |-----{{{

          my @tag_names_copy = @$tag_names; # make copy of tagnames
          $count = 0;                    # set count that will act as the line numbers

          # FOR: every tag in the tag bin
          for my $tag ( @$tags ) {

            ++$count;

            if ( $tag_names_copy[0] && $tag =~ /\Q$tag_names_copy[0]\E/ ) {
            # IF: current tag matches the current tag name

              push( @$tag_name_lnums, $count );
              # push current count to tag_name line numbers
              shift @tag_names_copy;            # get next tag name

            }
          }

          #}}}
        #-----| OUTPUT FILES |-----{{{

          if ( $output ) {
          # IF: output argument was provided and it's true

            # output fmtd_tgln
            open $fh, '>', $output_dir_in.'/'.$name_file;
              print $fh shift @$tag_name_lnums, " $_\n" for @$tag_names;
            close $fh;

            open $fh, '>', $output_dir_in.'/'.'tags_only.txt';
              print $fh "$_\n" for @$tag_names;
            close $fh;

            # output tags
            open $fh, '>', $output_dir_in.'/'.$tag_file;
              print $fh shift @$tag_lnums ," $_\n" for @$tags;
            close $fh;

            open $fh, '>', $output_dir_in.'/'.$ops_names_file;
              print $fh shift @$ops_name_lnums, " $_\n" for @$ops_names;
            close $fh;

            open $fh, '>', $output_dir_in.'/'.$ops_file;
              print $fh shift @$ops_lnums," $_\n" for @$ops;
            close $fh;

            open $fh, '>', $output_dir_in.'/'.$ops_group_file;
              print $fh shift @$ops_raw_lnums ," $_\n" for @$ops_raw;
            close $fh;

            open $fh, '>', $output_dir_in.'/url_only.txt';
              my @line = @{$dspt->{url}{match}};
              print $fh grep { /^https:\/\/pastebin.com\/\w[^\/]/ } @line;
            close $fh;

            for my $key ( keys %$dspt ) {
            # FOR: every key of dispatch table

              my $file_path =  $dspt->{$key}{file_path};
              open $fh, '>', $file_path;
                my @line = @{$dspt->{$key}{LN}};
                print $fh shift @line," $_" for @{$dspt->{$key}{match}};
              close $fh;

            }
          }

          else {

          }

          #}}}

        return $dspt;
      }
      #}}}
    #-----|| libfix() ||-------{{{
      sub libfix {
        # FUN ARGS #{{{
        my $fh        = shift;  # FileHandle brah
        my $Copy_Catalog  = shift;  # Path to Library Copy
        my $dspt      = shift;  # Dispatch Table
        my $fname_in     = shift;  # Path to Library
        #}}}
        # FUN VARS #{{{
        my @COPY;  # Array to Store Copy of Library
        my @FIXED; # Array to Store the Modified Copy of the Library
        #}}}
        #-----| MAKE ARRAY COPY OF LIBRARY |-----{{{
        open($fh, '<', $fname_in); # Open Library for Reading

          # WHILE: a Line Exist at the FileHandle Pointer
          while (my $line = <$fh>)
          {
            push @COPY, $line; # Push current line to @COPY
          }

        close $fh;
        #}}}
        #-----| LIBRARY COPY FILE OPENING/CREATION |-----{{{
        open($fh, '>', $Copy_Catalog); # Open or Create File for Library Copy
        close $fh;
        #}}}
        #-----| WRITE TO LIB_COPY FILE AND MODIFY IT |-----{{{
        open($fh, '+<', $Copy_Catalog); # Open Library Copy for Read/Write
          print $fh @COPY;          # Write @COPY to Library Copy File
          truncate $fh, tell($fh);  # Truncate File at Current Postion of the FileHandle...
                                    # ... Pointer
        #}}}
          #-----| READ FILE AND MAKE FIXES TO LINES |-----{{{
          seek $fh,0,0; # Put FileHandle Pointer at BOF

          # WHILE: a Line Exist at the FileHandle Pointer
          while (my $line = <$fh>)
          {
            # UNDER A TITLE
            # IF: Current Line is Under a Title
            if ($flag)
            {
              # TAGLINE
              # IF: Current Line is a TagLine
              if ($line =~ /$dspt->{tags}{re}/) # Also sets Regexp Capture Groups
              {
                # Regexp Capture  Groups:
                  # [atag][btag]$3 | COMPLETE
                  # [halftag]$5    | INCOMPLETE
                  # $6             | INCOMPLETE

                #-----| FIX INCOMPLETE TAGS |-----{{{
                my $atag = \$1;          # Anthro Tags reff
                my $btag = \$2;          # Story Tags reff
                my $halftag = \$4;       # HalfTag reff
                my @ops = (\$3,\$5,\$6); # Operators reff
                # HALFTAG
                # IF: Current Line is HalfTag
                if ($kind eq "half")
                {
                   $line =~ s/.*/\[\]$&/; # Insert Single Tag Bracket
                }

                # Only Operater(s)
                # ELSIF: Current Line only cosists of Operators
                elsif (${$ops[2]})
                {
                   $line =~ s/.*/\[\]\[\]$&/; # Insert Empty Tag Brackets
                }
                #}}}
                #-----| TAGLINE CLEANING |-----{{{
                $line =~ /$dspt->{tags}{re}/; # Reset Capture Groups Now That...
                                              # ... All Taglines are Complete

                $line =~ s/$d+\s*($d)/$1/g;              # Remove extra Commas to the Left
                $line =~ s/($d)\s*$d+/$1/g;              # Remove extra Commas to the Right
                $line =~ s/\s*(\[)\s*/$1/g;              # Remove extra Spaces around Left Brace
                $line =~ s/\s*(\])[ ]*([^ ])[ ]*/$1$2/g; # Remove extra Spaces around Right Brace
                $line =~ s/$d*(\[)$d/$1/g;               # Remove extra Commas around Left Brace
                $line =~ s/$d*(\])$d*/$1/g;              # Remove extra Commas around Right Brace
                $line =~ s/($d\s)\s*/$1/g;               # Remove extra Spaces Right of Comma
                $line =~ s/\s*($d)/$1/g;                 # Remove extra Spaces Left of Comma
                $line =~ s/($d)([^ ])/$1 $2/g;           # IF no space after comma, add one
                $line =~ s/\s*(\s)\s*/$1/g;              # Remove Extra Spaces
                #}}}
                #-----| DUPLICATE TAGS |-----{{{
                $line =~ /$dspt->{tags}{re}/; # reset match variables now that...
                my @past_matches = '';
                my $duplicate_found = 0;
                my @dupe;
                while ($line =~ /[^\[\],\n]+/g)
                {
                  my $match = $&;
                  $match =~ s/^\s//;

                  for my $ele (uniq(@past_matches))
                  {
                    if ($ele eq $match)
                    {
                      $duplicate_found=1;
                      push @dupe, $match;
                    }
                    else
                    {
                      $duplicate_found=0;
                    }
                  }
                  push @past_matches, $match;
                }
                no warnings 'uninitialized';
                for (@dupe)
                {
                 $line =~ s/(?<!\w)\s$_\?*$d|(\[)$_\?*(\])|$d\s$_\?*(\])|(\[)$_$d\s/$1$2$3$4/;
                }
                use warnings;
                #}}}
               #-----| DUPLICATE OPS |-----{{{
               $line =~ /$dspt->{tags}{re}/; # reset match variables now that...
               @past_matches = '';
               $duplicate_found = 0;
               @dupe =();
               $line =~ /\]\[.*\]\K[^\]\[]+/;
               my $OPS = $&;

               while ($OPS =~ /./g)
               {
                 my $match = $&;
                 for my $ele (uniq(@past_matches))
                 {
                   if ($ele eq $match)
                   {
                     $duplicate_found=1;
                     push @dupe, $match;
                   }
                   else
                   {
                     $duplicate_found=0;
                   }
                 }
                 push @past_matches, $match;
               }
               for (@dupe)
               {
                $OPS =~ s/\Q$_\E//;

                $line =~ s/\][^\]\[]+/\]$OPS/;
               }#}}}
                #-----| SUBSTITUTING OPERATORS |-----{{{
                $line =~ /$dspt->{tags}{re}/; # reset match variables now that...
                                              # ...tagline in complete
                my %special =  %{$dspt->{tags}{special}};
                my %tag_subs = %{$special{tag_subs}};
                my %OPS = %{$special{ops}};
                for my $key (keys %OPS)
                {
                  my $ARRAY = $OPS{$key};
                  my $NEW = @$ARRAY[1];
                  my $op = @$ARRAY[0];
                  my $old_list = @$ARRAY[2];
                  if ($line =~ /\].*\Q$op\E/)
                  {
                    $line =~ s/\Q$op\E//g; # REMOVE $
                    #-----| CREATE OLD |-----{{{
                    for my $OLD (@$old_list)
                    {
                      my $EXPR = '';
                      my $COUNT = 0;
                      my $LEN =  scalar @{$tag_subs{full_sub}};

                      #-----| CREATE REGEXP |-----#
                      for my $string (@{$tag_subs{full_sub}})
                      {
                        $COUNT++;
                        $EXPR .= $string;
                        if ($COUNT > $LEN)
                        {
                          next;
                        }
                        $EXPR .=$OLD;
                      }
                      my $regexp = qr/$EXPR/;
                      $line =~ s/(?<!\w)\s$OLD\?*$d|(\[)$OLD\?*(\])|$d\s$OLD\?*(\])|(\[)$OLD$d\s/$1$2$3$4/ig;
                    }
                    #}}}

                    #-----| INSERT NEW |-----#
                    $line =~ /$dspt->{tags}{re}/;
                    # IF:
                    if (!$$btag)
                    {
                      $line =~ s/$regexp{tag_sub}/$&$NEW/;
                    }
                    # ELSE
                    else
                    {
                      $line =~ s/$regexp{tag_sub}/$&$NEW$d /;
                    }
                  }
                }
              }
              #}}}
              #-----| MISSING TAGLINES |-----{{{
              # URL
              # ELSIF: current line matches url regexp
              elsif ($line =~ /$dspt->{url}{re}/)
              {
                $line =~ s/$dspt->{url}{re}/\[\]\[\]\n$&/;
              }
              # UNKOWN
              # ELSE: currlent line doesn't follow any regexp put forth
              else
              {
                $flag=0; # set flag to zero to let program know that it is done...
                         # ... fixing line that was under title for there is no regexp...
                         # ... embedded code for unkown
              }
              #}}}

            }

            #-----| TITLE FINDING AND FLAGGING |-----{{{
            # TITLE
            # ELSIF: current line matches title regexp
            elsif ($line =~ /$dspt->{title}{re}/)
            {
            }
            push @FIXED, $line; # push fixed line to the fixed array
            #}}}

          }# END of WHILE
          #}}}
          #-----| WRITE FIXES BACK |-----{{{
          seek $fh, 0, 0;

          # FOR: every $line of the fixed array
          for my $line  (@FIXED)
          {
            $line =~ s/
//g; # remove carriage returns
            print $fh $line;
          }
          truncate $fh, tell($fh);

        close $fh;

        return @FIXED;
        #}}}
      };

      #}}}
    #-----|| lib_fmt() ||-------{{{
      sub lib_fmt {
        my $lib_array_ref = shift;
        my @lib_array = @$lib_array_ref;
        my @fmt_lib;

        # Master | Julia   | Markdown
        # By     | by\s    | ##
        # >title | "title" | [title](url)
        # #      | #       | >
        # ,      | \s      | \s
        #        | ;;      | ;``;

        for my $line (@lib_array) {

          if ($line =~ /^>/) {
          # If: Title

            $line =~ s/>(.*)/"$1"/;
            $line =~ s/\s+(")/$1/;

          }

          elsif ($line =~ /^~/) {
          # ELSIf: Dscr
            $line =~ s/~(.*)/#$1/;
          }

          # IF: Author
          elsif ($line =~ /^By/) {
            $line =~ s/By(.*)/by$1/;

          }

          push @fmt_lib, $line;

        }

        open (my $fh, '>', $Kosher_Catalog);
          print $fh @fmt_lib;
        close $fh;
      }

      #}}}
}