Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/perl
- use warnings;
- use strict;
- use autodie;
- use feature qw(say);
- use File::Slurp qw(read_file);
- use String::Util qw(trim);
# Driver: read the input, group overlapping lines, merge each group, print.
# The input path is the first command-line argument; when none is given it
# falls back to the previously hard-coded name 'file'.
my $file  = @ARGV ? $ARGV[0] : 'file';
my @lines = read_file($file);
my $grps  = getGroups(\@lines);
processGroups(\@lines, $grps);
printLines(\@lines, $grps);
# Write one output line per group to the currently selected output handle.
#
# Parameters:
#   $lines - array reference of the input lines.
#   $grps  - array reference of group hashes.  Each has an "ind" array
#            reference of line indices; groups with more than one index are
#            also read for their "min", "max", "score" and "strand" values.
#
# A group with a single index is printed as the untouched input line.  For
# any other group the first line of the group is used as a template: its
# 2nd, 3rd, 5th and 6th whitespace-separated fields are overwritten with
# min, max, score and strand, while the leading indent and the original
# separators between fields are kept.
sub printLines {
    my ($lines, $grps) = @_;

    GROUP:
    foreach my $group (@{$grps}) {
        my $indices = $group->{ind};
        my $text    = $lines->[ $indices->[0] ];

        if (scalar(@{$indices}) == 1) {
            print $text;
            next GROUP;
        }

        # Remember the indent, because trim() removes it before the split.
        my $indent = $text =~ /^(\s+)/ ? $1 : "";

        # The capturing separator keeps the whitespace runs in the list, so
        # field k of the line sits at position 2*k.
        my @tokens = split(/(\s+)/, trim($text));
        @tokens[2, 4, 8, 10] = @{$group}{qw(min max score strand)};

        say $indent, join("", @tokens);
    }
}
# Fill in the merged values for every multi-line group.
#
# Parameters:
#   $lines - array reference of the input lines.
#   $grps  - array reference of group hashes as built by getGroups(); each
#            has an "ind" array reference holding either one line index or
#            a [first, last] pair.
#
# Groups whose "ind" holds exactly two indices are updated in place with:
#   min    - field 1 (0-based) of the first line of the group,
#   max    - field 2 of the last line of the group,
#   score  - the largest field 4 found on the data lines first..last,
#   strand - field 5 of the line that supplied that score (the earliest
#            such line when several share the largest score).
# Single-line groups are left untouched.  Returns nothing.
sub processGroups {
    my ($lines, $grps) = @_;

    for my $grp (@$grps) {
        my $ind = $grp->{ind};
        next if @$ind != 2;

        my ($min, $max) = @$ind;

        # Start from "no score seen yet" rather than 0, so that a group whose
        # scores are all zero or negative still gets a real score and strand.
        my ($gscore, $gstrand);
        for my $i ($min .. $max) {
            my @fld = split ' ', $lines->[$i];

            # Only six-field lines are data lines; anything else that sits
            # inside the range carries no score and must not be compared.
            next if @fld != 6;

            my ($score, $strand) = @fld[4, 5];
            if (!defined $gscore || $score > $gscore) {
                ($gscore, $gstrand) = ($score, $strand);
            }
        }

        $grp->{score}  = $gscore;
        $grp->{strand} = $gstrand;
        $grp->{min}    = (split ' ', $lines->[$min])[1];
        $grp->{max}    = (split ' ', $lines->[$max])[2];
    }

    return;
}
# Partition the data lines of @$lines into runs of overlapping intervals.
#
# A data line is one that splits into exactly six whitespace-separated
# fields; fields 1 and 2 (0-based) are read as the interval start and end.
# Every other line (e.g. a row of stars) is skipped and belongs to no group.
#
# Assumes the start and end columns are sorted in ascending order and that
# start < end on each line, so a line only has to be compared against the
# end of the data line directly before it.
#
# Parameters:
#   $lines - array reference of input lines.
#
# Returns an array reference of hash references, one per group, in input
# order.  Each hash has the single key "ind": [$first] for a group made of
# one line, or [$first, $last] (indices into @$lines) for a merged run.
sub getGroups {
    my ($lines) = @_;

    my @grps;
    my $first;    # index of the first line of the group being built
    my $last;     # index of its last line; undef while it has only one line
    my $pend;     # end column of the previous data line

    for my $i (0 .. $#$lines) {
        my @fld = split ' ', $lines->[$i];
        next if @fld != 6;    # skip irrelevant lines, e.g. starting with stars

        my ($start, $end) = @fld[1, 2];
        if (defined $first && $start <= $pend) {
            # Overlaps the previous data line: extend the current group.
            $last = $i;
        }
        else {
            # Gap, or the very first data line: close the group in progress
            # (if there is one) and open a new one at this line.
            push @grps, { ind => [ $first, defined($last) ? $last : () ] }
                if defined $first;
            ($first, $last) = ($i, undef);
        }
        $pend = $end;
    }

    # The final group has no following gap to close it, so flush it here;
    # without this the last run of lines would be missing from the result.
    push @grps, { ind => [ $first, defined($last) ? $last : () ] }
        if defined $first;

    return \@grps;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement