Advertisement
hakonhagland

Updated perl script

May 7th, 2014
462
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.10 KB | None | 0 0
  1. #! /usr/bin/perl
  2.  
  3. use warnings;
  4. use strict;
  5. use autodie;
  6. use feature qw(say);
  7. use File::Slurp qw(read_file);
  8. use String::Util qw(trim);
  9.  
  # Driver: read the input, find runs of overlapping records, compute the
  # merged values for each run, then print the result to STDOUT.
  #
  # The input path is taken from the first command-line argument; when no
  # argument is given the historical default 'file' is used, so existing
  # invocations keep working unchanged.
  my $input_file = @ARGV ? $ARGV[0] : 'file';

  my @lines = read_file($input_file);

  my $grps = getGroups(\@lines);

  processGroups(\@lines, $grps);

  printLines(\@lines, $grps);
  17.  
  18.  
  # Print one output line per group.
  #
  #   $lines - array ref of the raw input lines (as read, newline included)
  #   $grps  - array ref of group hashes; each has {ind} (array ref of line
  #            indices) and, when {ind} does not hold exactly one index,
  #            {min}, {max}, {score} and {strand} are written into the line
  #
  # A group with a single index is printed verbatim.  For any other group the
  # first line of the group is used as a template: its leading whitespace and
  # its original column separators are kept, while columns 2, 3, 5 and 6
  # (start, end, score, strand) are replaced by the group's values.
  sub printLines {
      my ($lines, $grps) = @_;

      GROUP:
      foreach my $group (@{$grps}) {
          my $indices    = $group->{ind};
          my $first_line = $lines->[ $indices->[0] ];

          if (scalar(@{$indices}) == 1) {
              print $first_line;
              next GROUP;
          }

          # Remember the indentation, since trim() below discards it.
          my $indent = $first_line =~ /^(\s+)/ ? $1 : "";

          # Splitting on a captured separator keeps the whitespace runs as
          # the odd-numbered elements, so column N lives at index 2*(N-1).
          my @tokens = split(/(\s+)/, trim($first_line));
          @tokens[2, 4, 8, 10] = @{$group}{qw(min max score strand)};

          say $indent, join("", @tokens);
      }
  }
  39.  
  # Compute the merged values for every multi-line group, in place.
  #
  #   $lines - array ref of the raw input lines
  #   $grps  - array ref of group hashes as built by getGroups(); only groups
  #            whose {ind} holds exactly two indices (first and last line of
  #            a run) are touched
  #
  # For each such group the following keys are stored in the group hash:
  #   {min}    - start column of the first data line in the run
  #   {max}    - largest end column seen in the run
  #   {score}  - highest score (5th column) in the run
  #   {strand} - strand (6th column) of the line carrying that score; on a
  #              tie the earliest line wins
  #
  # Returns nothing useful.
  sub processGroups {
      my ($lines, $grps) = @_;

      for my $grp (@$grps) {
          my $ind = $grp->{ind};
          next if @$ind != 2;

          my ($first, $last) = @$ind;
          my ($gscore, $gstrand, $gmin, $gmax);

          for my $i ($first .. $last) {
              my @fld = split(" ", $lines->[$i]);

              # The index range may span lines that are not data records
              # (anything without exactly six columns); ignore them instead
              # of comparing undefined values.
              next if @fld != 6;

              my ($start, $end, $score, $strand) = @fld[1, 2, 4, 5];

              $gmin = $start if !defined $gmin;

              # Take the largest end rather than the last one so that an
              # interval enclosing a later, shorter one is not truncated.
              $gmax = $end if !defined $gmax || $end > $gmax;

              # Seed from the first record rather than from 0, so groups
              # whose scores are all zero or negative still get a real
              # score and a defined strand.
              if (!defined $gscore || $score > $gscore) {
                  $gscore  = $score;
                  $gstrand = $strand;
              }
          }

          $grp->{score}  = $gscore;
          $grp->{strand} = $gstrand;
          $grp->{min}    = $gmin;
          $grp->{max}    = $gmax;
      }

      return;
  }
  66.  
  # Partition the data lines into runs of overlapping records.
  #
  # Assume start and end columns are sorted.
  #
  #   $lines - array ref of raw input lines; only lines that split into
  #            exactly six whitespace-separated columns are considered, all
  #            others (e.g. lines starting with stars) are skipped
  #
  # A record belongs to the current run when its start (2nd column) is not
  # greater than the end (3rd column) of the previous data line.
  #
  # Returns an array ref of hashes, one per run, in input order.  Each hash
  # has a single key {ind}: [$i] for a run of one line, or [$first, $last]
  # (line indices) for a run of several lines.
  sub getGroups {
      my ($lines) = @_;

      my @grps;
      my $grp;     # [first index] of the run being built; undef before any data
      my $gend;    # index of the latest overlapping line; undef for a lone line
      my $pend;    # end column of the previous data line

      for my $i (0 .. $#$lines) {
          my @fld = split(" ", $lines->[$i]);
          next if @fld != 6;    # skip irrelevant lines

          my ($start, $end) = @fld[1, 2];

          if (!defined $grp) {
              # First data line: it opens a run but never extends one, no
              # matter at which index it sits.
              $grp = [$i];
          }
          elsif ($start <= $pend) {
              $gend = $i;
          }
          else {
              push @$grp, $gend if defined $gend;
              push @grps, { ind => $grp };
              $grp  = [$i];
              $gend = undef;
          }
          $pend = $end;
      }

      # Flush the run that was still open when the input ended; without this
      # the last record (or merged run) would be lost.
      if (defined $grp) {
          push @$grp, $gend if defined $gend;
          push @grps, { ind => $grp };
      }

      return \@grps;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement