Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/bash
- op_perl0() {
- perl -lnE '
- BEGIN{ say "filename A G C T N" }
- unless (($. + 2) % 4) {
- $i = 0;
- ++$b{$c} while ($c=substr($_, $i++, 1));
- # s/(.)/++$b{$1}/ge;
- # for ($i = 0; $i < length; $i++) {
- # ++$b{substr($_, $i, 1)}
- # }
- }
- $,=" ", say($c, @b{A,G,C,T,N}), $c=$ARGV, undef %b if (eof() || (($c//=$ARGV) ne $ARGV))
- ' *.fastq | column -t -o ' | '
- }
- op_perl1() {
- perl -lnE '
- BEGIN{ say "filename A G C T N" }
- map {++$b{$_}} split// unless ($.+2)%4;
- $,=" ", say($c, @b{A,G,C,T,N}), $c=$ARGV, undef %b if (eof() || (($c//=$ARGV) ne $ARGV))
- ' *.fastq | column -t -o ' | '
- }
- op_perl2() {
- perl -lE '
- BEGIN {say "filename A G C T N"}
- while ($f = shift) {
- %b = ();
- open(F, $f) and @d = <F>;
- for ($i = 1; $i <= $#d; $i += 4) {
- map {++$b{$_}} split //, $d[$i];
- }
- $,=" ", say $f, @b{A, G, C, T, N};
- }
- ' *.fastq | column -t -o ' | '
- }
# op_python: count the bases A, G, C, T and N in every *.fastq file of the
# current directory with collections.Counter, then print one row per file as
# an aligned table (header row first).
#
# Takes no arguments. Reads *.fastq from the current directory and writes the
# table to stdout. The embedded program starts at column 0 because Python is
# indentation-sensitive.
op_python() {
    python -c '
import sys
from collections import Counter

print("filename A G C T N")
for path in sys.argv[1:]:
    counts = Counter()
    # The with-block closes each file as soon as it has been counted.
    with open(path) as handle:
        for index, line in enumerate(handle):
            # Index 1 of every group of 4 lines is the sequence line.
            if index % 4 == 1:
                counts.update(line.strip())
    # Counter yields 0 for letters that never occurred.
    print(path, " ".join(str(counts[base]) for base in "AGCTN"))
' *.fastq | column -t -o ' | '
}
# op_pipeline: count the bases A, G, C, T and N in every *.fastq file of the
# current directory with a classic sed | grep | sort | uniq pipeline, then
# print one row per file as an aligned table (header row first).
#
# Takes no arguments. Reads *.fastq from the current directory and writes the
# table to stdout.
op_pipeline() {
    {
        echo filename A G C T N
        for f in *.fastq; do
            # The file name is printed by the shell, never spliced into the
            # awk program text, so unusual names cannot break the script.
            printf '%s ' "$f"
            # sed keeps every 4th line starting at line 2 (GNU first~step
            # address); grep -o . emits one character per line.
            sed -n '2~4p' "$f" | grep -o . | sort | uniq -c |
                awk '
                    # uniq -c emits "<count> <letter>"; index by letter so a
                    # missing letter cannot shift the remaining columns.
                    { count[$2] = $1 }
                    END {
                        print count["A"] + 0, count["G"] + 0, count["C"] + 0, \
                              count["T"] + 0, count["N"] + 0
                    }'
        done
    } | column -t -o ' | '
}
# op_awk: count the bases A, G, C, T and N in every *.fastq file of the
# current directory inside a single awk BEGIN block, then print one row per
# file as an aligned table (header row first).
#
# Takes no arguments. Reads *.fastq from the current directory and writes the
# table to stdout.
op_awk() {
    awk '
        BEGIN {
            print "filename A G C T N"
            # The files are read explicitly with getline, so all the work
            # happens in BEGIN and awk never enters its main input loop.
            for (fn = 1; fn < ARGC; fn++) {
                ln = 0
                while ((getline line < ARGV[fn]) > 0) {
                    # Line 2 of every 4-line record is the sequence.
                    if (++ln % 4 == 2) {
                        len = length(line)
                        for (i = 1; i <= len; i++)
                            ++count[substr(line, i, 1)]
                    }
                }
                close(ARGV[fn])
                # "+ 0" turns a never-seen letter into 0 instead of "".
                print ARGV[fn], count["A"] + 0, count["G"] + 0, \
                      count["C"] + 0, count["T"] + 0, count["N"] + 0
                delete count
            }
        }
    ' *.fastq | column -t -o ' | '
}
# op_bash: count the bases A, G, C, T and N in every *.fastq file of the
# current directory in pure Bash (associative array, character-by-character
# substring expansion), then print one row per file as an aligned table.
#
# Takes no arguments. Reads *.fastq from the current directory and writes the
# table to stdout. Requires Bash 4+ for associative arrays.
op_bash() {
    local f line base i lineNumber
    local -A alphaCount
    {
        # The header order must match the order of the echo below.
        echo filename A G C T N
        for f in *.fastq; do
            alphaCount=()
            lineNumber=0
            # "|| [[ -n $line ]]" keeps a final line that lacks a newline.
            while IFS= read -r line || [[ -n $line ]]; do
                # lineNumber is 0-based here, so remainder 1 is the sequence
                # line of each 4-line record.
                if (( lineNumber++ % 4 == 1 )); then
                    for (( i = 0; i < ${#line}; i++ )); do
                        base=${line:i:1}
                        alphaCount["$base"]=$(( ${alphaCount["$base"]:-0} + 1 ))
                    done
                fi
            done < "$f"
            echo "$f ${alphaCount[A]:-0} ${alphaCount[G]:-0} ${alphaCount[C]:-0} ${alphaCount[T]:-0} ${alphaCount[N]:-0}"
        done
    } | column -t -o ' | '
}
# op_tcl: count the bases A, G, C, T and N in every *.fastq file of the
# current directory with a Tcl dict, then print one row per file as an
# aligned table (header row first).
#
# Takes no arguments. Reads *.fastq from the current directory and writes the
# table to stdout. The Tcl script is fed to tclsh through /dev/stdin; the
# heredoc body and its EOF terminator must stay at column 0.
op_tcl() {
    {
        tclsh /dev/stdin *.fastq <<'EOF'
puts "filename A G C T N"
foreach filename $argv {
    set fh [open $filename]
    set line_number 0
    set counter [dict create]
    while {[gets $fh line] >= 0} {
        # line_number is 0-based, so remainder 1 is the sequence line.
        if {$line_number % 4 == 1} {
            foreach letter [split $line ""] {
                dict incr counter $letter
            }
        }
        incr line_number
    }
    # Release the channel; otherwise one stays open per input file.
    close $fh
    puts -nonewline "$filename"
    foreach letter [split AGCTN ""] {
        # dict get raises an error on a missing key, so report 0 instead.
        if {[dict exists $counter $letter]} {
            puts -nonewline " [dict get $counter $letter]"
        } else {
            puts -nonewline " 0"
        }
    }
    puts ""
}
EOF
    } | column -t -o ' | '
}
# timeit: run a command and report its wall-clock duration.
#
# Arguments: the command to run followed by its arguments, passed through
#            unchanged ("$@" keeps arguments containing spaces intact).
# Output:    whatever the command prints, then "elapsed time: <s.mmm>s".
# Relies on date(1) supporting %N (nanoseconds) and on bc(1) for the
# fractional subtraction, which Bash arithmetic cannot do.
timeit() {
    local start_time end_time elapsed
    start_time="$(date -u +%s.%N)"
    "$@"
    end_time="$(date -u +%s.%N)"
    elapsed="$(bc <<<"$end_time-$start_time")"
    printf "elapsed time: %.3fs\n" "$elapsed"
}
# Benchmark driver: time each implementation in turn, printing a label before
# and a blank line after each run. Each label runs the variant it names.
echo 'perl0:' && timeit op_perl0 && echo
echo 'perl1:' && timeit op_perl1 && echo
echo 'perl2:' && timeit op_perl2 && echo
echo 'python:' && timeit op_python && echo
echo 'awk:' && timeit op_awk && echo
echo 'tcl:' && timeit op_tcl && echo
echo 'pipeline:' && timeit op_pipeline && echo
echo 'bash:' && timeit op_bash
- # python: 1.553s
- # perl2: 3.242s
- # perl0: 3.335s
- # perl1: 3.354s
- # awk: 4.512s
- # tcl: 17.465s
- # pipeline: 19.209s
- # bash: 667.201s
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement