Advertisement
jhangyu

VCF Combination.sh

Jun 15th, 2018
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.78 KB | None | 0 0
  # Step 1: make sure every plain *.vcf file in the current directory has a
  # bgzip-compressed copy (ID.vcf.gz) and a tabix index (ID.vcf.gz.tbi).

  # Space-separated list of sample IDs; it is filled in by a later step.
  sum_id=

  # compress_and_index ID
  #   Create ID.vcf.gz (via bgzip, from ID.vcf) and ID.vcf.gz.tbi (via tabix)
  #   when they do not exist yet. Nothing is done if the index already exists.
  #   Returns non-zero as soon as bgzip or tabix fails, so tabix is never run
  #   on a file that failed to compress.
  compress_and_index() {
    local id=$1
    if [ -f "${id}.vcf.gz.tbi" ]; then
      return 0
    fi
    if [ ! -f "${id}.vcf.gz" ]; then
      # bgzip runs in the foreground, so no PID bookkeeping / wait is needed.
      bgzip "${id}.vcf" || return
    fi
    tabix -p vcf "${id}.vcf.gz"
  }

  # Glob instead of parsing `ls | grep`: only names that really end in the
  # literal suffix ".vcf" match, and names with spaces stay intact.
  for vcf in ./*.vcf; do
    [ -f "$vcf" ] || continue # unmatched glob or not a regular file
    ID=${vcf#./}
    ID=${ID%.vcf}
    echo "Compress and index the ${ID}.vcf files"
    compress_and_index "$ID" ||
      echo "Failed to compress/index ${ID}.vcf" >&2
  done
  17.  
  # Step 2: gather the sample IDs of all compressed VCF files.

  # collect_vcf_ids
  #   Append the base name (file name minus ".vcf.gz") of every regular
  #   *.vcf.gz file in the current directory to the global, space-separated
  #   list sum_id. A glob is used instead of `ls | grep` so that only names
  #   ending in the literal suffix ".vcf.gz" are accepted (index files such as
  #   ID.vcf.gz.tbi are not matched).
  collect_vcf_ids() {
    local gz id
    for gz in ./*.vcf.gz; do
      [ -f "$gz" ] || continue # unmatched glob or not a regular file
      id=${gz#./}
      sum_id="$sum_id ${id%.vcf.gz}"
    done
  }

  collect_vcf_ids
  # Print the list without the leading separator added by the first append.
  echo "${sum_id# }"
  24.  
  echo Pick 2 in all IDs

  # intersect_pairs ID...
  #   For every unordered pair (a, b) of the given IDs, in argument order:
  #     1. run `bcftools isec -p a_b a.vcf.gz b.vcf.gz`;
  #     2. move a_b/0002.vcf to two_pair/a_b.vcf (per the bcftools isec
  #        documentation, 0002.vcf holds the records of the first input that
  #        are shared with the second).
  #   The working directory is never changed, so one failed pair cannot
  #   derail the following ones. Returns non-zero if any pair failed.
  intersect_pairs() {
    local ids=("$@")
    local n=$#
    local i j a b rc=0
    for ((i = 0; i < n; i++)); do
      a=${ids[i]}
      for ((j = i + 1; j < n; j++)); do
        b=${ids[j]}
        printf "%s - %s\n" "$a" "$b"
        # bcftools runs in the foreground; no PID bookkeeping / wait needed.
        if bcftools isec -p "${a}_${b}" "${a}.vcf.gz" "${b}.vcf.gz" &&
          mkdir -p two_pair &&
          mv "${a}_${b}/0002.vcf" "two_pair/${a}_${b}.vcf"; then
          echo "Finish intersection ${a}_${b}"
        else
          echo "Intersection ${a}_${b} failed" >&2
          rc=1
        fi
        echo
        echo
      done
    done
    return "$rc"
  }

  # sum_id is a space-separated ID list, so it is split into words on purpose.
  # shellcheck disable=SC2086
  intersect_pairs $sum_id
  46.  
  echo Compress and index the two_pair vcf files

  # compress_and_index_dir DIR
  #   For every regular DIR/*.vcf file that has no DIR/<name>.vcf.gz.tbi yet:
  #   bgzip it (unless DIR/<name>.vcf.gz already exists) and build the tabix
  #   index. Files are addressed by path instead of cd-ing into DIR, so a
  #   missing DIR can neither make the loop work on the wrong directory nor
  #   leave the script one level above where it started.
  #   Returns non-zero if DIR is missing or any file failed.
  compress_and_index_dir() {
    local dir=$1
    local vcf id rc=0
    if [ ! -d "$dir" ]; then
      echo "Directory ${dir} not found; nothing to compress" >&2
      return 1
    fi
    for vcf in "$dir"/*.vcf; do
      [ -f "$vcf" ] || continue # unmatched glob or not a regular file
      [ -f "${vcf}.gz.tbi" ] && continue
      id=${vcf##*/}
      id=${id%.vcf}
      if [ ! -f "${vcf}.gz" ]; then
        echo "Compress ${id}.vcf"
        # Foreground command: no PID bookkeeping / wait needed.
        if ! bgzip "$vcf"; then
          echo "Failed to compress ${id}.vcf" >&2
          rc=1
          continue
        fi
      fi
      echo "Index ${id}.vcf"
      if ! tabix -p vcf "${vcf}.gz"; then
        echo "Failed to index ${id}.vcf.gz" >&2
        rc=1
      fi
    done
    return "$rc"
  }

  compress_and_index_dir two_pair
  65.  
  echo Pick 3 in all IDs

  # intersect_triples ID...
  #   For every unordered triple (a, b, c) of the given IDs, in argument
  #   order:
  #     1. run `bcftools isec -p a_b_c two_pair/a_b.vcf.gz c.vcf.gz`, i.e.
  #        intersect the already computed pair result with the third sample;
  #     2. move a_b_c/0002.vcf to three_pair/a_b_c.vcf.
  #   Index-based loops are used on purpose: the earlier nested
  #   `for ...; do shift` form shared one positional-parameter list between
  #   all three loops, which was emptied during the first outer iteration, so
  #   only triples containing the first ID were ever produced.
  #   The working directory is never changed. Returns non-zero if any triple
  #   failed (for example because two_pair/a_b.vcf.gz is missing).
  intersect_triples() {
    local ids=("$@")
    local n=$#
    local i j k a b c rc=0
    for ((i = 0; i < n; i++)); do
      a=${ids[i]}
      for ((j = i + 1; j < n; j++)); do
        b=${ids[j]}
        for ((k = j + 1; k < n; k++)); do
          c=${ids[k]}
          printf "%s - %s - %s\n" "$a" "$b" "$c"
          # bcftools runs in the foreground; no PID bookkeeping / wait needed.
          if bcftools isec -p "${a}_${b}_${c}" \
            "two_pair/${a}_${b}.vcf.gz" "${c}.vcf.gz" &&
            mkdir -p three_pair &&
            mv "${a}_${b}_${c}/0002.vcf" "three_pair/${a}_${b}_${c}.vcf"; then
            echo "Finish intersection ${a}_${b}_${c}"
          else
            echo "Intersection ${a}_${b}_${c} failed" >&2
            rc=1
          fi
          echo
          echo
        done
      done
    done
    return "$rc"
  }

  # sum_id is a space-separated ID list, so it is split into words on purpose.
  # shellcheck disable=SC2086
  intersect_triples $sum_id
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement