Advertisement
SpaceRiver

Untitled

Feb 17th, 2025 (edited)
126
0
Never
1
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.09 KB | None | 0 0
  1. # kmer is 2k bits (represented as an integer)
  2.  
  3.  
  4.  
  5. def abb_minimizer_dna(kmer, w, k, seed):
  6.     # First bits of score are the first 2 bits of the k-mer (since DNA)
  7.     score = kmer >> (2*(k-1))
  8.     for i in range(1, k):
  9.         # Extract the next bit
  10.         bits = kmer >> (2*(k-1-i)) & 3
  11.         if (bits == 0):
  12.             # if A-> shift score to the left and insert 1
  13.             score = (score << 1) + 1
  14.         else:
  15.             # If not A -> just shift score the left
  16.             score = (score << 1)
  17.     # Hash for tie breaking (less than 32**k)
  18.     unique_id= get_kmer_id_dna(k, kmer, seed)
  19.     return 32**k * score + unique_id
  20.  
  21.  
  22. def threshold_minimizer_dna(kmer, w, k, seed):
  23.     # First bits of score are the first 2 bits of the k-mer (since DNA)
  24.     first_nuc = kmer >> (2*(k-1))
  25.     streak = k-1
  26.     for i in range(1, k):
  27.         # Extract the next bit
  28.         bits = kmer >> (2*(k-1-i)) & 3
  29.         if (bits == 0):
  30.             streak = i
  31.             break
  32.     # Hash for tie breaking (less than 32**k)
  33.     unique_id= get_kmer_id_dna(k, kmer, seed)
  34.     return 32**k * first_nuc + 16**k * (k-streak) + unique_id
Advertisement
Comments
Add Comment
Please, Sign In to add comment
Advertisement