Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # kmer is 2k bits (represented as an integer)
def abb_minimizer_dna(kmer, w, k, seed):
    """Build an integer ordering key for a 2-bit-encoded DNA k-mer.

    ``kmer`` is read as ``k`` two-bit fields, most significant field first.
    The key is assembled from three parts, highest weight first:

    * the leading field, kept verbatim (two bits);
    * one flag bit for every following field, set to 1 when the field is 0
      (the value the surrounding comments treat as ``A``) and to 0 otherwise;
    * the value returned by ``get_kmer_id_dna(k, kmer, seed)``, added as a
      tie-breaker below the flag bits, which are scaled by ``32**k``.

    Args:
        kmer: The k-mer packed into an integer, two bits per nucleotide.
        w: Not used by this function; kept so the signature is unchanged.
        k: Number of nucleotides (two-bit fields) in ``kmer``.
        seed: Passed through unchanged to ``get_kmer_id_dna``.

    Returns:
        ``32**k * flags + get_kmer_id_dna(k, kmer, seed)``.
    """
    # Shift that brings the leading nucleotide down to the low two bits.
    top_shift = 2 * (k - 1)
    rank = kmer >> top_shift

    # Walk the remaining nucleotides from most to least significant; each
    # one contributes a single bit to the rank.
    for shift in range(top_shift - 2, -1, -2):
        nucleotide = (kmer >> shift) & 3
        rank <<= 1
        if nucleotide == 0:
            rank += 1

    # NOTE(review): the original comment states this id is below 32**k;
    # get_kmer_id_dna is defined elsewhere, so that range is unverified here.
    tie_breaker = get_kmer_id_dna(k, kmer, seed)
    return 32**k * rank + tie_breaker
def threshold_minimizer_dna(kmer, w, k, seed):
    """Build an integer ordering key from the leading nucleotide and the
    position of the next zero-valued nucleotide.

    ``kmer`` is read as ``k`` two-bit fields, most significant field first.
    The key is the sum of three terms:

    * ``32**k`` times the leading field;
    * ``16**k`` times ``k - p``, where ``p`` is the index (counted from 1,
      the field right after the leading one) of the first field equal to 0,
      the value the surrounding comments treat as ``A``. When no such field
      exists ``p`` is ``k - 1``, the same value as when the first zero field
      is the last one;
    * the value returned by ``get_kmer_id_dna(k, kmer, seed)``.

    Args:
        kmer: The k-mer packed into an integer, two bits per nucleotide.
        w: Not used by this function; kept so the signature is unchanged.
        k: Number of nucleotides (two-bit fields) in ``kmer``.
        seed: Passed through unchanged to ``get_kmer_id_dna``.

    Returns:
        ``32**k * lead + 16**k * (k - p) + get_kmer_id_dna(k, kmer, seed)``.
    """
    # Shift that brings the leading nucleotide down to the low two bits.
    top_shift = 2 * (k - 1)
    lead = kmer >> top_shift

    # Lazily scan positions 1..k-1 and stop at the first zero-valued field.
    zero_positions = (
        pos
        for pos in range(1, k)
        if ((kmer >> (top_shift - 2 * pos)) & 3) == 0
    )
    first_zero_pos = next(zero_positions, k - 1)

    # NOTE(review): the original comment states this id is below 32**k, yet
    # the position term above it is only scaled by 16**k; confirm the range
    # of get_kmer_id_dna so the tie-breaker cannot spill into that term.
    tie_breaker = get_kmer_id_dna(k, kmer, seed)
    return 32**k * lead + 16**k * (k - first_zero_pos) + tie_breaker
Advertisement
Advertisement