Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Report statistics about runs of 'N' characters in a FASTA file:
# total sequence length, total number of 'N' characters, and how many
# contiguous runs of 'N' reach a minimum length.
import re

FASTA_PATH = 'sequence.fasta'
MIN_RUN_LEN = 1000
PROGRESS_EVERY = 10000


def read_fasta_records(path, progress_every=PROGRESS_EVERY):
    """Return the sequences stored in the FASTA file at *path*.

    Lines starting with '>' are treated as record description lines: they
    are not sequence data, so they are excluded from the result and only
    mark the boundary between records. The remaining lines of a record are
    joined, with their line endings removed, into one string.

    Args:
        path: Path of the file to read.
        progress_every: Print a progress message every this many lines
            (line 0 included). A falsy value disables the messages.

    Returns:
        A list with one sequence string per non-empty record.
    """
    records = []
    parts = []
    with open(path, 'r') as handle:
        for line_no, line in enumerate(handle):
            if progress_every and line_no % progress_every == 0:
                print(f'{line_no} strings passed')
            if line.startswith('>'):
                # A new record starts here; flush the one collected so far.
                if parts:
                    records.append(''.join(parts))
                    parts = []
                continue
            # Strip only the line terminator, so a final line without a
            # trailing newline keeps its last character.
            parts.append(line.rstrip('\r\n'))
    if parts:
        records.append(''.join(parts))
    return records


def n_run_lengths(seq):
    """Return the lengths of all maximal runs of 'N' in *seq*, in order.

    Only uppercase 'N' is matched. Runs touching the start or the end of
    the string are included.
    """
    return [match.end() - match.start() for match in re.finditer('N+', seq)]


def count_long_runs(lengths, min_len):
    """Return how many values in *lengths* are greater than or equal to *min_len*."""
    return sum(1 for length in lengths if length >= min_len)


def main():
    """Read FASTA_PATH and print the length, 'N' count and long-run count."""
    records = read_fasta_records(FASTA_PATH)
    print(f'Total len is {sum(len(seq) for seq in records)}')
    print(f'Amount of N is {sum(seq.count("N") for seq in records)}')

    # Runs are collected per record so that a run ending one record is not
    # merged with a run starting the next one.
    run_lengths = [n for seq in records for n in n_run_lengths(seq)]
    ans = count_long_runs(run_lengths, MIN_RUN_LEN)
    # NOTE: the threshold is inclusive (>=) although the message says "longer than".
    print(f'{ans} strings of N are longer than {MIN_RUN_LEN}')


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement