Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import re
import sys

from Bio import SeqIO
def _unique_matches(sequence, regex, seen):
    """Return the matches of ``regex`` in ``sequence`` that are not in ``seen``.

    Each match is a ``(start, text)`` tuple, where ``start`` is the 0-based
    offset reported by ``Match.start()``. Newly found matches are added to
    ``seen`` in place so later calls for other patterns skip them.
    """
    fresh = []
    for found in regex.finditer(sequence):
        hit = (found.start(), found.group())
        if hit not in seen:
            seen.add(hit)
            fresh.append(hit)
    return fresh


def cercapatterns(input_filename, patterns, output_filename):
    """Search every FASTA record for regex motifs and write a text report.

    Args:
        input_filename: Path of the FASTA file to read.
        patterns: Iterable of regular-expression strings. They are tried in
            order; a ``(start, text)`` hit already reported for an earlier
            pattern in the same record is not reported again.
        output_filename: Path of the report file (overwritten if it exists).

    Matching uses ``finditer``, so hits of a single pattern never overlap.
    Results are keyed by ``record.description``; if two records share a
    description, only the last one appears in the report.
    """
    # Materialise and compile once so the work is not repeated per record
    # (and so a one-shot iterable of patterns is usable for every record).
    compiled = [(pattern, re.compile(pattern)) for pattern in patterns]

    results = {}
    with open(input_filename, 'r') as file:
        for record in SeqIO.parse(file, 'fasta'):
            sequence = str(record.seq)
            trovati = set()  # hits already reported for this record
            per_pattern = results[record.description] = []
            for pattern, regex in compiled:
                # Build a new list rather than removing from the list being
                # iterated: in-place removal skips the element that follows
                # each removed duplicate.
                matches = _unique_matches(sequence, regex, trovati)
                if matches:
                    per_pattern.append((pattern, matches))

    with open(output_filename, 'w') as outfile:
        for gene, values in results.items():
            outfile.write(f"Risultati per il gene: {gene}\n")
            for pattern, matches in values:
                outfile.write(f"Trovato/i {len(matches)} occorrenze del motivo {pattern}\n")
                for position, match_str in matches:
                    # Lower-case the first and last characters of the hit
                    # (presumably to mark the flanking bases -- confirm).
                    match_str = match_str[:1].lower() + match_str[1:-1] + match_str[-1:].lower()
                    outfile.write(f"Trovato il motivo {match_str} alla posizione {position}\n")
            # Blank line between genes.
            outfile.write("\n")
# Regular-expression motifs searched for in every sequence.
motivi = [r'.AGT.{3,11}ACT.', r'.TAGT.{3,11}ATC.', r'.AG.{3,11}ACT.', r'.AGT.{3,11}CT.']

if __name__ == "__main__":
    # The input and output paths were previously undefined names; take them
    # from the command line instead.
    if len(sys.argv) != 3:
        sys.exit(f"Usage: {sys.argv[0]} INPUT_FASTA OUTPUT_FILE")
    input_file, output_file = sys.argv[1], sys.argv[2]
    cercapatterns(input_file, motivi, output_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement