Advertisement
icarussiano

Untitled

Feb 19th, 2024 (edited)
774
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.44 KB | None | 0 0
  1. from Bio import SeqIO
  2. import re
  3.  
  4.  
  def cercapatterns(input_filename, patterns, output_filename):
      """Search every FASTA record for regex motifs and write a text report.

      Each record of ``input_filename`` is parsed with ``SeqIO.parse`` in
      ``'fasta'`` format and scanned with every regular expression in
      ``patterns``, in order. A hit is identified by the pair
      ``(start offset, matched text)``; a hit that an earlier pattern already
      produced for the same record is not reported again by later patterns.

      The report written to ``output_filename`` contains, per record, a
      header line with the record description and, for every pattern that
      has at least one remaining hit, a count line followed by one line per
      hit. Positions are the 0-based offsets returned by ``Match.start()``.

      Args:
          input_filename: Path of the FASTA file to read.
          patterns: Iterable of regular-expression strings to search for.
          output_filename: Path of the report file; it is overwritten.
      """
      results = {}
      with open(input_filename, 'r') as file:
          for record in SeqIO.parse(file, 'fasta'):
              # Convert once per record instead of once per pattern.
              sequence = str(record.seq)
              seen = set()
              results[record.description] = []
              for pattern in patterns:
                  # Build the de-duplicated list in a single pass. Removing
                  # items from a list while iterating over it skips the
                  # element after each removal, which let some duplicates
                  # survive.
                  matches = []
                  for found in re.finditer(pattern, sequence):
                      hit = (found.start(), found.group())
                      if hit not in seen:
                          seen.add(hit)
                          matches.append(hit)
                  if matches:
                      results[record.description].append(
                          (len(record.seq), pattern, matches)
                      )
      with open(output_filename, 'w') as outfile:
          for gene, values in results.items():
              outfile.write(f"Risultati per il gene: {gene}\n")
              # The stored sequence length is not used in the report.
              for _length, pattern, matches in values:
                  outfile.write(f"Trovato/i {len(matches)} occorrenze del motivo {pattern}\n")
                  for position, match_str in matches:
                      # Lower-case the first and last character of the hit
                      # (presumably to mark the flanking wildcard positions
                      # of the motifs -- confirm with the author).
                      match_str = match_str[:1].lower() + match_str[1:-1] + match_str[-1:].lower()
                      outfile.write(f"Trovato il motivo {match_str} alla posizione {position}\n")
                  outfile.write("\n")
  29.  
  30.  
  # Regular-expression motifs to search for, tried in this order.
  motivi = [
      '.AGT.{3,11}ACT.',
      '.TAGT.{3,11}ATC.',
      '.AG.{3,11}ACT.',
      '.AGT.{3,11}CT.',
  ]

  # NOTE(review): input_file and output_file are not defined in the visible
  # source; they must be bound before this call or it raises NameError.
  cercapatterns(input_file, motivi, output_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement