Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Strip timed-text XML markup from a saved caption file.

Reads ``text.txt``, removes the ``<timedtext>``, ``<body>`` and ``<p ...>``
tags, drops a leading "This XML ..." line, double-spaces the remaining
lines, then prints the result and writes it to ``text2.txt``.
"""
import re
import sys

# Any opening tag whose name starts with "p" (e.g. ``<p t="0" d="100">``).
# The match is bounded at the tag's own ``>``: the previous ``<p.*>`` was
# greedy up to the LAST ``>`` on the line, so whenever ``</p>`` sat on the
# same line the caption text between the tags was deleted too.
# ``[^>]*`` also lets a tag that wraps across a line break be removed.
_OPEN_P_TAG = re.compile(r'<p[^>]*>')
_CLOSE_P_TAG = re.compile(r'</p>')
# Container tags; ``<timedtext ...>`` is bounded the same way as ``<p ...>``.
_CONTAINER_TAGS = re.compile(r'<body>|</body>|<timedtext[^>]*>|</timedtext>')
# No re.MULTILINE on purpose: only a line at the very start of the text is
# removed, never a caption line that happens to begin with "This XML".
# NOTE(review): presumably this targets the notice a browser prints above
# raw XML -- confirm against a real input file.
_LEADING_BANNER = re.compile(r'^This XML.*.')
_NEWLINE_RUN = re.compile(r'\n+')


def strip_timedtext_markup(text):
    """Return *text* with the timed-text tags removed and lines double-spaced.

    Args:
        text: Full contents of the caption file as a single string.

    Returns:
        The caption text with ``<p ...>``, ``</p>``, ``<body>``, ``</body>``,
        ``<timedtext ...>`` and ``</timedtext>`` removed, a leading
        "This XML ..." line dropped, and every run of one or more newlines
        replaced by exactly two newlines.
    """
    text = _OPEN_P_TAG.sub("", text)
    text = _CLOSE_P_TAG.sub("", text)
    text = _CONTAINER_TAGS.sub("", text)
    # Runs after tag removal but before newline normalisation, so the banner
    # is still anchored at the start of the string.
    text = _LEADING_BANNER.sub("", text)
    return _NEWLINE_RUN.sub("\n\n", text)


def main(src_path='text.txt', dst_path='text2.txt'):
    """Clean *src_path*, echo the result to stdout and save it to *dst_path*.

    Both files are opened as UTF-8 rather than the locale default.
    NOTE(review): assumes the saved caption file is UTF-8 -- confirm.

    Raises:
        OSError: If *src_path* cannot be read or *dst_path* cannot be written.
        UnicodeDecodeError: If *src_path* is not valid UTF-8.
    """
    with open(src_path, 'r', encoding='utf-8') as rf:
        file_contents = rf.read()

    cleaned = strip_timedtext_markup(file_contents)

    # end='' because the cleaned text already carries its own newlines.
    print(cleaned, end='')
    with open(dst_path, 'w', encoding='utf-8') as wf:
        wf.write(cleaned)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement