Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Load every XML file under ./data into one DataFrame and fix column types."""
import os
import glob
import xml.etree.ElementTree as ET

import pandas as pd


def parse_records(xml_root):
    """Return one ``{tag: text}`` dict per direct child of *xml_root*.

    Each direct child of the root is treated as one record; the tags of its
    own direct children become the keys and their text the values. A child
    with no sub-elements yields an empty dict.
    """
    return [{elem.tag: elem.text for elem in child} for child in xml_root]


current_dir = os.getcwd()
directory = os.path.join(current_dir, "data")
xml_files = glob.glob(os.path.join(directory, "*.xml"))

# Process all XML files. Rows are gathered in a plain list and the DataFrame
# is built once at the end: calling pd.concat for every single row copies the
# whole frame each time, which is quadratic in the number of rows.
records = []
for file in xml_files:
    xml_root = ET.parse(file).getroot()
    records.extend(parse_records(xml_root))

data = pd.DataFrame(records)

# Check type
print(data.dtypes)

# Change type. Only convert columns that are actually present, so an empty
# ./data directory (or files lacking a field) does not raise KeyError.
# Values that cannot be parsed as numbers become NaN (errors='coerce').
for column in ("price", "quantity"):
    if column in data.columns:
        data[column] = pd.to_numeric(data[column], errors="coerce")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement