Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
# Imports: requests + BeautifulSoup for scraping, csv for output.
import os
import requests
from bs4 import BeautifulSoup
import csv

# Base URL of the pharmacie.lu site
base_url = "https://www.pharmacie.lu"
# Listing page that links to every individual pharmacy detail page.
pharmacies_url = f"{base_url}/pharmacies"

# Output CSV file
output_csv = "pharmacy_contacts.csv"

# Initialize a session for faster performance (connection reuse across requests).
session = requests.Session()
def get_pharmacy_links():
    """Fetch all pharmacy detail-page links from the main pharmacies page.

    Returns:
        list[str]: Absolute URLs, one per pharmacy card anchor. Empty list
        when the listing page cannot be fetched.
    """
    response = session.get(pharmacies_url)
    if response.status_code != 200:
        print("Failed to fetch the pharmacies page.")
        return []
    soup = BeautifulSoup(response.text, "html.parser")
    pharmacy_links = []
    # Find all pharmacy cards and extract links.
    # NOTE(review): ".pharmacies-card a" assumes the current site markup —
    # re-check the selector if this ever starts returning 0 links.
    for card in soup.select(".pharmacies-card a"):
        href = card.get("href")
        # Guard: anchors without an href would raise KeyError with card["href"].
        if not href:
            continue
        pharmacy_links.append(requests.compat.urljoin(base_url, href))
    print(f"Found {len(pharmacy_links)} pharmacy links.")
    return pharmacy_links
def extract_pharmacy_info(pharmacy_url):
    """Extract the pharmacy name and email address from one detail page.

    Args:
        pharmacy_url: Absolute URL of an individual pharmacy page.

    Returns:
        dict | None: ``{"Name": ..., "Email": ...}`` on success, or ``None``
        when the page cannot be fetched.
    """
    response = session.get(pharmacy_url)
    if response.status_code != 200:
        print(f"Failed to fetch {pharmacy_url} (status code: {response.status_code})")
        return None
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract the pharmacy name
    name_tag = soup.find("h1", class_="pharmacie-page-title")
    pharmacy_name = name_tag.get_text(strip=True) if name_tag else "Unknown"

    # Extract the email address.
    # NOTE(review): a literal <pharmacie email="..."> element is unusual
    # markup — confirm against the live site. Keep the original lookup first
    # for compatibility, then fall back to the conventional mailto: anchor.
    email_tag = soup.find("pharmacie", email=True)
    if email_tag:
        email = email_tag["email"]
    else:
        mailto = soup.find("a", href=lambda h: h and h.startswith("mailto:"))
        if mailto:
            # "mailto:addr" -> "addr" (query parts like ?subject= are kept as-is).
            email = mailto["href"].split(":", 1)[1]
        else:
            email = "No email provided"

    print(f"Extracted info for {pharmacy_name}")
    return {
        "Name": pharmacy_name,
        "Email": email
    }
def save_to_csv(pharmacy_data):
    """Write the collected pharmacy records to the configured CSV file.

    Args:
        pharmacy_data: Iterable of dicts with "Name" and "Email" keys.
    """
    columns = ["Name", "Email"]
    with open(output_csv, mode="w", newline='', encoding="utf-8") as file:
        csv_writer = csv.DictWriter(file, fieldnames=columns)
        csv_writer.writeheader()
        # One row per pharmacy record, in the order received.
        for record in pharmacy_data:
            csv_writer.writerow(record)
    print(f"Data saved to {output_csv}")
def main():
    """Drive the scrape: collect links, visit each page, persist to CSV."""
    # Step 1: gather the detail-page URLs from the listing page.
    links = get_pharmacy_links()

    # Step 2: visit each detail page, keeping only successfully parsed records.
    records = []
    for url in links:
        info = extract_pharmacy_info(url)
        if info:
            records.append(info)

    # Step 3: write everything out in a single pass.
    save_to_csv(records)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement