Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
# Imports: requests + BeautifulSoup for scraping, csv for output.
import os
import requests
from bs4 import BeautifulSoup
import csv

# Base URL of the pharmacie.lu site
base_url = "https://www.pharmacie.lu"
# Listing page that links to every individual pharmacy detail page.
pharmacies_url = f"{base_url}/pharmacies"

# Output CSV file
output_csv = "pharmacy_contacts.csv"

# Initialize a session for faster performance (connection reuse across requests).
session = requests.Session()
def get_pharmacy_links():
    """Fetch all pharmacy detail-page links from the main pharmacies page.

    Returns:
        list[str]: Absolute URLs, one per pharmacy card anchor. Empty list
        when the listing page cannot be fetched.
    """
    response = session.get(pharmacies_url)
    if response.status_code != 200:
        print("Failed to fetch the pharmacies page.")
        return []
    soup = BeautifulSoup(response.text, "html.parser")
    pharmacy_links = []
    # Find all pharmacy cards and extract links.
    # NOTE(review): ".pharmacies-card a" assumes the current site markup —
    # re-check the selector if this ever starts returning 0 links.
    for card in soup.select(".pharmacies-card a"):
        href = card.get("href")
        # Guard: anchors without an href would raise KeyError with card["href"].
        if not href:
            continue
        pharmacy_links.append(requests.compat.urljoin(base_url, href))
    print(f"Found {len(pharmacy_links)} pharmacy links.")
    return pharmacy_links
def extract_pharmacy_info(pharmacy_url):
    """Extract the pharmacy name and email address from one detail page.

    Args:
        pharmacy_url: Absolute URL of an individual pharmacy page.

    Returns:
        dict | None: ``{"Name": ..., "Email": ...}`` on success, or ``None``
        when the page cannot be fetched.
    """
    response = session.get(pharmacy_url)
    if response.status_code != 200:
        print(f"Failed to fetch {pharmacy_url} (status code: {response.status_code})")
        return None
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract the pharmacy name
    name_tag = soup.find("h1", class_="pharmacie-page-title")
    pharmacy_name = name_tag.get_text(strip=True) if name_tag else "Unknown"

    # Extract the email address.
    # NOTE(review): a literal <pharmacie email="..."> element is unusual
    # markup — confirm against the live site. Keep the original lookup first
    # for compatibility, then fall back to the conventional mailto: anchor.
    email_tag = soup.find("pharmacie", email=True)
    if email_tag:
        email = email_tag["email"]
    else:
        mailto = soup.find("a", href=lambda h: h and h.startswith("mailto:"))
        if mailto:
            # "mailto:addr" -> "addr" (query parts like ?subject= are kept as-is).
            email = mailto["href"].split(":", 1)[1]
        else:
            email = "No email provided"

    print(f"Extracted info for {pharmacy_name}")
    return {
        "Name": pharmacy_name,
        "Email": email
    }
def save_to_csv(pharmacy_data):
    """Write the collected pharmacy records to the configured CSV file.

    Args:
        pharmacy_data: Iterable of dicts with "Name" and "Email" keys.
    """
    columns = ["Name", "Email"]
    with open(output_csv, mode="w", newline='', encoding="utf-8") as file:
        csv_writer = csv.DictWriter(file, fieldnames=columns)
        csv_writer.writeheader()
        # One row per pharmacy record, in the order received.
        for record in pharmacy_data:
            csv_writer.writerow(record)
    print(f"Data saved to {output_csv}")
def main():
    """Drive the scrape: collect links, visit each page, persist to CSV."""
    # Step 1: gather the detail-page URLs from the listing page.
    links = get_pharmacy_links()

    # Step 2: visit each detail page, keeping only successfully parsed records.
    records = []
    for url in links:
        info = extract_pharmacy_info(url)
        if info:
            records.append(info)

    # Step 3: write everything out in a single pass.
    save_to_csv(records)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement