Advertisement
Diadon81

List of Pharmacies

Nov 7th, 2024
35
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.58 KB | None | 0 0
  1. import os
  2. import requests
  3. from bs4 import BeautifulSoup
  4. import csv
  5.  
  6. # Base URL of the pharmacie.lu site
  7. base_url = "https://www.pharmacie.lu"
  8. pharmacies_url = f"{base_url}/pharmacies"
  9.  
  10. # Output CSV file
  11. output_csv = "pharmacy_contacts.csv"
  12.  
  13. # Initialize a session for faster performance
  14. session = requests.Session()
  15.  
  16. def get_pharmacy_links():
  17.     """Fetch all pharmacy links from the main pharmacies page."""
  18.     response = session.get(pharmacies_url)
  19.     if response.status_code != 200:
  20.         print("Failed to fetch the pharmacies page.")
  21.         return []
  22.  
  23.     soup = BeautifulSoup(response.text, "html.parser")
  24.     pharmacy_links = []
  25.    
  26.     # Find all pharmacy cards and extract links
  27.     for card in soup.select(".pharmacies-card a"):
  28.         pharmacy_link = requests.compat.urljoin(base_url, card["href"])
  29.         pharmacy_links.append(pharmacy_link)
  30.    
  31.     print(f"Found {len(pharmacy_links)} pharmacy links.")
  32.     return pharmacy_links
  33.  
  34. def extract_pharmacy_info(pharmacy_url):
  35.     """Extract pharmacy name and email from the pharmacy page."""
  36.     response = session.get(pharmacy_url)
  37.     if response.status_code != 200:
  38.         print(f"Failed to fetch {pharmacy_url} (status code: {response.status_code})")
  39.         return None
  40.  
  41.     soup = BeautifulSoup(response.text, "html.parser")
  42.  
  43.     # Extract the pharmacy name
  44.     name_tag = soup.find("h1", class_="pharmacie-page-title")
  45.     pharmacy_name = name_tag.get_text(strip=True) if name_tag else "Unknown"
  46.  
  47.     # Extract the email address
  48.     email_tag = soup.find("pharmacie", email=True)
  49.     email = email_tag["email"] if email_tag else "No email provided"
  50.  
  51.     print(f"Extracted info for {pharmacy_name}")
  52.     return {
  53.         "Name": pharmacy_name,
  54.         "Email": email
  55.     }
  56.  
  57. def save_to_csv(pharmacy_data):
  58.     """Save the list of pharmacy information to a CSV file."""
  59.     with open(output_csv, mode="w", newline='', encoding="utf-8") as file:
  60.         writer = csv.DictWriter(file, fieldnames=["Name", "Email"])
  61.         writer.writeheader()
  62.         writer.writerows(pharmacy_data)
  63.     print(f"Data saved to {output_csv}")
  64.  
  65. def main():
  66.     # Get all individual pharmacy links from the main page
  67.     pharmacy_links = get_pharmacy_links()
  68.  
  69.     # List to hold extracted data
  70.     pharmacy_data = []
  71.  
  72.     # Fetch and extract data for each pharmacy
  73.     for pharmacy_url in pharmacy_links:
  74.         info = extract_pharmacy_info(pharmacy_url)
  75.         if info:
  76.             pharmacy_data.append(info)
  77.  
  78.     # Save all data to CSV
  79.     save_to_csv(pharmacy_data)
  80.  
  81. if __name__ == "__main__":
  82.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement