Advertisement
shoaib-santo

Keyword Count Analysis

Oct 26th, 2024
54
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.28 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import pandas as pd
  4. import re
  5.  
  6. # Function to get the webpage content
  def get_webpage_content(url, timeout=10):
      """Download a web page and return its body as text.

      Args:
          url: Address of the page to fetch.
          timeout: Seconds to wait for the server before giving up. Without
              a timeout, ``requests`` waits indefinitely on a stalled
              connection and the script never finishes.

      Returns:
          The decoded response body, or ``None`` when the request fails
          (connection error, timeout, or a 4xx/5xx status). The failure
          reason is printed.
      """
      try:
          response = requests.get(url, timeout=timeout)
          response.raise_for_status()  # Turn 4xx/5xx statuses into exceptions
      except requests.exceptions.RequestException as e:
          # Timeouts are RequestException subclasses, so they land here too.
          print(f"Error fetching the URL: {e}")
          return None
      return response.text
  15.  
  16. # Function to read keywords from a file
  def read_keywords_from_file(filename):
      """Load keywords from a text file, one keyword per line.

      Each line is stripped of surrounding whitespace and lowercased; blank
      lines are skipped.

      Args:
          filename: Path of the keyword file.

      Returns:
          List of keywords in file order, or an empty list when the file
          does not exist (a message is printed in that case).
      """
      try:
          # 'utf-8-sig' reads plain UTF-8 unchanged but also drops a leading
          # byte-order mark. With plain 'utf-8' the BOM stays glued to the
          # first keyword (strip() does not remove it), so that keyword can
          # never match anything on the page.
          with open(filename, 'r', encoding='utf-8-sig') as file:
              stripped_lines = (line.strip() for line in file)
              keywords = [line.lower() for line in stripped_lines if line]
      except FileNotFoundError:
          print(f"File {filename} not found.")
          return []
      return keywords
  25.  
  26. # Function to count keyword occurrences accurately using regular expressions
  def count_keywords(content, keywords):
      """Count whole-word, case-insensitive occurrences of each keyword.

      Args:
          content: Raw HTML of the page to analyze.
          keywords: Iterable of keywords or multi-word phrases.

      Returns:
          Dict mapping every keyword (exactly as passed in) to the number of
          times it occurs in the page text.
      """
      soup = BeautifulSoup(content, 'html.parser')

      # The contents of <script> and <style> are source code, not page copy;
      # leaving them in would count keywords that no visitor ever reads.
      for tag in soup.find_all(['script', 'style']):
          tag.decompose()

      # A separator keeps text from adjacent elements apart; without it
      # "<p>fire</p><p>risk</p>" collapses to "firerisk" and neither word
      # is found as a whole word.
      text = soup.get_text(separator=' ').lower()

      keyword_count = {}
      for keyword in keywords:
          words = keyword.lower().split()
          if not words:
              # An empty keyword would otherwise match at every word boundary.
              keyword_count[keyword] = 0
              continue

          # Lookarounds instead of \b: \b needs a word character on one side,
          # so keywords such as "c++" or ".net" could never match with it.
          # Joining the words with \s+ lets a phrase match across the line
          # breaks and repeated spaces that extracted page text contains.
          phrase = r'\s+'.join(re.escape(word) for word in words)
          pattern = rf'(?<!\w){phrase}(?!\w)'
          keyword_count[keyword] = len(re.findall(pattern, text))

      return keyword_count
  43.  
  44. # Function to export the result to CSV
  def export_to_csv(keyword_count, filename):
      """Write keyword tallies to a CSV file with Keyword and Count columns.

      Args:
          keyword_count: Dict mapping each keyword to its occurrence count.
          filename: Destination path of the CSV file.
      """
      table = pd.DataFrame(
          {
              'Keyword': list(keyword_count.keys()),
              'Count': list(keyword_count.values()),
          }
      )
      # index=False keeps the DataFrame's row numbers out of the file.
      table.to_csv(filename, encoding='utf-8', index=False)
      print(f"Keyword counts have been exported to {filename}")
  49.  
  50. # Main execution
  if __name__ == "__main__":
      # Read keywords from the file
      keywords = read_keywords_from_file('keywords.txt')

      # URL of the webpage to analyze
      url = 'https://alllandlordcertificates.co.uk/fire-risk-assessment'  # Replace with your desired URL

      if not keywords:
          # Nothing to search for, so skip the network request entirely and
          # say why no CSV was produced instead of exiting silently.
          print("No keywords to analyze; nothing was exported.")
      else:
          # Fetch the webpage content
          content = get_webpage_content(url)

          if content:
              # Count keywords and export the result to CSV
              keyword_count = count_keywords(content, keywords)
              export_to_csv(keyword_count, 'keyword_counts.csv')
          else:
              # Covers both a failed request and an empty response body.
              print(f"No content retrieved from {url}; nothing was exported.")
  67.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement