Advertisement
pleasedontcode

"History Parser" rev_01

Mar 8th, 2025
249
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /********* Pleasedontcode.com **********
  2.  
  3.     Pleasedontcode thanks you for automatic code generation! Enjoy your code!
  4.  
  5.     - Terms and Conditions:
  6.     You have a non-exclusive, revocable, worldwide, royalty-free license
  7.     for personal and commercial use. Attribution is optional; modifications
  8.     are allowed, but you're responsible for code maintenance. We're not
  9.     liable for any loss or damage. For full terms,
  10.     please visit pleasedontcode.com/termsandconditions.
  11.  
  12.     - Project: "History Parser"
  13.     - Source Code NOT compiled for: Arduino Uno
  14.     - Source Code created on: 2025-03-09 05:46:40
  15.  
  16. ********* Pleasedontcode.com **********/
  17.  
  18. /****** SYSTEM REQUIREMENTS *****/
  19. /****** SYSTEM REQUIREMENT 1 *****/
  20.     /* Takes an AO3 history page's HTML content.  Locates */
  21.     /* the section containing the list of works. */
  22.     /* Extracts information about each work in the list. */
  23.     /* Handles potential errors during extraction.  Skips */
  24.     /* the page if the expected structure is not found. */
  25. /****** END SYSTEM REQUIREMENTS *****/
  26.  
  27. /* START CODE */
  28.  
  29. /****** DEFINITION OF LIBRARIES *****/
  30. #include <BeautifulSoup.h> // Include the BeautifulSoup library for HTML parsing
  31. #include <re.h> // Include the regex library for pattern matching
  32.  
  33. /****** FUNCTION PROTOTYPES *****/
  34. void setup(void);
  35. void loop(void);
  36.  
  37. // USER CODE START
  38. // Parses the works listed on one AO3 history page. The per-work extraction
  39. // step is still a placeholder; this listing has no DataFrame or year filter.
  40. // params:
  41. // - soup: BeautifulSoup object of an AO3 history page
  42. void parse_hist_page(BeautifulSoup soup) {    
  43.     // Placeholder for the work list; overwritten with the find_all() result below
  44.     String work_list = "";    // NOTE(review): declared String but iterated as Element in the for loop below - confirm intended type
  45.     // Compile the regex used to select the per-work <li> elements by class
  46.     Regex regex = re.compile("reading work blurb group *");    
  47.     // Find the <ol> element with class "reading work index group"
  48.     Element work_list_ol = soup.find("ol", {"class": "reading work index group"});    // NOTE(review): {"key": value} looks like Python dict syntax - confirm it compiles
  49.     // Only proceed when the <ol> was found; otherwise warn and skip the page
  50.     if (work_list_ol != NULL) {        
  51.         // Collect the <li> elements whose class is matched against the regex
  52.         work_list = work_list_ol.find_all("li", {"class": regex});        
  53.         for (Element w : work_list) {            
  54.             try {              
  55.                 // Extract work information (title, author, etc.)
  56.                 // ... (rest of your code to extract work details) ...
  57.             } catch (RuntimeError e) {    // try body is still empty in this listing, so nothing can throw yet
  58.                 Serial.println("Error adding work.");                
  59.                 // Optionally print the work element for debugging
  60.                 // Serial.println(w);                
  61.                 continue; // Skip to the next work
  62.             }    
  63.         }
  64.     } else {        
  65.         Serial.println("Warning: Could not find the expected <ol> element. Skipping this page.");        
  66.         // Additional debugging information can be added here if needed.
  67.     }
  68. }
  69. // USER CODE END
  70.  
  71. void setup(void)
  72. {
  73.     // Open the serial port at 9600 baud; parse_hist_page() reports its errors and warnings via Serial.println()
  74.     Serial.begin(9600);
  75.     // Additional setup code can go here
  76. }
  77.  
  78. void loop(void)
  79. {
  80.     // Main code to run repeatedly can go here. Currently empty: parse_hist_page() is never called in this listing.
  81. }
  82.  
  83. /* END CODE */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement