Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- "use strict";
- // Node script to sequentially perform HTTP requests on a site
- // - with a varying query parameter,
- // - extracting data from the HTML response
- // - collecting results in an array
- // - finally writing them to a file
- //
- // The HTTP requests are performed one by one, synchronously, waiting for the result,
- // in order to avoid server overload
- // Starting the requests asynchronously instead could result in a server side HTTP error 502
- //
- // In this example, we fetch data from our local version of https://divinumofficium.com/
- // incrementing the query parameter 'date' one by one
- //
- // For very large result sets, one might use a buffered write stream instead
- // In this case, in Node, one may use a stream with fs.createWriteStream(),
- // or the low level API fs.openSync(), generating a file descriptor like in C
- //
- // We are using the simple library node-html-parser for extracting the data from the HTML response.
- // jsdom would be an alternative, but would be too heavyweight for a simple selector extraction,
- // the purpose of jsdom seems to be more to provide a kind of browser emulation
- const fs = require('fs');
- const html = require('node-html-parser');
- const request = require('request');
// Name of the JSON file the collected results are written to
const RESULT_FILE = 'missa.json';
// Endpoint that is queried once per day with a `date` query parameter
const URL = 'http://localhost/missa/cgi-bin/missa/missa.pl';

// Command line: node <script> [startDate (yyyy-mm-dd)] [numDays]
// Both arguments are optional and fall back to the defaults below.
const [, , startDate = '2021-01-01', numDaysArg = '10'] = process.argv;
// argv entries are strings; convert once so the day count is a real number.
const numDays = Number(numDaysArg);

if (Number.isNaN(new Date(startDate).getTime())) {
  console.error(`Invalid start date '${startDate}', expected yyyy-mm-dd`);
  process.exitCode = 1;
} else if (!Number.isInteger(numDays) || numDays < 0) {
  console.error(`Invalid number of days '${numDaysArg}', expected a non-negative integer`);
  process.exitCode = 1;
} else {
  // run() is async: attach a handler so a failure is reported and reflected
  // in the exit code instead of surfacing as an unhandled rejection.
  run(startDate, numDays).catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
}
/**
 * Main loop: calls getMissa() for `numDays` consecutive days, collects the
 * results and finally passes them to writeResults().
 *
 * Each request is awaited before the next one is started, so only one request
 * is in flight at a time. A day whose request fails is logged with
 * console.error and skipped; the remaining days are still processed.
 *
 * @param {string} startDate - First day to fetch. A date-only ISO string
 *   (yyyy-mm-dd) is interpreted by `new Date()` as midnight UTC.
 * @param {number|string} numDays - Number of consecutive days to fetch.
 * @returns {Promise<void>} Settles after the results have been written.
 */
async function run(startDate, numDays) {
  const firstDay = new Date(startDate);
  const dayCount = Number(numDays);
  const results = [];

  for (let offset = 0; offset < dayCount; offset++) {
    // Advance in UTC: the start date is parsed as UTC and format() reads the
    // UTC fields, so local-time arithmetic would drift across DST changes.
    // A fresh Date per iteration keeps getMissa() from seeing later mutations.
    const date = new Date(firstDay);
    date.setUTCDate(firstDay.getUTCDate() + offset);

    try {
      results.push(await getMissa(date));
    } catch (err) {
      console.error(err);
    }
  }

  writeResults(results);
}
/**
 * Requests the page for a single day and resolves with the data extracted
 * from the HTML that comes back.
 *
 * @param {Date} date - Day to query. It is formatted right away, before the
 *   request is started.
 * @returns {Promise<{date: string, title: *}>} Promise returned by fetchURL();
 *   it yields the formatted date together with whatever extract() returns for
 *   the response body.
 */
function getMissa(date) {
  const dateFormatted = format(date);
  const requestUrl = `${URL}?date=${dateFormatted}`;

  // Only the body is of interest; the response object itself is not used.
  const toResult = (_response, body) => ({
    date: dateFormatted,
    title: extract(body),
  });

  return fetchURL(requestUrl, toResult);
}
/**
 * Parses an HTML document and returns the text of its first <font> element.
 *
 * @param {string} htmlSourceCode - HTML source to parse.
 * @returns {string} The `text` of the first <font> element in the document.
 * @throws {Error} If the document contains no <font> element.
 */
function extract(htmlSourceCode) {
  const doc = html.parse(htmlSourceCode);
  const firstFont = doc.querySelector('font');

  // querySelector() yields null when nothing matches; fail with a clear
  // message instead of a TypeError from reading `.text` of null.
  if (firstFont == null) {
    throw new Error('No <font> element found in HTML document');
  }

  return firstFont.text;
}
/**
 * Performs an HTTP request and wraps the callback-style `request` call in a
 * promise.
 *
 * @param {string} url - URL to request.
 * @param {function(object, *): *} onResponse - Called with the response
 *   object and the body of a 200 response; its return value becomes the
 *   fulfilment value.
 * @returns {Promise<*>} Fulfils with the result of `onResponse`. Rejects with
 *   the error reported by `request`, with an Error for any status code other
 *   than 200, or with whatever `onResponse` throws.
 */
function fetchURL(url, onResponse) {
  return new Promise((resolve, reject) => {
    request(url, (error, response, body) => {
      if (error) {
        reject(error);
        return;
      }

      if (response.statusCode !== 200) {
        reject(new Error(`Invalid status code <${response.statusCode}> for url ${url}`));
        return;
      }

      // This callback runs outside the promise executor, so an exception from
      // onResponse would not reject the promise by itself; forward it.
      try {
        resolve(onResponse(response, body));
      } catch (err) {
        reject(err);
      }
    });
  });
}
/**
 * Formats a date as month-day-year, built from the UTC fields of the date.
 * The numbers are not zero-padded, e.g. 5 January 2021 becomes "1-5-2021".
 *
 * @param {Date} date - Date to format.
 * @returns {string} The date in the form m-d-yyyy.
 */
function format(date) {
  const parts = [
    date.getUTCMonth() + 1, // getUTCMonth() is zero-based
    date.getUTCDate(),
    date.getUTCFullYear(),
  ];
  return parts.join('-');
}
/**
 * Logs how many entries were collected and writes them to RESULT_FILE as a
 * JSON array, pretty-printed with two-space indentation.
 *
 * @param {Array} results - Collected entries to serialize.
 * @returns {void}
 */
function writeResults(results) {
  const entryCount = results.length;
  console.log(`${entryCount} entries generated`);

  const json = JSON.stringify(results, null, 2);
  fs.writeFileSync(RESULT_FILE, json);
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement