Advertisement
artur99

Untitled

Sep 4th, 2016
152
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  // Node core modules: HTTP/HTTPS clients for the probes, fs for saving hits.
  var http = require("http");
  var https = require("https");
  var fs = require('fs');

  // Inclusive range of numeric document ids to probe.
  var start = 116106;
  var stop  = 117107;

  // Maximum number of requests to have running at the same time.
  var at_once = 200;
  // Pause, in milliseconds, to wait once that maximum has been reached.
  var delay = 2000;

  // Shared counters, updated by process() and repeater().
  var in_quee = 0;   // requests dispatched and not yet answered
  var found = 0;     // documents whose response looked valid
  var checked = 0;   // documents that received a response
  // Coerce the bounds to integers so string values (e.g. "116106") also work.
  start = parseInt(start, 10);
  stop = parseInt(stop, 10);
  /**
   * Requests `url` and records whether document `id` exists.
   *
   * A response body longer than 50 bytes counts as a valid document: its viewer
   * link is appended to valid_links.txt. Shorter bodies are logged as invalid.
   * The shared counters `in_quee`, `checked` and `found` are updated exactly once
   * per document, when a response has been read to the end.
   *
   * Failed attempts are retried: socket errors and truncated responses after a
   * random pause of up to 8 s, socket timeouts immediately. The abandoned request
   * is destroyed first so that it can never report a second result.
   *
   * NOTE: at module scope this declaration shadows Node's global `process` object.
   *
   * @param {string} url - Address to probe (http:// or https://).
   * @param {string} id - Zero-padded document id used in the log and the saved link.
   */
  function process(url, id){
      var client = url.startsWith("https://") ? https : http;
      // True once this attempt has completed or has been handed over to a retry;
      // every event arriving afterwards is ignored.
      var settled = false;

      // Gives up on this attempt and makes sure its socket cannot deliver anything more.
      function abandon(){
          settled = true;
          req.destroy();
      }

      // Shared failure path for request errors and truncated responses.
      function failAndRetry(){
          if(settled) return;
          abandon();
          console.log("[Error]");
          setTimeout(function(){ process(url, id); }, Math.floor(Math.random()*8000));
      }

      var req = client.get(url, function(res){
          // A response for an attempt that was already retried must not be counted.
          if(settled) return;
          var resp = 0;
          res.on('data', function(chunk){ resp += chunk.length; });
          res.on('error', failAndRetry);
          // 'close' without a preceding 'end' means the body was cut short.
          res.on('close', failAndRetry);
          res.on('end', function(){
              if(settled) return;
              settled = true;
              in_quee--;
              checked++;
              if(resp > 50){
                  // Body is large enough to be a real document.
                  found++;
                  var link = 'http://www.internetculturale.it/jmms/iccuviewer/iccu.jsp?id=oai%3Awww.internetculturale.sbn.it%2FTeca%3A20%3ANT0000%3ACNMD\\\\'+id+'&mode=all&teca=MagTeca+-+ICCU';
                  // The callback is mandatory on current Node versions.
                  fs.appendFile('valid_links.txt', link+"\r\n", function(err){
                      if(err) console.log('['+id+'] Could not save link: '+err.message);
                  });
                  console.log('['+id+'] Found document... ('+found+' found, '+checked+' checked)');
              }else{
                  console.log('['+id+'] Invalid document... ('+found+' found, '+checked+' checked)');
              }
          });
      });
      req.on("error", failAndRetry);
      // The timeout only signals an idle socket; the request has to be destroyed
      // explicitly before a new attempt is started.
      req.setTimeout(Math.floor(Math.random()*8000)+10000, function(){
          if(settled) return;
          abandon();
          process(url, id);
      });
  }
  /**
   * Dispatches a check for every id from `i` up to the module-level `stop`
   * (inclusive), keeping at most `at_once` requests running.
   *
   * When the limit is reached, the function re-schedules itself after `delay`
   * milliseconds and continues from the first id that has not been dispatched.
   *
   * @param {number} i - First document id still to be requested.
   */
  function repeater(i){
      var next = i;
      // Fill every free slot. A loop keeps the stack flat however large at_once is.
      while(next <= stop && in_quee < at_once){
          in_quee++;
          // Ids are zero-padded to ten digits.
          var id = ("0000000000" + next).slice(-10);
          console.log("Requesting check for "+id+"...");
          process('http://www.internetculturale.it/jmms/magparser?teca=MagTeca+-+ICCU&mode=all&fulltext=0&id=oai%3Awww.internetculturale.sbn.it%2FTeca%3A20%3ANT0000%3ACNMD%5C%5C'+id, id);
          next++;
      }
      if(next > stop) return;
      // Limit reached: wait for the configured pause, then try again.
      setTimeout(function(){
          repeater(next);
      }, delay);
  }
  // Start the scan at the first configured id.
  repeater(start);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement