Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
/*
 * Downloads anime pages from anidb and stores each one as a file in $folder.
 *
 * This first section only holds the user-editable settings and prepares the
 * environment (time limit, save folder, response content type).
 */
set_time_limit(0); // remove the execution time limit for this script

$folder = '/tmp'; // change save folder
$cookie_string = ':NO:COOKIE:'; // insert here default_tabs, adbsess, anidbsettings & adbuin
$max_id = 10500; // change this constant (highest anime id that will be requested)

// Create the save folder when it is missing. A failure is reported instead of
// being hidden behind "@": without the folder no page could ever be saved.
// The trailing is_dir() covers the case where the folder appeared concurrently.
if (!is_dir($folder) && !mkdir($folder, 0777, true) && !is_dir($folder)) {
    echo 'Can not create folder '.$folder;
    return;
}

// Progress messages are plain text. The original value ended with a dangling
// "; " (a parameter separator followed by no parameter), which is malformed.
$sad_contype = 'text/plain; charset=utf-8';
if (!headers_sent()) {
    // Skipped when output has already started, where header() would only warn.
    header('Content-type: '.$sad_contype);
}
// Fetch the proxy list.
// On success $json holds the decoded response object and $ch stays open,
// because the statements that follow reuse the same cURL handle.
$ch = curl_init('http://example.com/proxy/get.php?show_ip=0&order=rand'); // lol, change this to any trusted proxy list
if ($ch === false) {
    echo 'Can not get proxy list';
    return;
}
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Bound this request: the script runs without a time limit, so an
// unresponsive proxy-list server would otherwise block it indefinitely.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);

$buf = curl_exec($ch);
if ($buf === false) {
    // Transport-level failure: include the cURL error so the cause is visible.
    echo 'Can not get proxy list: '.curl_error($ch);
    return;
}

$json = json_decode($buf);
if (!is_object($json)) {
    echo 'Can not get proxy list';
    return;
}
// A response without a "status" field is accepted, exactly as before, but no
// longer raises an undefined-property warning.
if (isset($json->status) && $json->status != 0) {
    echo 'Can not get proxy list. Status #'.$json->status;
    return;
}
// Create the "threads": one bookkeeping object per proxy returned in $json->list.
// Each object carries the proxy address, the User-Agent sent through it and
// two timestamps that are compared against microtime(true) later on.
$threads = [];
// Tolerate a response without a usable "list" field instead of warning in foreach.
$proxy_list = (isset($json->list) && (is_array($json->list) || is_object($json->list)))
    ? $json->list
    : [];
foreach ($proxy_list as $proxy) {
    $threads[] = (object) [
        'proxy'      => $proxy,
        // Every thread currently uses the same fixed User-Agent string.
        'user_agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36',
        'last_get'   => 0, // time of the last use (0 = never used)
        'next_get'   => 0, // earliest time of the next use
    ];
}
// Without any proxy there is nothing to send requests through, so stop here
// rather than entering the download loop with an empty pool.
if (count($threads) === 0) {
    echo 'Proxy list is empty';
    return;
}
// Start scraping. These timeouts apply to every page request made through $ch.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 3);
curl_setopt($ch, CURLOPT_TIMEOUT, 3);
while (true) {
    // Pick a random anime id whose dump file is missing or older than 7 days.
    // Sampling is random, so after 100000 unsuccessful draws we assume that
    // everything is already downloaded and fresh.
    $k = 0;
    $u = false;
    while ($k++ < 100000) {
        $id = mt_rand(1, $max_id);
        $filename = $folder.'/dmp-'.$id.'.html';
        if (!file_exists($filename) || filemtime($filename) < time() - 7 * 24 * 3600) {
            $u = true;
            break;
        }
    }
    if (!$u) {
        echo 'There is no undownloaded files. Bye bye';
        return;
    }
    echo "\nanime #".$id;

    // Among the threads whose next_get time has passed, choose the one that
    // was used longest ago. Thread entries are objects, so plain iteration is
    // enough and no by-reference loop variable is left dangling afterwards.
    $now = microtime(true);
    $min_last_get_id = null;
    foreach ($threads as $thread_id => $candidate) {
        if ($candidate->next_get > $now) {
            continue;
        }
        if ($min_last_get_id === null || $candidate->last_get < $threads[$min_last_get_id]->last_get) {
            $min_last_get_id = $thread_id;
        }
    }
    if ($min_last_get_id === null) {
        echo "\nNot a single thread is alive";
        flush();
        sleep(10);
        // Try again after the pause; without this the code below would index
        // $threads with null and dereference a non-existent thread.
        continue;
    }
    $thread = $threads[$min_last_get_id];

    echo ", thread #".$min_last_get_id.' (his last get was '.
        // last_get is a float from microtime(true); gmdate() expects an integer timestamp.
        (($thread->last_get == 0) ? 'never' : gmdate('Y-m-d H:i:sO', (int) $thread->last_get)).')';
    flush();

    curl_setopt($ch, CURLOPT_PROXY, $thread->proxy);
    curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
    curl_setopt($ch, CURLOPT_URL, 'http://anidb.net/perl-bin/animedb.pl?show=anime&aid='.$id);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Cookie: '.$cookie_string,
        'Cache-Control: max-age=0',
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-agent: '.$thread->user_agent,
        'Referer: http://anidb.net/perl-bin/animedb.pl?show=main',
        // Only gzip is advertised because gzip is the only encoding decoded below.
        'Accept-Encoding: gzip',
        'Accept-Language: ru,en;q=0.8',
    ]);

    $a = microtime(true);
    $buf = curl_exec($ch);
    $b = microtime(true);
    if ($buf === false) {
        // Transport failure: handled as an empty response below.
        $buf = '';
    }

    // By default the thread may be used again 10 seconds after this request.
    $thread->last_get = $b;
    $thread->next_get = $thread->last_get + 10;
    echo '; bytes='.strlen($buf).', '.round($b - $a, 2).' sec';
    if (strlen($buf) == 0) {
        echo "; can not load that page, null response. Reload";flush();
        $thread->next_get = $thread->last_get + 300; // rest this thread for 5 minutes
        continue;
    }
    flush();

    // Decompress only when the body really starts with the gzip magic bytes,
    // so an uncompressed reply is kept as is instead of being turned into an
    // empty string. "@" only keeps the warning for corrupt data out of the
    // progress output; the failure itself is handled explicitly.
    if (strncmp($buf, "\x1f\x8b", 2) === 0) {
        $decoded = @gzdecode($buf);
        $buf = ($decoded === false) ? '' : $decoded;
    }
    echo ', real size is '.strlen($buf).' bytes';
    if (strlen($buf) < 1000) {
        echo "; Too few bytes. Reload";flush();
        $thread->next_get = $thread->last_get + 300;
        continue;
    }
    if (preg_match('|YOU HAVE BEEN AUTO\\-BANNED|i', $buf)) {
        echo "; This proxy is banned. Reload";flush();
        $thread->next_get = $thread->last_get + 6 * 3600 + 10; // rest this thread for a bit over 6 hours
        continue;
    }
    if (preg_match('|Unknown anime id|i', $buf)) {
        // The page is still saved below, so this id is not requested again for a week.
        echo "; This anime is not exists. It is okay";flush();
    }
    if (preg_match('|show\\=signup|i', $buf)) {
        echo "; Warning, we have signed out!";flush();
    }

    // Save the page
    if (file_put_contents($filename, $buf) === false) {
        echo "; can not save file ".$filename;flush();
    }
    // Drop any cached stat data for this file so the freshness check at the top
    // of the loop sees the new modification time.
    clearstatcache(true, $filename);
}
?>
Add Comment
Please, Sign In to add comment