Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
require("konektor.php");

// Load the stop-word list into $db_stoplist.
// The array is initialised first so that later array_diff() calls still
// receive an array when tb_stoplist has no rows.
// NOTE(review): the mysql_* extension was removed in PHP 7. The connection is
// presumably opened in konektor.php, so migrating to mysqli/PDO has to start
// there -- confirm before changing the calls below.
$db_stoplist = array();
$sql = "SELECT stoplist from tb_stoplist";
$result3 = mysql_query($sql) or die("Kesalahan pada perintah SQL!");
while ($row = mysql_fetch_object($result3)) {
    $db_stoplist[] = $row->stoplist;
}

// Tokenise the search keyword.
// The keyword is normalised the same way the document text is later in this
// script (punctuation set [":;.,!?] removed, text lowercased) so that query
// terms can actually match document terms. Splitting on runs of whitespace
// avoids the empty tokens that explode(' ', ...) yields for double spaces.
$keyword = $this->key;
$keyword_bersih = strtolower(preg_replace('/[":;.,!?]/', ' ', $keyword));
$tokenizing = preg_split('/\s+/', $keyword_bersih, -1, PREG_SPLIT_NO_EMPTY);
$ct = count($tokenizing);
?>
<strong>Keyword: </strong><?php echo htmlspecialchars($keyword, ENT_QUOTES, 'UTF-8'); // user input: escape before output ?>
<br>
<strong>Panjang String: </strong><?php echo strlen($keyword); // was ($ps-2) with $ps never defined, which always printed -2 ?>
<?php
// ---------------------------------------------------------------------------
// Filtering of the query terms and construction of the document vocabulary.
//
// Reads : $tokenizing (query tokens), $db_stoplist (stop words)
// Writes: $filtering  (query tokens without stop words, indexed 0..n-1)
//         $D          (number of documents in tb_hadits)
//         $token      (unique document terms, indexed 0..$jml_token-1)
//         $jml_token  (size of $token)
// ---------------------------------------------------------------------------
$daftar_stop = isset($db_stoplist) ? $db_stoplist : array();

// Lowercase the query tokens and drop empty ones, because the term-frequency
// pass further down lowercases the document text before comparing.
$kata_kunci = array();
foreach ($tokenizing as $kata) {
    $kata = strtolower(trim($kata));
    if ($kata !== '') {
        $kata_kunci[] = $kata;
    }
}

// array_diff() preserves the original keys; later code walks $filtering with
// a 0..count-1 index, so the result must be re-indexed.
$filtering = array_values(array_diff($kata_kunci, $daftar_stop));

// Count the documents and collect every non-stop-word term they contain.
$D = 0;
$sqldok = "SELECT terjemah FROM tb_hadits ORDER BY id_hadits ASC";
$result2 = mysql_query($sqldok) or die("Kesalahan pada perintah SQL!");
$dokumen = array();
while ($row2 = mysql_fetch_array($result2)) {
    ++$D;

    // Tokenise the document: punctuation becomes a space, text is lowercased,
    // and the result is split on whitespace. PREG_SPLIT_NO_EMPTY keeps an
    // empty document from contributing an empty-string term.
    $replace = strtolower(preg_replace('/[":;.,!?]/', ' ', $row2['terjemah']));
    $token_dok = preg_split('/\s+/', $replace, -1, PREG_SPLIT_NO_EMPTY);

    // Drop stop words, then append the remaining terms to the global list.
    foreach (array_diff($token_dok, $daftar_stop) as $kata) {
        $dokumen[] = $kata;
    }
}

// Remove duplicate terms. array_unique() also preserves keys, and the TF-IDF
// loops address $token[$i] for $i = 0..$jml_token-1, hence array_values().
$token = array_values(array_unique($dokumen));
$jml_token = count($token);
echo("<center><strong><-- HASIL PENCARIAN --></strong></center>");

// ---------------------------------------------------------------------------
// TF-IDF area.
//
// Reads : $token, $jml_token, $filtering, $D, $db_stoplist
// Writes: $tf[$i][0]      1 when term $i occurs in the query, otherwise 0
//         $tf[$i][1..$D]  number of occurrences of term $i in document j
//         $tf[$i][$D+1]   IDF of term $i: log10($D / df), or 0 when df is 0
//         $w[$i][0..$D]   weight = $tf[$i][k] * IDF
//         $jumlahkosong   number of vocabulary terms absent from the query
//
// Every document is fetched and tokenised once up front. The previous version
// re-ran the document query for every (term, document) pair and the stop-list
// query for every word, and it skipped the last word of each document because
// its token array was filled from index 1 but read from index 0.
// ---------------------------------------------------------------------------

// The loops below address $token[$i] with a dense 0-based index.
$token = array_values($token);
$jml_token = count($token);

// Lookup tables so membership tests are a single isset().
$stopwords = array();
if (isset($db_stoplist)) {
    foreach ($db_stoplist as $katafilter) {
        $stopwords[$katafilter] = true;
    }
}
$kata_query = array();
foreach ($filtering as $katakey) {
    $kata_query[$katakey] = true;
}

// $frekuensi[j][term] = occurrences of term in document j (j is 1-based and
// follows the id_hadits ordering used when $D was counted).
$frekuensi = array();
$sqldok = "SELECT terjemah FROM tb_hadits ORDER BY id_hadits ASC";
$result4 = mysql_query($sqldok) or die("Kesalahan pada perintah SQL!");
$nodok = 0;
while ($row4 = mysql_fetch_array($result4)) {
    $nodok++;

    // Punctuation is replaced by a space (not removed) so the resulting terms
    // are the same ones the vocabulary builder earlier in this file produces.
    $isidokumen = strtolower(preg_replace('/[":;.,!?]/', ' ', $row4['terjemah']));

    $hitung = array();
    foreach (preg_split('/\s+/', $isidokumen, -1, PREG_SPLIT_NO_EMPTY) as $katatoken) {
        if (isset($stopwords[$katatoken])) {
            continue; // stop word: not counted
        }
        $hitung[$katatoken] = isset($hitung[$katatoken]) ? $hitung[$katatoken] + 1 : 1;
    }
    $frekuensi[$nodok] = $hitung;
}

$tf = array();
$w = array();
$jumlahkosong = 0;
for ($i = 0; $i < $jml_token; $i++) {
    $kunci = $token[$i];

    // Column 0: is this term part of the (filtered) query?
    $adakunci = isset($kata_query[$kunci]) ? 1 : 0;
    $tf[$i][0] = $adakunci;
    if ($adakunci == 0) {
        $jumlahkosong++;
    }

    // Columns 1..$D: raw term frequency per document; df counts the
    // documents in which the term occurs at least once.
    $df = 0;
    for ($j = 1; $j <= $D; $j++) {
        $akdok = isset($frekuensi[$j][$kunci]) ? $frekuensi[$j][$kunci] : 0;
        $tf[$i][$j] = $akdok;
        if ($akdok >= 1) {
            $df++;
        }
    }

    // Column $D+1: IDF. A term that matches no document gets IDF 0 instead
    // of dividing by zero.
    $tf[$i][$D + 1] = ($df > 0) ? log10($D / $df) : 0;

    // W area: weight of the term in the query (k = 0) and in each document.
    for ($k = 0; $k <= $D; $k++) {
        $w[$i][$k] = $tf[$i][$k] * $tf[$i][$D + 1];
    }
}
$df = 0;
// ---------------------------------------------------------------------------
// TF-IDF score per column and ranking.
//
// Reads : $tf, $w, $jml_token, $D
// Writes: $tfidf[0..$D]    sum of $w[$i][j] over the terms that occur in the
//                          query ($tf[$i][0] == 1); slot 0 is the query itself
//         $tfidfno[0..$D]  label "Dokumen j" travelling with each score
// After sorting, slots 1..$D are ordered by score, highest first; slot 0 is
// left in place.
// ---------------------------------------------------------------------------

// Start every accumulator at 0 so the += below never reads an unset slot.
$tfidf = array_fill(0, $D + 1, 0);
for ($i = 0; $i < $jml_token; $i++) {
    if ($tf[$i][0] != 1) {
        continue; // term is not part of the query
    }
    for ($j = 0; $j <= $D; $j++) {
        $tfidf[$j] = $tfidf[$j] + $w[$i][$j];
    }
}

$tfidfno = array();
for ($j = 0; $j <= $D; $j++) {
    $tfidfno[$j] = "Dokumen " . $j;
}

// Rank documents 1..$D by score, descending. The earlier hand-written loop
// compared [$j] with [$jj] but swapped [$jj] and [$jj+1], which does not sort
// (scores 1,2,3,4 ended up as 3,4,2,1). array_multisort() sorts the scores
// and carries the labels along; the document number is the tie-breaker so
// equal scores keep a deterministic order.
if ($D > 1) {
    $skor_dok = array_slice($tfidf, 1);
    $label_dok = array_slice($tfidfno, 1);
    $urutan_asal = range(1, $D);
    array_multisort(
        $skor_dok, SORT_DESC, SORT_NUMERIC,
        $urutan_asal, SORT_ASC, SORT_NUMERIC,
        $label_dok
    );
    $tfidf = array_merge(array($tfidf[0]), $skor_dok);
    $tfidfno = array_merge(array($tfidfno[0]), $label_dok);
}
// End of TF-IDF area
// ---------------------------------------------------------------------------
// VSM area: cosine similarity between the query (column 0) and every document
// (columns 1..$D) of the weight matrix $w.
//
// Reads : $tf, $w, $jml_token, $D
// Writes: $vsmw[$i][j]           $w[$i][j] squared
//         $vsmw[$jml_token][j]   square root of the column sum (vector length)
//         $vsmkk[$i][j]          per-term query x document product (j >= 1)
//         $vsmkk[$jml_token][j]  column sum (dot product with the query)
//         $cosine[0..$D]         similarity score; slot 0 is unused and is 0
//         $cosineno[0..$D]       document number travelling with each score
// After sorting, slots 1..$D are ordered by score, highest first.
// ---------------------------------------------------------------------------
$vsmw = array();
$vsmkk = array();
for ($i = 0; $i < $jml_token; $i++) {
    // Squared weights, needed for the vector lengths.
    for ($l = 0; $l <= $D; $l++) {
        $vsmw[$i][$l] = $w[$i][$l] * $w[$i][$l];
    }

    // Column 0 would be query x query; it is never used for ranking, so it is
    // stored as an explicit 0 instead of being left unset.
    $vsmkk[$i][0] = 0;
    for ($l = 1; $l <= $D; $l++) {
        // tf_doc * (w_query squared). Because the query tf is only ever 0 or
        // 1 and w = tf * idf, this equals w_query * w_doc.
        $vsmkk[$i][$l] = $tf[$i][$l] * $vsmw[$i][0];
    }
}

// Totals row (index $jml_token): vector length and dot product per column.
for ($j = 0; $j <= $D; $j++) {
    $tvsmtot = 0;
    $tvsmkk = 0;
    for ($k = 0; $k < $jml_token; $k++) {
        $tvsmtot = $tvsmtot + $vsmw[$k][$j];
        $tvsmkk = $tvsmkk + $vsmkk[$k][$j];
    }
    $vsmw[$jml_token][$j] = sqrt($tvsmtot);
    $vsmkk[$jml_token][$j] = $tvsmkk;
}

// Cosine score per column. A zero-length query or document vector scores 0;
// the division is guarded explicitly rather than silenced with @, which does
// not stop the DivisionByZeroError raised by PHP 8.
$cosine = array();
$cosineno = array();
$sqrtkk = $vsmw[$jml_token][0];
for ($j = 0; $j <= $D; $j++) {
    $sumkkd = $vsmkk[$jml_token][$j];
    $sqrtd = $vsmw[$jml_token][$j];
    $penyebut = $sqrtkk * $sqrtd;
    $cosine[$j] = ($penyebut > 0) ? $sumkkd / $penyebut : 0;
    $cosineno[$j] = $j;
}

// Rank documents 1..$D by cosine score, descending. The earlier hand-written
// loop compared [$j] with [$jj] but swapped [$jj] and [$jj+1], which does not
// sort (scores 1,2,3,4 ended up as 3,4,2,1). Ties are broken by document
// number so the order is deterministic.
if ($D > 1) {
    $skor_cosine = array_slice($cosine, 1);
    $urutan_dok = array_slice($cosineno, 1);
    array_multisort(
        $skor_cosine, SORT_DESC, SORT_NUMERIC,
        $urutan_dok, SORT_ASC, SORT_NUMERIC
    );
    $cosine = array_merge(array($cosine[0]), $skor_cosine);
    $cosineno = array_merge(array($cosineno[0]), $urutan_dok);
}
// End of VSM area
// ---------------------------------------------------------------------------
// Display the matching hadith.
//
// Reads : $cosine (scores), $cosineno (1-based document numbers)
// Writes: $bab, $judul, $nomor, $hadits, $terjemah (row data indexed from 0 in
//         id_hadits order) and the HTML result tables.
//
// Query notes:
//  * The stray comma in "FROM tb_bab b, JOIN ..." was an SQL syntax error.
//  * tb_hadits drives the join and tb_bab is LEFT JOINed, so every hadith row
//    is returned even without a matching bab. Document numbers come from
//    tb_hadits ordered by id_hadits, so a dropped row would shift every later
//    index and attach scores to the wrong hadith.
// ---------------------------------------------------------------------------
$sqldok = "SELECT
b.id_bab,
b.bab,
h.id_hadits,
h.judul,
h.hadits,
h.terjemah,
h.no_hadits
FROM
tb_hadits h
LEFT JOIN tb_bab b USING(id_bab)
ORDER BY h.id_hadits ASC";
$result7 = mysql_query($sqldok) or die("Kesalahan pada perintah SQL!");

$bab = array();
$judul = array();
$nomor = array();
$hadits = array();
$terjemah = array();
$no = 0;
while ($row7 = mysql_fetch_array($result7)) {
    $bab[$no] = $row7['bab'];
    $judul[$no] = $row7['judul'];
    $nomor[$no] = $row7['no_hadits'];
    $hadits[$no] = $row7['hadits'];
    $terjemah[$no] = $row7['terjemah'];
    $no = $no + 1;
}

echo("<br>");

// Slot 0 of $cosine belongs to the query itself (row index -1), so the
// listing starts at slot 1.
$jumlah_skor = count($cosine);
for ($i = 1; $i < $jumlah_skor; $i++) {
    if (!isset($cosineno[$i])) {
        continue;
    }
    $tmpcosine = $cosine[$i];
    $tmpcosineno = $cosineno[$i] - 1; // document numbers are 1-based, rows 0-based

    // Only documents with a positive score and a loaded row are shown.
    if (!($tmpcosine > 0) || !array_key_exists($tmpcosineno, $terjemah)) {
        continue;
    }

    // Function calls are not evaluated inside a heredoc, so nl2br() has to run
    // beforehand; "{nl2br(...)}" in the template was printed literally.
    $teks_terjemah = nl2br($terjemah[$tmpcosineno]);

    echo <<<HASIL
<table>
<tr>
<td style="padding:5px;">
Score : <strong>$tmpcosine</strong>
</td>
</tr>
<tr>
<td style="padding:5px;">
Bab Hadits: <strong>{$bab[$tmpcosineno]}</strong> No. <strong>{$nomor[$tmpcosineno]}</strong>. Judul: <strong>{$judul[$tmpcosineno]}</strong>
</td></tr>
<tr>
<td align="right" style="padding:5px; font-size:12pt;">
{$hadits[$tmpcosineno]}
</td>
</tr>
<tr><td style="padding:5px;">
{$teks_terjemah}
</td></tr>
</table>
HASIL;
}
?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement