Advertisement
cdsatrian

apriori algorithm (example - not yet finished)

Sep 13th, 2012
351
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 11.78 KB | None | 0 0
  1. <?php
  2. /*****************************/
  3. /*   APPRIORI ALGORITHM      */
  4. /*                           */
  5. /*    cahyadsn@yahoo.com     */
  6. /*     adzan.16mb.com        */
  7. /*****************************/
  8. function combination($member,$num){
  9.   $n = count($member);  
  10.   //jumlah total kombinasi yang mungkin
  11.   $total = pow(2, $n);
  12.   $list =array();
  13.   //Loop untuk setiap kombinasi yang mungkin
  14.   $k=0;
  15.   for ($i = 0; $i < $total; $i++) {  
  16.     $list[$k]=array();
  17.     //Untuk setiap kombinasi, dicek untuk setiap bit
  18.     for ($j = 0; $j < $total; $j++) {  
  19.        //apakah data bit $j ada pada bit $i?  
  20.         if ((pow(2, $j) & $i)) $list[$k][]=$member[$j];      
  21.     }
  22.     if(count($list[$k])==$num){
  23.       //jika jumlah anggota sama dengan jumlah yang diinginkan increment $k
  24.       $k++;
  25.     }else{
  26.       //jika tidak sesuai, hapus array $list[$k]
  27.       unset($list[$k]);
  28.     }
  29.   }
  30.   return $list;
  31. }
  32. //-------------------
  33. function array_consolidate($list){
  34.   $r=$list[0];
  35.   for($i=1;$i<count($list);$i++){
  36.     $r=array_merge($r,$list[$i]);
  37.   }
  38.   $r=array_values(array_unique($r));
  39.   sort($r);
  40.   return $r;
  41. }
  42. //--------------------
  43. function build_candidate($data,$level){
  44.   $result=combination($data,$level);
  45.   sort($result);
  46.   return $result;
  47. }
  48. //-----------------------
  49. function join_step($source,$data){
  50.   $n=count($data[0]);
  51.   for($i=0;$i<count($data);$i++){
  52.     for($j=0;$j<count($source);$j++){
  53.       $freq=array_intersect($source[$j],$data[$i]);
  54.       $support[$i]=(count($freq)==count($data[$i])?(isset($support[$i])?$support[$i]+1:1):(isset($support[$i])?$support[$i]:0));
  55.     }
  56.   }
  57.   $result=array($data,$support);
  58.   return $result;
  59. }
  60. //-----------------------
  61. function prune_data($data,$no_trans,$min_support){
  62.   $result=array();
  63.   foreach ($data[1] as $key=>$value){
  64.     if($value/$no_trans<$min_support){
  65.       unset($data[0][$key]);
  66.       unset($data[1][$key]);
  67.     }else{
  68.       $result[0][]=$data[0][$key];
  69.       $result[1][]=$data[1][$key];
  70.     }
  71.   }
  72.   return $result;
  73. }
  74. //-----------------------
  75. function print_transaction($data,$title=NULL){
  76.   echo (isset($title)?"<h3>Transaction for ".$title."</h3>\n":"");
  77.   echo "<div class=\"table\">\n"
  78.       ."<span><div class=\"header\">Trans No</div><div class=\"header\">Items</div></span>\n";
  79.   for($i=0;$i<count($data);$i++){
  80.     echo "<span><div class=\"cell\">".($i+1)."</div><div class=\"cell\">{";
  81.     for($j=0;$j<count($data[$i]);$j++){
  82.       echo ($j==0?"":",").$data[$i][$j];
  83.     }
  84.     echo "}</div></span>\n";
  85.   }    
  86.   echo "</div><br>\n";  
  87. }
  88. //-----------------------
  89. function print_itemset($data,$level,$title=NULL){
  90.   echo (isset($title)?"<h3>Candidate C-".$level."</h3>\n":"");
  91.   echo "<div class=\"table1\">\n"
  92.       ."<span><div class=\"header\">".$level."-Itemset</div></span>\n";
  93.   for($i=0;$i<count($data);$i++){
  94.     echo "<span><div class=\"cell\">{";
  95.     for($j=0;$j<count($data[$i]);$j++){
  96.       echo ($j==0?"":",").$data[$i][$j];
  97.     }
  98.     echo "}</div></span>\n";
  99.   }    
  100.   echo "</div><br>\n";
  101. }
  102. //-----------------------
  103. function print_support($data,$level,$title=NULL){
  104.   echo (isset($title)?"<h3>".$title." ".$level."</h3>\n":"");
  105.   if(!empty($data)){
  106.     echo "<div class=\"table\">\n"
  107.         ."<span><div class=\"header\">".($level?$level."-":"")."Itemset</div><div class=\"header\">Count</div></span>\n";
  108.     for($i=0;$i<count($data[0]);$i++){
  109.       echo "<span><div class=\"cell\">{";
  110.       for($j=0;$j<count($data[0][$i]);$j++){
  111.         echo ($j==0?"":",").$data[0][$i][$j];
  112.       }
  113.       echo "}</div><div class=\"cell\">".$data[1][$i]."</div></span>\n";
  114.     }    
  115.     echo "</div><br>\n";
  116.   }
  117. }
  118. //-------------------------
  119. function array_rebuild($data){
  120.   $result=array();
  121.   for($i=0;$i<count($data[0]);$i++){
  122.     $result[0][$i]=implode("|",$data[0][$i]);
  123.   }
  124.   $result[1]=$data[1];
  125.   return $result;
  126. }
  127. //==================================
  128. // courtesy of Rully Ramanda
  129. // http://pastebin.com/M37KkCHJ
  130. function get_combinations($base,$n){
  131.   $baselen = count($base);
  132.   if($baselen == 0){
  133.     return;
  134.   }
  135.   if($n == 1){
  136.     $return = array();
  137.     foreach($base as $b){
  138.       $return[] = array($b);
  139.     }
  140.     return $return;
  141.   }else{
  142.     //get one level lower combinations
  143.     $oneLevelLower = get_combinations($base,$n-1);
  144.     /*for every one level lower combinations add one element to them that the last
  145.         element of a combination is preceeded by the element which follows it in base
  146.         array if there is none, does not add*/
  147.     $newCombs = array();
  148.     foreach($oneLevelLower as $oll){
  149.       $lastEl = $oll[$n-2];
  150.       $found = false;
  151.       foreach($base as  $key => $b){
  152.         if($b == $lastEl){
  153.           $found = true;
  154.           continue;
  155.            //last element found
  156.         }
  157.         if($found == true){
  158.           //add to combinations with last element
  159.           if($key < $baselen){
  160.             $tmp = $oll;
  161.             $newCombination = array_slice($tmp,0);
  162.             $newCombination[]=$b;
  163.             $newCombs[] = array_slice($newCombination,0);
  164.           }
  165.         }
  166.       }
  167.     }
  168.   }
  169.   return $newCombs;
  170. }
  171. //-----------------------------------
  172. // courtesy of Rully Ramanda
  173. // http://pastebin.com/sKXfaaKC
  174. // with some modification
  175. function generate_all($data){
  176.   $result=array();
  177.     $n=count($data);
  178.     //generate kombinasi 1 -> (jumlahdata-1)
  179.     for($i = 1; $i < $n; $i++){
  180.         $combination[$i] = get_combinations($data, $i);
  181.     }
  182.     //set pertama
  183.     //--------------------
  184.     if($n<=2){
  185.     $result[]=$data[0]."->".$data[1];
  186.     $result[]=$data[1]."->".$data[0];
  187.   }
  188.   else
  189.   {
  190.         foreach($combination[2] as $x => $y){
  191.           $result[]=implode("->", $y); //simpan kombinasi 2 nya
  192.       }
  193.       $rev_comb_2 = array_reverse($combination[2]); //kombinasi 2 dibalik
  194.       foreach($rev_comb_2 as $x => $y){
  195.           $result[]=implode("->", array_reverse($y)); //simpan kombinasi 2 yang sudah dibalik posisinya
  196.       }
  197.       //set ke 2,3,4,,,,n
  198.       //--------------------
  199.       for($i = 2; $i < $n; $i++){
  200.           if($i == 2){ // ini utk set ke 2
  201.               $set[$i] = array_merge($combination[2],$combination[1]); //gabungkan kombinasi 2 dan 1
  202.           }else{ // ini utk set ke 3,4,5, dst...
  203.               $set[$i] = array_merge($combination[$i],$set[$i-1]); //gabungkan kombinasi $i dengan $set[$i-1]
  204.           }
  205.           $rev_set = array_reverse($set[$i]); //set nya dibalik
  206.           foreach($set[$i] as $x => $y){
  207.               list($x2, $y2) = each($rev_set);
  208.               $result[]=implode("|", $y)."->".implode("|", $y2); //simpan set nya dengan format (set -> set_yg_dibalik)
  209.           }
  210.         }
  211.     }
  212.     return $result;
  213. }
  214. //===================================
  215.  
  216. function get_confidence($data,$x,$y)
  217. {
  218.    $a=explode("|",$x);
  219.    $b=explode("|",$y);
  220.    $xy=array_merge($a,$b);
  221.    $xy=array_values(array_unique($xy));
  222.    sort($xy);
  223.    $data_xy=implode("|",$xy);
  224.    $idx_x=array_keys($data[0],$x);
  225.    $idx_xy=array_keys($data[0],$data_xy);
  226.    if(!empty($idx_x) && !empty($idx_xy)){
  227.       $support_x = $data[1][$idx_x[0]];
  228.       $support_xy = $data[1][$idx_xy[0]];
  229.    }
  230.    $confidence=isset($support_x)?($support_xy / $support_x):0;
  231.    $result=(isset($support_x)?array($support_xy,$support_x,$confidence):array(0,0,0));
  232.    return $result;
  233. }
  234.  
  235. function association_rule($data)
  236. {
  237.   $result=array();
  238.   for($i=0;$i<count($data[0]);$i++){
  239.     $x=$data[0][$i];
  240.     if(count($x)>1){
  241.       $result=array_merge($result,generate_all($data[0][$i]));
  242.     }
  243.   }
  244.   $rule=array_values(array_unique($result));
  245.   $a=array_rebuild($data);
  246.   sort($rule);
  247.   $result=array();
  248.   for($i=0;$i<count($rule);$i++){
  249.     $item=explode("->",$rule[$i]);
  250.     $rule[$i]="{".str_replace("|"," ",$rule[$i])."}";
  251.     $rule[$i]=str_replace("->","}->{",$rule[$i]);
  252.     $result[]=array_merge(array($rule[$i]),get_confidence($a,$item[0],$item[1]));
  253.   }
  254.   return $result;
  255. }
  256. //--------------------------------
  257. function print_association_rule($data,$title){
  258.   echo (isset($title)?"<h3>".$title."</h3>\n":"");
  259.   echo "<div class=\"table_rule\">\n"
  260.       ."<span><div class=\"header_rule\">Rules</div>"
  261.       ."<div class=\"header_rule\">Support (XY)</div>"
  262.       ."<div class=\"header_rule\">Support (X)</div>"
  263.       ."<div class=\"header_rule\">Confidence</div>"
  264.       ."</span>\n";
  265.   for($i=0;$i<count($data);$i++){
  266.     echo "<span>";
  267.     for($j=0;$j<count($data[$i]);$j++){
  268.       echo "<div class=\"".($j==0?" left":"cell")."\">".substr($data[$i][$j],0,12)."</div>";
  269.     }
  270.     echo "</span>\n";
  271.   }    
  272.   echo "</div><br>\n";    
  273. }
  274. /***************************/
  275. function apriori($data,$title=NULL){
  276.   echo isset($title)?"<h1>".$title."</h1>":"";
  277.   //echo "<pre>";print_r($data);echo "</pre>";
  278.   print_transaction($data,$title);
  279.   $no_trans=count($data);
  280.   echo "<h2>Step 1: Find all Frequent Itemsets</h2>";
  281.   $result[0]=$data;
  282.   $step=1;
  283.   $frequent_item=array(array(),array());
  284.   do{
  285.     $c=array_consolidate($result[0]);
  286.     //echo "<pre>c-->";print_r($c);echo "</pre>";
  287.     $bl=build_candidate($c,$step);
  288.     //echo "<pre>b_$step-->";print_r($bl);echo "</pre>";
  289.     print_itemset($bl,$step,1);
  290.     $jo=join_step($data,$bl);
  291.     //echo "<pre>j_$step-->";print_r($jo);echo "</pre>";
  292.     print_support($jo,$step,"Join C-");
  293.     $result=prune_data($jo,$no_trans,MIN_SUPPORT);
  294.     $frequent_item[0]=array_merge($frequent_item[0],$result[0]);
  295.     $frequent_item[1]=array_merge($frequent_item[1],$result[1]);
  296.     //echo "<pre>p_$step-->";print_r($result);echo "</pre>";  
  297.     print_support($result,$step,"Prune L-");
  298.     $step++;
  299.   }while(!empty($result) && count($result[0])>1);
  300.   //echo "<pre>frequent_item-->";print_r($frequent_item);echo "</pre>";
  301.   print_support($frequent_item,"","Frequent Itemsets");
  302.   echo "<h2>Step 2: Generate strong association rules from the frequent itemsets</h2>\n";
  303.   echo "\n******************************************************\n";
  304.   $rule=association_rule($frequent_item);
  305.   //echo "<pre>rule-->";print_r($rule);echo "</pre>";
  306.   print_association_rule($rule,"Association Rules");  
  307. }
  308. ?>
  309. <!doctype html>
  310. <head>
  311. <title>Apriori Algorithm</title>
  312. <style type="text/css">
  313. span.row {
  314.  width:100%;
  315.  display: table-row;
  316. }
  317. div.table{
  318.  border:solid 1px #999;
  319.  width:200px;
  320.  text-align:center;
  321.  padding:2px;
  322.  -moz-box-shadow:    4px 4px 4px 4px #ccc;
  323.   -webkit-box-shadow: 4px 4px 4px 4px #ccc;
  324.   box-shadow:         4px 4px 4px 4px #ccc;
  325. }
  326. div.table1{
  327.  border:solid 1px #999;
  328.  width:100px;
  329.  text-align:center;
  330.  padding:2px;
  331.  -moz-box-shadow:    4px 4px 4px 4px #ccc;
  332.   -webkit-box-shadow: 4px 4px 4px 4px #ccc;
  333.   box-shadow:         4px 4px 4px 4px #ccc;
  334. }
  335. div.table_rule{
  336.  border:solid 1px #999;
  337.  width:480px;
  338.  text-align:center;
  339.  padding:2px;
  340.  -moz-box-shadow:    4px 4px 4px 4px #ccc;
  341.   -webkit-box-shadow: 4px 4px 4px 4px #ccc;
  342.   box-shadow:         4px 4px 4px 4px #ccc;
  343. }
  344. div.header{
  345.  border:solid 1px #999;
  346.  color:#eee;
  347.  background-color:#333;
  348.  font-weight:bold;
  349.  text-align:center;
  350.  width:100px;
  351.  height:20px;
  352.  display: table-cell;
  353.  padding:2px;
  354. }
  355. div.header_rule{
  356.  border:solid 1px #999;
  357.  color:#eee;
  358.  background-color:#333;
  359.  font-weight:bold;
  360.  text-align:center;
  361.  width:120px;
  362.  height:20px;
  363.  display: table-cell;
  364.  padding:2px;
  365. }
  366. div.cell {
  367.  border:dashed 1px #eee;
  368.  text-align:center;
  369.  width:120px;
  370.  height:20px;
  371.  display: table-cell;
  372.  padding:2px;
  373. }
  374. div.left{
  375.  border:dashed 1px #eee;
  376.  text-align:left;
  377.  width:100px;
  378.  height:20px;
  379.  display: table-cell;
  380.  padding:2px;
  381.  padding-left:5px;
  382. }
  383. </style>
  384. <body>
  385. <div class="container">
  386. <?php
  387. /************************/
  388. define('MIN_SUPPORT',0.5);
  389. define('MIN_CONFIDENCE',0.8);
  390. //-------- EXAMPLE -------
  391. $data1=array(
  392.        array('a','c','d'),
  393.        array('b','c','e'),
  394.        array('a','b','c','e'),
  395.        array('b','e')
  396.       );
  397. apriori($data1,'Example #1');
  398. $data2=array(
  399.        array(1,3,4),
  400.        array(2,3,5),
  401.        array(1,2,5),
  402.        array(2,5)
  403.       );
  404. apriori($data2,'Example #2');
  405. ?>
  406. </div>
  407. </body>
  408. </html>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement