<?php
/*
 * This is a wiki-scraping class to extract Policy Group actions.
 * It reads in the entire PolicyDecisions page, extracts the acts,
 * and prepares summaries.
 *
 * // PHP usage example:
 * require_once("policydecisions.class.php");  // adjust path...
 * $pd = new \Misc\PolicyDecisions();
 * $pd->load_from_webpage();                   // read in the data
 * // $pd->save_to_cache_files();              // (or add these two.)
 * // $pd->read_from_cache_files();
 * $pd->html();                                // see this for options on output
 */
namespace Misc {
/**
 * Scrapes the Policy Group "PolicyDecisions" wiki page, records each
 * decision together with who proposed it and who voted how, and renders
 * HTML summary tables.
 *
 * Data can be loaded from the live page (load_from_webpage) or from the
 * cache files written by save_to_cache_files (read_from_cache_files).
 * Output lines are collected in an internal buffer, in a caller-supplied
 * array (init_output), or passed to a line-writer object (setLineWriter).
 */
class PolicyDecisions {
    const WIKIURL = 'wiki.cacert.org/';
    const HTTP = 'http:';
    const HTTPS = 'https:';
    const WIKI = 'https://wiki.cacert.org';
    const POLICY_DECISIONS = 'http://wiki.cacert.org/PolicyDecisions';
    const POLICY_DECISIONSS = 'https://wiki.cacert.org/PolicyDecisions#';

    /** List of acts; each entry is array(decision, contributor key, act). */
    private $votes = array();
    private $urls;
    /** Contributors keyed by wiki url, or by plain name when no url is known. */
    private $names = array();
    /** Decisions keyed by decision number: title, decision, name, url. */
    private $decisions = array();
    private $state;
    /** Line of the scraped page currently being parsed (used in error output). */
    private $line_num;
    /** Decision number the parser is currently inside. */
    private $decision = '';
    /** 1 while the parser is still looking for the "Resolved" line of a decision. */
    private $resolved = 0;
    /** '' or the act ('aye', 'nay', 'abs') whose names are expected on the next line. */
    private $vote_state = '';
    /** Prefix prepended to cache file names. */
    private $cache_prefix = '';
    /** Optional line-writer object; when set, l() forwards output to its l() method. */
    private $lw = null;
    /** Output line buffer used when no line writer is set. */
    private $out = array();

    /*
     * Read everything from an open stream, close it, and return the data.
     */
    private function read_all(&$handle)
    {
        $contents = array();
        while (!feof($handle)) {
            $chunk = fread($handle, 8192);
            if ($chunk === false) {
                // A failed read would otherwise loop forever on a stream
                // that never reports EOF.
                break;
            }
            $contents[] = $chunk;
        }
        fclose($handle);
        return join('', $contents);
    }

    /*
     * Fetch a URL; returns '' (after reporting an error) if it cannot be opened.
     */
    private function read_page($url)
    {
        $handle = fopen($url, 'rb');
        if ($handle === false) {
            $this->error("cannot open $url");
            return '';
        }
        return $this->read_all($handle);
    }

    /*
     * Read a local file; returns '' if it does not exist or cannot be opened.
     */
    private function read_file($name)
    {
        if (!file_exists($name)) {
            return '';
        }
        $handle = fopen($name, 'rb');
        if ($handle === false) {
            $this->error("cannot open $name");
            return '';
        }
        return $this->read_all($handle);
    }

    /*
     * Write decisions, votes and names to their cache files, verifying
     * each file by reading it back.
     */
    public function save_to_cache_files()
    {
        $test = true;
        $this->write_cache_file('decisions', $this->decisions, 'decision', $test);
        $this->write_cache_file('votes', $this->votes, '', $test);
        $this->write_cache_file('names', $this->names, 'index', $test);
    }

    /*
     * Replace the in-memory decisions, votes and names with the cached copies.
     */
    public function read_from_cache_files()
    {
        $this->decisions = $this->read_cache_file('decisions');
        $this->votes = $this->read_cache_file('votes');
        $this->names = $this->read_cache_file('names');
    }

    /*
     * Write $list to the cache file for $name: one header line
     * "v1;name;time;count;json-array;keyname" followed by one JSON-encoded
     * entry per line.  With $test set, the file is read back and compared;
     * a mismatch is reported and the script exits with status 1.
     */
    public function write_cache_file($name, &$list, $keyname, $test = false)
    {
        $time = time();
        $count = count($list);
        $lines = array("v1;$name;$time;$count;json-array;$keyname");
        foreach ($list as $entry) {
            $lines[] = json_encode($entry);
        }
        $filename = $this->name_to_filename($name);
        if (file_put_contents($filename, join("\n", $lines)) === false) {
            $this->error("cannot write cache file $filename.");
            return;
        }
        if (!$test) {
            return;
        }

        $result = $this->read_cache_file($name);
        foreach ($list as $key => $entry) {
            if (!isset($result[$key]) || !$result[$key]) {
                $this->error("failed to recover $key from cache.");
                exit(1);
            }
            if ($entry != $result[$key]) {
                $this->error("recovered $name not the same.");
                exit(1);
            }
        }
        // Also catches entries present in the file but not in $list.
        if ($result != $list) {
            $this->error("recovered $name not the same.");
            exit(1);
        }
    }

    /*
     * Prepended to cache files, don't forget a '/' if needed.
     */
    public function setCachePrefix($prefix)
    {
        $this->cache_prefix = $prefix;
    }

    private function name_to_filename($name)
    {
        return $this->cache_prefix . $name . '.txt';
    }

    /*
     * Read a cache file written by write_cache_file() and return its entries.
     * The array key can be set to some string
     * from the array entry named keyname, if supplied in the file header.
     * A missing or empty file yields an empty array.
     */
    public function &read_cache_file($name)
    {
        $output = array();
        $data = $this->read_file($this->name_to_filename($name));
        if ($data === '') {
            return $output;
        }
        $lines = explode("\n", $data);
        $head = explode(';', $lines[0]);
        if (count($head) < 6) {
            $this->error("$name: malformed cache header");
            return $output;
        }
        $count = (int) $head[3];
        $keyname = $head[5];
        $count2 = count($lines);
        if ($count + 1 != $count2) {
            $this->error("$name: $count+1 != $count2, lines mismatch");
        }
        for ($i = 1; $i <= $count; $i++) {
            if (!isset($lines[$i])) {
                break; // file is shorter than its header claims
            }
            $recovered = json_decode($lines[$i], true);
            if ($keyname) {
                $output[$recovered[$keyname]] = $recovered;
            } else {
                $output[] = $recovered;
            }
        }
        return $output;
    }

    /*
     * Report a problem, prefixed with the current input line number.
     * The message is emitted as-is (it may contain markup).
     */
    private function error($e)
    {
        echo $this->line_num . ": " . $e . "<br/>\n";
    }

    /*
     * As error(), but the message is HTML-escaped first; used when the
     * message quotes raw page content.
     */
    private function herror($e)
    {
        echo $this->line_num . ": " . htmlentities($e) . "<br/>\n";
    }

    /*
     * Record one act ('aye', 'nay', 'abs' or 'res') by a contributor on the
     * current decision.
     */
    private function vote_plus_one($name, $url, $act)
    {
        $best_uniq = $this->name_of_contributor($name, $url);
        // note this does not pick up duplicates...
        // it could, but they shouldn't be in the source anyway.
        $this->votes[] = array($this->decision, $best_uniq, $act);
    }

    /*
     * Parse one comma-separated fragment of a vote cell: either a wiki link
     * to the voter or a plain name.
     */
    private function count_a_vote($vote, $act)
    {
        $matches = array();
        $v1 = trim($vote);
        if ($v1 === '') {
            return;
        }
        if (preg_match('/<a [^>]*href="([^"]*)">([^<]*)<\/a>/i', $v1, $matches)) {
            $this->vote_plus_one($matches[2], $matches[1], $act);
        } elseif (preg_match('/([<>;,])/', $v1, $matches)) {
            $chars = $matches[0];
            $this->herror("garbled name with $chars in $vote");
        } elseif (preg_match('/^([^<>]*)$/i', $v1, $matches)) {
            $this->vote_plus_one($matches[1], '', $act);
        } else {
            $this->herror("unparsed name in $vote");
        }
    }

    /*
     * Strip the <td>/<p> wrappers from a table cell line and count every
     * comma-separated voter in it.
     */
    private function count_td($line, $act)
    {
        $stripped = preg_replace(
            array('/<td[^>]*>/i', '/<p[^>]*>/i', '/<\/td>/i', '/<\/p>/i'),
            '',
            $line
        );
        foreach (explode(",", $stripped) as $v) {
            $this->count_a_vote($v, $act);
        }
    }

    /*
     * Display name recorded for a contributor key; falls back to the key
     * itself when the contributor is unknown.
     */
    private function get_name($url)
    {
        return isset($this->names[$url]['name']) ? $this->names[$url]['name'] : $url;
    }

    private function get_best_uniq_name($name, $url)
    {
        return $url ? $url : $name;
    }

    private function get_wikiname($name)
    {
        foreach ($this->names as $n) {
            if ($n['name'] == $name) {
                return $n['url']; // if there is one, else empty string
            }
        }
        return '';
    }

    /*
     * Register a contributor (if not yet known) and return the unique key:
     * the wiki url when there is one, otherwise the plain name.
     */
    private function name_of_contributor($name, $url = '')
    {
        $key = $url ? $url : $name;
        if (!array_key_exists($key, $this->names)) {
            $this->names[$key] = array(
                'url' => $url ? $url : '',
                'name' => $name,
                'index' => $key,
            );
        }
        return $key;
    }

    /*
     * Record who proposed the current decision; proposing counts as a 'res' act.
     */
    private function resolved_by($name, $url = '')
    {
        $decision = $this->decision;
        $this->decisions[$decision]['name'] = $name; // not needed
        $this->decisions[$decision]['url'] = $url;
        $this->vote_plus_one($name, $url, 'res');
    }

    /*
     * Feed one line of the page through the parser state machine.
     */
    private function state($l)
    {
        $matches = array();
        /*
         * Extract the decision number and title from the Heading <h2>
         */
        if (preg_match('/<h2 id="([^_"]*)([^"]*)">/i', $l, $matches)) {
            $decision = trim($matches[1]);
            $title = trim(preg_replace('/_/', ' ', trim($matches[2])));
            $this->decision = $decision;
            $this->decisions[$decision] = array(
                'title' => $title,
                'decision' => $decision,
            );
            $this->resolved = 1;
        }
        /*
         * If we've just seen a decision number, then look out for who
         * called for the resolution.
         */
        if (1 == $this->resolved) {
            if (preg_match('/<a [^>]*href="([^"]*)">([^<]*)<\/a>: *Resolved/i', $l, $matches)) {
                // resolved_by() also registers the contributor.
                $this->resolved_by(trim($matches[2]), trim($matches[1]));
                $this->resolved = 0;
            } elseif (preg_match('/>([^:<>]*): *Resolved/i', $l, $matches)) {
                $this->resolved_by(trim($matches[1]));
                $this->resolved = 0;
            }
            // else, the name of resolution proposer is not properly recorded.
        }
        /*
         * Search for the Votes, set state to count them on next line.
         */
        if ('' == $this->vote_state) {
            if (preg_match("/aye:/i", $l)) {
                $this->vote_state = 'aye';
            } elseif (preg_match("/nay:/i", $l)) {
                $this->vote_state = 'nay';
            } elseif (preg_match("/abstain:/i", $l)) {
                $this->vote_state = 'abs';
            }
        } elseif ($this->vote_state) {
            /*
             * Counting votes: the names are expected on the line directly
             * after the label, so the state is reset either way.
             */
            if (preg_match("/<td>/i", $l)) {
                $this->count_td($l, $this->vote_state);
            }
            $this->vote_state = '';
        }
    }

    /*
     * Fetch the PolicyDecisions page and rebuild names, votes and decisions
     * from it, discarding anything loaded before.
     */
    public function load_from_webpage()
    {
        $data = $this->read_page(self::POLICY_DECISIONS);
        $this->vote_state = '';
        $this->resolved = 0;
        $this->names = array();
        $this->votes = array();
        $this->decisions = array();
        $this->line_num = 0;
        foreach (explode("\n", $data) as $l) {
            $this->line_num++;
            $this->state($l);
        }
    }

    /*
     * Emit the XHTML 1.1 prologue and document head.
     */
    public function html_head($title)
    {
        $title = htmlspecialchars($title, ENT_QUOTES, 'UTF-8', false);
        $this->l('<?xml version="1.0" encoding="utf-8"?>');
        $this->l('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' .
            '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">');
        $this->l('<html xmlns="http://www.w3.org/1999/xhtml"><head>');
        $this->l('<meta http-equiv="CONTENT-TYPE" content="text/html; charset=utf-8" />');
        $this->l("<title>$title</title>");
        $this->l("</head><body>\n");
    }

    /*
     * Route all output through $x, an object offering l($line, $comment).
     */
    public function setLineWriter($x)
    {
        $this->lw = $x;
    }

    /*
     * Emit one output line, to the line writer if set, else to the buffer.
     */
    private function l($s, $and_this_comment = '')
    {
        if ($this->lw) {
            $this->lw->l($s, $and_this_comment);
        } else {
            $this->out[] = $s;
        }
    }

    /*
     * Choose the output buffer.  When an array is passed, output lines are
     * appended to the caller's array (bound by reference); otherwise a
     * fresh internal buffer is used.
     */
    public function init_output(&$lines = null)
    {
        if ($lines !== null) {
            $this->out = &$lines;
        } else {
            // Rebind instead of assigning, so that a previously bound
            // caller array is left untouched.
            $fresh = array();
            $this->out = &$fresh;
        }
    }

    /*
     * This is the major entry point for a simple output.
     * Copy this and vary the output initialisation, and choice of tables.
     */
    public function html()
    {
        $this->init_output();
        $this->html_head('Policy Group Decisions Summary');
        $this->html_by_contributors();
        $this->html_by_decisions();
        $this->html_tail();
        $this->l("");
        echo join("\n", $this->out);
    }

    /*
     * Emit the contributor table, preceded by overall participation counts.
     */
    public function html_by_contributors()
    {
        $count_contributors = count($this->names);
        $votes_by_contributor = $this->get_votes_by_contributor();
        $count_acts = count($this->votes);
        $this->l("<h2>The Policy Group Hall of Fame</h2>");
        $this->l("<p>Policy Group has $count_contributors contributors, with a total $count_acts acts of participation. Each act is a vote cast or a decision proposed.</p>");
        $this->l("<br/><br/>");
        $this->table_votes($votes_by_contributor);
    }

    /*
     * Emit the source attribution and close the document.
     */
    public function html_tail()
    {
        $this->l("<hr/>");
        $url = self::POLICY_DECISIONS;
        $link = "<a href=\"$url\">$url</a>";
        $this->l("<i>This summary wiki-scraped from $link, the formal record of Policy Decisions of the Policy Group.</i>");
        $this->l("</body></html>");
    }

    /*
     * Emit the decisions table, preceded by the number of decisions.
     */
    public function html_by_decisions()
    {
        $count_decisions = count($this->decisions);
        $this->l("<br/><br/>");
        $this->l("<h2>Summary of Decisions</h2>\n<p>$count_decisions decisions issued by Policy Group</p>");
        $this->l("<br/><br/>");
        $this->table_decisions();
    }

    /*
     * Total number of acts across all entries of a per-contributor summary.
     */
    private function count_votes(&$votes_by_contributor)
    {
        $count = 0;
        foreach ($votes_by_contributor as $entry) {
            $count += $entry['count'];
        }
        return $count;
    }

    /*
     * usort() callback: orders summary entries by descending 'count'.
     */
    public static function cmp($a, $b)
    {
        if ($a['count'] < $b['count']) {
            return +1;
        }
        if ($a['count'] > $b['count']) {
            return -1;
        }
        return 0;
    }

    /*
     * Aggregate the recorded acts per contributor and return the summaries
     * as a list, most active contributor first.  Each summary holds 'uniq',
     * 'decisions' (list), 'count' and the per-act counters.
     */
    private function get_votes_by_contributor()
    {
        $sort = array();
        foreach ($this->votes as $vote) {
            $decision = $vote[0];
            $uniq = $vote[1];
            $act = $vote[2];
            if (!array_key_exists($uniq, $sort)) {
                $sort[$uniq] = array(
                    'uniq' => $uniq,
                    'decisions' => array(), 'count' => 0,
                    'aye' => 0, 'nay' => 0, 'res' => 0, 'abs' => 0,
                );
            }
            $sort[$uniq]['decisions'][] = $decision;
            switch ($act) {
                case 'aye': case 'nay': case 'res': case 'abs':
                    $sort[$uniq][$act] += 1;
                    break;
                default:
                    $this->error("$decision: $uniq = $act? is what act?");
            }
            $sort[$uniq]['count'] += 1;
        }
        usort($sort, array(__CLASS__, 'cmp'));
        return $sort;
    }

    /*
     * Emit the contributor table.  The list must be sorted by descending
     * count: the first entry determines how many "act" columns are drawn.
     */
    private function table_votes(&$votes_by_contributor)
    {
        $first = reset($votes_by_contributor);
        $most_actions = $first ? $first['count'] : 0;

        $this->l("<table>");
        $this->l("<tr><th>Name</th><th>#</th>");
        if ($most_actions > 0) {
            $this->l("<td colspan=\"" . $most_actions . "\"><b>Acts</b></td>");
        }
        $this->l("<td>Aye</td><td>Nay</td><td>Abs</td><td>Res</td>");
        $this->l("</tr>");
        $widths = str_repeat("<td width=\"10\"></td>", $most_actions);
        $this->l("<tr><td colspan=\"2\"><hr/></td>$widths<td colspan=\"4\"><hr/></td></tr>");
        foreach ($votes_by_contributor as $entry) {
            $this->l($this->get_contributor_line($entry, $most_actions));
        }
        $this->l("</table>\n");
    }

    /*
     * Build one table row for a contributor: name, total, one marker cell
     * per act (latest first, padded to $most_actions), then the counters.
     */
    private function get_contributor_line($entry, $most_actions)
    {
        $name = $this->get_name_link($entry['uniq']);
        $decisions = $entry['decisions'];
        $aye = $entry['aye'] ? $entry['aye'] : ' ';
        $nay = $entry['nay'] ? $entry['nay'] : ' ';
        $abs = $entry['abs'] ? $entry['abs'] : ' ';
        $res = $entry['res'] ? $entry['res'] : ' ';
        $lead = "<tr>" .
            "<td>$name</td>" .
            "<td>{$entry['count']}</td>" .
            "\n";
        $mix = array();
        $purl = self::POLICY_DECISIONSS;
        for ($i = 1; $i <= $most_actions; $i++) {
            $d = array_pop($decisions);
            if ($d !== null && $d !== '') {
                // Tooltip: decision number plus its title when known.
                $title = isset($this->decisions[$d]['title']) ? $this->decisions[$d]['title'] : '';
                $hover = htmlspecialchars(trim("$d $title"), ENT_QUOTES, 'UTF-8', false);
                $mix[] = "<td bgcolor=\"lightgreen\">" .
                    "<a href=\"$purl$d\" title=\"$hover\">*</a>" .
                    "</td>";
            } else {
                $mix[] = "<td></td>";
            }
        }
        $final = "<td>$aye</td>" .
            "<td>$nay</td>" .
            "<td>$abs</td>" .
            "<td><b>$res</b></td>" .
            "</tr>\n";
        return $lead . join('', $mix) . $final;
    }

    /*
     * Emit the table of decisions: number (linked to the wiki), proposer, title.
     */
    private function table_decisions()
    {
        $this->l("<table>");
        $this->l("<tr><th>Number</th><th>Proposer</th><th>Title</th></tr>");
        foreach ($this->decisions as $number => $decision) {
            $title = isset($decision['title']) ? $decision['title'] : '';
            // name/url are only present when a "Resolved" line was found.
            $name = isset($decision['name']) ? $decision['name'] : '';
            $url = isset($decision['url']) ? $decision['url'] : '';
            $name = $this->to_name_link($name, $url);
            $pnumber = '<a href="' . self::POLICY_DECISIONSS . $number . "\">$number</a>";
            $this->l("<tr><td>$pnumber</td><td>$name</td><td>$title</td></tr>");
        }
        $this->l("</table>");
    }

    /*
     * Wrap $name in a link to $url when a distinct url is known; site-relative
     * urls are made absolute against the wiki.
     */
    private function to_name_link($name, $url)
    {
        if ($url && ($url != $name)) {
            if (substr($url, 0, 1) === '/') {
                $url = self::WIKI . $url;
            }
            $name = "<a href=\"$url\">$name</a>";
        }
        return $name;
    }

    /*
     * Render a contributor key: keys starting with '/' are wiki urls and
     * become links showing the recorded name; other keys are plain names.
     */
    private function get_name_link($uniq)
    {
        if (substr((string) $uniq, 0, 1) === '/') { // is a url-based name
            $name = $this->get_name($uniq);
            return "<a href=\"" . self::WIKI . "$uniq\">$name</a>";
        }
        return $uniq;
    }
}
}
/* local test code ...
$pd = new \Misc\PolicyDecisions();
$pd->load_from_webpage();
// $pd->save_to_cache_files();
// $pd->read_from_cache_files();
$pd->html();
*/
?>