cacert-policies/code/policydecisions.class.php
Ian Grigg 8248a7b6a8 slight tidyup
git-svn-id: http://svn.cacert.org/CAcert/Policies@1976 14b1bab8-4ef6-0310-b690-991c95c89dfd
2010-07-23 05:34:28 +00:00

613 lines
16 KiB
PHP

<?php
/*
* This is a wiki-scraping class to extract Policy Group actions.
* It reads in the entire PolicyDecisions page, extracts the acts,
* and prepares summaries.
*
* // Example usage from PHP code:
* require_once("policydecisions.class.php"); // adjust path...
* $pd = new \Misc\PolicyDecisions();
* $pd->load_from_webpage(); // read in the data
* // $pd->save_to_cache_files(); // (or add these two.)
* // $pd->read_from_cache_files();
* $pd->html(); // see this for options on output
*/
namespace Misc {
class PolicyDecisions {
/*
 * Wiki locations.  POLICY_DECISIONS is the page that gets scraped,
 * POLICY_DECISIONSS (note the trailing '#') is the prefix used to build
 * anchor links to a single decision, and WIKI is put in front of
 * site-relative links.
 */
const WIKIURL = 'wiki.cacert.org/';
const HTTP = 'http:';
const HTTPS ='https:';
const WIKI = 'https://wiki.cacert.org';
const POLICY_DECISIONS = 'http://wiki.cacert.org/PolicyDecisions';
const POLICY_DECISIONSS = 'https://wiki.cacert.org/PolicyDecisions#';
/* Recorded acts; each entry is array(decision, contributor id, act). */
private $votes;
private $urls;
/* Contributor records, keyed by wiki url, or by plain name if no url. */
private $names;
/* Decision records, keyed by decision number. */
private $decisions;
private $state;
/* Line of the scraped page being parsed; prefixes error messages. */
private $line_num;
/*
 * The properties below are used throughout the class but were never
 * declared, which produces "undefined property" warnings on first read
 * and relies on dynamic properties (deprecated since PHP 8.2).
 * They are declared public because dynamically created properties were
 * implicitly public; tighten the visibility once callers are audited.
 */
/* Number of the decision currently being parsed. */
public $decision = null;
/* 1 while waiting for the "Resolved" line of the current decision. */
public $resolved = 0;
/* '' or the kind of vote ('aye', 'nay', 'abs') expected on the next line. */
public $vote_state = '';
/* String put in front of every cache file name. */
public $cache_prefix = '';
/* Optional external line writer; see setLineWriter(). */
public $lw = null;
/* Collected output lines when no line writer is set. */
public $out = array();
/*
 * Read everything that is left on an open stream, then close it.
 *
 * $handle  stream resource as returned by fopen().
 * Returns the data read, or an empty string when $handle is not a
 * valid stream or reading fails.
 *
 * The previous feof()/fread() loop never terminated (or threw a
 * TypeError on PHP 8) when fopen() had failed and $handle was false.
 */
private function &read_all(&$handle)
{
    // Must be a variable: the function returns by reference.
    $x = '';
    if (!is_resource($handle)) {
        return $x;
    }
    $contents = stream_get_contents($handle);
    fclose($handle);
    if (false !== $contents) {
        $x = $contents;
    }
    return $x;
}
/*
 * Fetch the complete body found at $url.
 *
 * Returns the data read, or an empty string (after reporting the
 * problem) when the url cannot be opened.
 */
private function &read_page($url)
{
    $x = '';
    $handle = fopen($url, 'rb');
    if (false === $handle) {
        // Do not hand an invalid handle to read_all().
        $this->herror("cannot open $url");
        return $x;
    }
    $x = $this->read_all($handle);
    return $x;
}
/*
 * Read the complete contents of the local file $name.
 *
 * Returns an empty string when the file does not exist or cannot be
 * opened (the latter is reported through herror()).
 */
private function &read_file($name)
{
    $x = '';
    if (!file_exists($name)) {
        return $x;
    }
    $handle = fopen($name, 'rb');
    if (false === $handle) {
        // Exists but is not readable: do not pass false to read_all().
        $this->herror("cannot open $name");
        return $x;
    }
    $x = $this->read_all($handle);
    return $x;
}
/*
 * Write the decisions, votes and names lists to their cache files,
 * asking write_cache_file() to read each one back for verification.
 */
public function save_to_cache_files()
{
    $verify = true;
    // cache name (also the property name) => entry field used as array key
    $targets = array(
        'decisions' => 'decision',
        'votes' => '',
        'names' => 'index',
    );
    foreach ($targets as $cache => $keyname) {
        $this->write_cache_file($cache, $this->{$cache}, $keyname, $verify);
    }
}
/*
 * Replace the decisions, votes and names lists with the contents of
 * their cache files.
 */
public function read_from_cache_files()
{
    foreach (array('decisions', 'votes', 'names') as $cache) {
        $this->{$cache} = $this->read_cache_file($cache);
    }
}
/*
 * Write one list to its cache file.
 *
 * File layout: a header line "v1;<name>;<unix time>;<count>;json-array;<keyname>"
 * followed by one JSON-encoded entry per line (no trailing newline).
 *
 * $name     cache name; the file name is derived by name_to_filename().
 * $list     the entries to store; anything that is not an array is
 *           treated as an empty list.
 * $keyname  name of the entry field read_cache_file() uses as array key,
 *           or '' for a plain list.
 * $test     when true the file is read back and compared with $list;
 *           any difference is reported and the script exits with status 1.
 */
public function write_cache_file($name, &$list, $keyname, $test = false)
{
    $entries = is_array($list) ? $list : array();
    $filename = $this->name_to_filename($name);
    $handle = fopen($filename, 'wb');
    if (false === $handle) {
        $this->herror("cannot open cache file $filename for writing.");
        return;
    }
    $time = time();
    $count = count($entries);
    $lines = array("v1;$name;$time;$count;json-array;$keyname");
    foreach ($entries as $entry) {
        $lines[] = json_encode($entry);
    }
    fwrite($handle, join("\n", $lines));
    fclose($handle);
    if (!$test) {
        return;
    }
    $result = $this->read_cache_file($name);
    foreach ($entries as $key => $entry) {
        // isset() avoids an undefined-index notice for a lost entry.
        if (!isset($result[$key]) || !$result[$key]) {
            $this->error("failed to recovered $key from cache.");
            exit(1);
        }
        if ($entry != $result[$key]) {
            $this->error("recovered $name not the same.");
            exit(1);
        }
    }
    // Also catches entries that exist in the file but not in the list.
    if ($result != $entries) {
        $this->error("recovered $name not the same.");
        exit(1);
    }
}
/*
* Set the string that name_to_filename() puts in front of every cache
* file name.  When the prefix is a directory, include the trailing '/'
* yourself; nothing is inserted between prefix and name.
* The function is declared as returning by reference but has no return
* statement.
*/
public function &setCachePrefix($prefix)
{
$this->cache_prefix = $prefix;
}
/*
 * Build the cache file name for $name: <cache prefix><name>.txt
 * An unset cache prefix counts as an empty string.
 */
private function &name_to_filename($name)
{
    $prefix = isset($this->cache_prefix) ? $this->cache_prefix : '';
    // A by-reference function must return a variable, not an expression.
    $filename = $prefix . $name . '.txt';
    return $filename;
}
/*
 * Read back a list written by write_cache_file().
 *
 * The header line carries the entry count (field 3) and the key name
 * (field 5).  When a key name is present, each entry is stored under
 * the value of that field; otherwise entries are appended as a plain
 * list.
 *
 * Returns an empty array when the cache file is missing, empty or has
 * a malformed header.  A count that disagrees with the number of lines
 * is reported and as many entries as are available are returned.
 */
public function &read_cache_file($name)
{
    $output = array();
    $data = $this->read_file( $this->name_to_filename($name) );
    if ('' === $data) {
        return $output;
    }
    $lines = explode("\n", $data);
    $head = explode(';', $lines[0]);
    if (count($head) < 6) {
        $this->error("$name: malformed cache header");
        return $output;
    }
    $count = (int) $head[3];
    $keyname = $head[5];
    $count2 = count($lines);
    if ($count + 1 != $count2) {
        $this->error("$name: $count+1 != $count2, lines mismatch");
    }
    // Never index past the lines that are really there.
    $last = min($count, $count2 - 1);
    for ($i = 1; $i <= $last; $i++) {
        $recovered = json_decode($lines[$i], true);
        if (!is_array($recovered)) {
            $this->error("$name: entry $i is not a JSON array");
            continue;
        }
        if ($keyname) {
            $key = $recovered[$keyname];
            $output[$key] = $recovered;
        } else {
            $output[] = $recovered;
        }
    }
    return $output;
}
/*
 * Print a message as is (no HTML escaping), prefixed with the number
 * of the line currently being parsed.
 */
private function error($e)
{
    $parts = array($this->line_num, ": ", $e, " <br/>\n");
    echo implode('', $parts);
}
/*
 * Print a message after passing it through htmlentities(), prefixed
 * with the number of the line currently being parsed.
 */
private function herror($e)
{
    $escaped = htmlentities($e);
    echo implode('', array($this->line_num, ": ", $escaped, "<br/>\n"));
}
/*
 * Record one act ($act) by the given contributor against the decision
 * currently being parsed.  The contributor is registered first; the id
 * returned by name_of_contributor() is what gets stored.
 * Duplicate acts are not detected.
 */
private function vote_plus_one($name, $url, $act)
{
    $contributor = $this->name_of_contributor($name, $url);
    $this->votes[] = array($this->decision, $contributor, $act);
}
/*
 * Parse one comma-separated fragment of a vote cell and record the
 * act for the contributor named in it.
 *
 * Accepted forms, tried in this order:
 *   - an <a href="...">Name</a> link: recorded with name and url;
 *   - text containing any of < > ; , : reported as garbled;
 *   - any other text without < or >: recorded as a plain name.
 * Fragments that trim to an empty (falsy) string are ignored.
 */
private function count_a_vote($vote, $act)
{
    $candidate = trim($vote);
    if (!$candidate) {
        return;
    }
    $found = array();
    if (preg_match('/<a [^>]*href="([^"]*)">([^<]*)<\/a>/i', $candidate, $found)) {
        $this->vote_plus_one($found[2], $found[1], $act);
        return;
    }
    if (preg_match('/([<>;,])/', $candidate, $found)) {
        $chars = $found[0];
        $this->herror("garbled name with $chars in $vote");
        return;
    }
    if (preg_match('/^([^<>]*)$/i', $candidate, $found)) {
        $this->vote_plus_one($found[1], '', $act);
        return;
    }
    $this->herror("unparsed name in $vote");
}
/*
 * Record the acts listed in one table cell line: strip the <td> and
 * <p> opening and closing tags, split the remainder on commas and hand
 * each piece to count_a_vote().
 */
private function count_td($line, $act)
{
    // Applied one after the other, in this order.
    $tags = array(
        '/<td[^>]*>/i',
        '/<p[^>]*>/i',
        '/<\/td>/i',
        '/<\/p>/i',
    );
    $stripped = preg_replace($tags, '', $line);
    foreach (explode(",", $stripped) as $piece) {
        $this->count_a_vote($piece, $act);
    }
}
/*
 * Return the display name stored in the contributor record that is
 * keyed by $url.
 */
private function get_name($url)
{
    $record = $this->names[$url];
    return $record['name'];
}
/*
 * Pick the identifier for a contributor: the url when it is truthy,
 * otherwise the name.
 */
private function get_best_uniq_name($name, $url)
{
    if ($url) {
        return $url;
    }
    return $name;
}
/*
 * Return the url of the first contributor record whose name equals
 * $name (that url may itself be an empty string), or '' when no
 * record matches.
 */
private function get_wikiname($name)
{
    foreach ($this->names as $record) {
        if ($name == $record['name']) {
            return $record['url'];
        }
    }
    return '';
}
/*
 * Make sure a contributor record exists and return its identifier.
 *
 * The identifier (and key into $this->names) is the url when one is
 * given, otherwise the name.  An existing record is left untouched.
 *
 * The plain-name branch used to guard only a throw-away assignment
 * (brace-less if) and then rebuilt the record unconditionally; both
 * branches now follow the same "create once" rule.
 */
private function name_of_contributor($name, $url = '')
{
    if ($url) {
        if (! array_key_exists($url, $this->names)) {
            $this->names[$url] = array(
                'url' => $url,
                'name' => $name,
                'index' => $url,
            );
        }
        return $url;
    }
    if (! array_key_exists($name, $this->names)) {
        $this->names[$name] = array(
            'url' => '',
            'name' => $name,
            'index' => $name,
        );
    }
    return $name;
}
/*
 * Note who proposed the decision currently being parsed (name and url
 * are stored in its record) and count that as a 'res' act.
 */
private function resolved_by($name, $url = '')
{
    $record = &$this->decisions[$this->decision];
    $record['name'] = $name; // not needed
    $record['url'] = $url;
    unset($record);
    $this->vote_plus_one($name, $url, 'res');
}
/*
* Feed one line of the scraped page through the parser.
*
* Three independent checks are run on every line, in this order:
* a decision heading, the "Resolved" line naming the proposer, and
* the vote rows.  State is carried between calls in $this->decision,
* $this->resolved and $this->vote_state.
*/
private function state($l)
{
/*
* Extract the decision number and title from the Heading <h2>
*/
// The id attribute is split at its first underscore: the part before
// it is the decision number, the rest (underscores turned into
// spaces, then trimmed) is the title.
if (preg_match('/<h2 id="([^_"]*)([^"]*)">/i', $l, $matches)) {
$decision = trim($matches[1]);
$title = trim($matches[2]);
$title = preg_replace('/_/', ' ', $title);
$this->decision = $decision;
$this->decisions[$decision] = array(
'title' => trim($title),
'decision' => trim($decision),
);
// From now on, watch for the line that names the proposer.
$this->resolved = 1;
}
/*
* If we've just seen a decision number, then look out for who
* called for the resolution.
*/
if (1 == $this->resolved)
{
// First form: the proposer is a link, "<a href=...>Name</a>: Resolved".
if (preg_match('/<a [^>]*href="([^"]*)">([^<]*)<\/a>: *Resolved/i', $l, $matches)) {
$url = trim($matches[1]);
$name = trim($matches[2]);
$this->resolved_by($name, $url);
$this->name_of_contributor($name, $url);
$this->resolved = 0;
// echo "{$this->decision} matching '$url' / $name / <br>\n";
// Second form: plain text directly after a tag, ">Name: Resolved".
} elseif (preg_match('/>([^:<>]*): *Resolved/i', $l, $matches)) {
$name = trim($matches[1]);
$this->resolved_by($name);
// echo "{$this->decision} matching '$name' <br>\n";
$this->resolved = 0;
}
// else, the name of resolution proposer is not properly recorded.
}
/*
* Search for the Votes, set state to count them on next line.
*/
// The line carrying the label is not itself scanned for names.
if ('' == $this->vote_state) {
if (preg_match("/aye:/i", $l))
$this->vote_state = 'aye';
elseif (preg_match("/nay:/i", $l))
$this->vote_state = 'nay';
elseif (preg_match("/abstain:/i", $l))
$this->vote_state = 'abs';
}
elseif ($this->vote_state)
{
/*
* Counting votes.
*/
// $this->error("looking for TD\n===================\n$l\n-------------------");
// Only a line containing a bare "<td>" is counted.
if (preg_match("/<td>/i", $l)) {
$this->count_td($l, $this->vote_state);
}
// The state is cleared after this one line, whether or not it held a cell.
$this->vote_state = '';
}
}
/*
 * Fetch the PolicyDecisions wiki page, reset all parser state and
 * collected data, then run the page through state() line by line.
 */
public function load_from_webpage()
{
    $page = $this->read_page(self::POLICY_DECISIONS);
    $rows = explode("\n", $page);

    // Start from a clean slate.
    $this->vote_state = '';
    $this->resolved = 0;
    $this->names = array();
    $this->votes = array();
    $this->decisions = array();
    $this->line_num = 0;

    foreach ($rows as $row) {
        // Counted before parsing so that error messages are 1-based.
        $this->line_num++;
        $this->state($row);
    }
}
/*
 * Emit the XML prolog, XHTML 1.1 doctype and the <head> section with
 * the given title, and open the <body>.  $title is inserted as is.
 */
public function html_head($title)
{
    $head = array(
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>",
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"" .
            " \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">",
        "<html xmlns=\"http://www.w3.org/1999/xhtml\"><head>",
        " <meta http-equiv=\"CONTENT-TYPE\" content=\"text/html; charset=utf-8\" />",
        " <title>$title</title>",
        "</head><body>\n",
    );
    foreach ($head as $markup) {
        $this->l($markup);
    }
}
/*
 * Install an external line writer.  While one is set, l() forwards
 * every output line to its l() method instead of collecting it.
 */
public function setLineWriter($x)
{
    $this->lw = $x;
}
/*
 * Emit one line of output.
 *
 * With a line writer installed (see setLineWriter()) the line and the
 * optional comment are forwarded to its l() method; otherwise the
 * line is appended to $this->out and the comment is dropped.
 */
private function l($s, $and_this_comment = '')
{
    // empty() does not warn when no line writer was ever set.
    if (!empty($this->lw)) {
        $this->lw->l($s, $and_this_comment);
        return;
    }
    $this->out[] = $s;
}
/*
 * Choose where l() collects output lines.
 *
 * $lines  optional array owned by the caller; when given, lines are
 *         appended directly to it.  Without it a fresh internal array
 *         is used.
 *
 * The original used "&=" (bitwise AND assignment) where a reference
 * binding was meant, and ignored an empty caller array.
 */
public function init_output(&$lines = null)
{
    if (is_array($lines)) {
        $this->out = &$lines;
        return;
    }
    // Bind to a new array rather than assigning, so that a previously
    // bound caller array is not overwritten through the old reference.
    $fresh = array();
    $this->out = &$fresh;
}
/*
 * Main entry point for the simple output: prints a complete page with
 * the contributors table followed by the decisions table.
 * Copy this and vary the output initialisation, and choice of tables.
 */
public function html()
{
    $title = 'Policy Group Decisions Summary';
    $this->init_output();
    $this->html_head($title);
    $this->html_by_contributors();
    $this->html_by_decisions();
    $this->html_tail($title);
    $this->l("");
    echo implode("\n", $this->out);
}
/*
 * Emit the "Hall of Fame" section: a heading, a sentence with the
 * number of contributors and recorded acts, and the per-contributor
 * table.
 */
public function html_by_contributors()
{
    $count_contributors = count($this->names);
    $votes_by_contributor = $this->get_votes_by_contributor();
    $count_acts = count($this->votes);
    $this->l("<h2>The Policy Group Hall of Fame</h2>");
    $this->l("<p>Policy Group has $count_contributors contributors, with a total $count_acts acts of participation. Each act is a vote cast or a decision proposed.</p>");
    $this->l("<br/><br/>");
    $this->table_votes($votes_by_contributor);
}
/*
 * Emit the page footer: a rule, a note linking back to the scraped
 * wiki page, and the closing body/html tags.
 */
public function html_tail()
{
    $url = self::POLICY_DECISIONS;
    $link = "<a href=\"$url\">$url</a>";
    $footer = array(
        "<hr>",
        "<i> This summary wiki-scraped from $link, the formal record of Policy Decisions of the Policy Group. </i>",
        "</body></html>",
    );
    foreach ($footer as $markup) {
        $this->l($markup);
    }
}
/*
 * Emit the "Summary of Decisions" section: a heading with the number
 * of decisions, followed by the decisions table.
 */
public function html_by_decisions()
{
    // Was never set in this method, so the count was missing from the page.
    $count_decisions = count($this->decisions);
    $this->l("<br/><br/>");
    $this->l("<h2>Summary of Decisions</h2>\n<p>$count_decisions decisions issued by Policy Group</p>");
    $this->l("<br/><br/>");
    $this->table_decisions();
}
/*
 * Sum the 'count' field over a list as built by
 * get_votes_by_contributor().
 *
 * The original read $entry[3], an index those associative entries do
 * not have, so the result was always 0.
 * NOTE(review): 'count' includes 'res' acts as well as votes; confirm
 * that this is the intended total.
 */
private function count_votes(&$votes_by_contributor)
{
    $count = 0;
    foreach ($votes_by_contributor as $entry) {
        $count += $entry['count'];
    }
    return $count;
}
/*
 * Comparison callback for usort(): orders entries by their 'count'
 * field, largest first.  Returns +1, -1 or 0.
 */
static function cmp($a, $b)
{
    $left = $a['count'];
    $right = $b['count'];
    if ($left < $right) {
        return +1;
    } elseif ($left > $right) {
        return -1;
    }
    return 0;
}
/*
 * Group the recorded acts by contributor.
 *
 * Returns a list (re-indexed by usort(), ordered through cmp()) of
 * entries with the fields 'uniq', 'decisions' (one decision number per
 * act), 'count' (all acts) and one counter each for 'aye', 'nay',
 * 'res' and 'abs'.  An unknown act is reported but still counted in
 * 'count' and 'decisions'.
 */
private function &get_votes_by_contributor()
{
    $known_acts = array('aye', 'nay', 'res', 'abs');
    $tally = array();
    foreach ($this->votes as $vote) {
        $decision = $vote[0];
        $uniq = $vote[1];
        $act = $vote[2];
        if (! array_key_exists($uniq, $tally)) {
            $tally[$uniq] = array(
                'uniq' => $uniq,
                'decisions' => array(), 'count' => 0,
                'aye' => 0, 'nay' => 0, 'res' => 0, 'abs' => 0,
            );
        }
        $tally[$uniq]['decisions'][] = $decision;
        // Loose comparison, as in the switch statement this replaces.
        if (in_array($act, $known_acts)) {
            $tally[$uniq][$act] += 1;
        } else {
            $this->error("$decision: $uniq = $act? is what act?");
        }
        $tally[$uniq]['count'] += 1;
    }
    usort($tally, array(get_class($this), "cmp")); // class name
    return $tally;
}
/*
 * Emit the contributors table: one row per contributor with one
 * column per act and the aye/nay/abs/res totals.
 *
 * $votes_by_contributor  list as built by get_votes_by_contributor().
 * The number of act columns is taken from the first entry, so the list
 * is expected to be ordered with the largest 'count' first.  An empty
 * list yields a table with no act columns instead of an undefined
 * variable.
 */
private function table_votes(&$votes_by_contributor)
{
    $most_actions = 0;
    foreach ($votes_by_contributor as $entry) {
        $most_actions = $entry['count'];
        break;
    }
    $this->l("<table>");
    $this->l("<tr><th>Name</th><th>#</th>");
    $this->l(" <td colspan=\"" . $most_actions . "\"><b>Acts</b></td>");
    $this->l(" <td>Aye</td><td>Nay</td><td>Abs</td><td>Res</td>");
    $this->l("</tr>");
    // A row of empty fixed-width cells gives the act columns their width.
    $td = "<td width=\"10\"></td>";
    $widths = str_repeat($td, $most_actions);
    $this->l("<tr><td colspan=\"2\"><hr></td> $widths <td colspan=\"4\"><hr></td></tr>");
    foreach ($votes_by_contributor as $entry) {
        $this->l($this->get_contributor_line($entry, $most_actions));
    }
    $this->l("</table>\n");
}
/*
 * Build the table row for one contributor.
 *
 * $entry         one entry from get_votes_by_contributor().
 * $most_actions  number of act columns; the row is padded with empty
 *                cells up to this width.
 *
 * Acts are taken from the end of the entry's decision list, each
 * rendered as a link to the decision's anchor on the wiki page.
 *
 * Fixes: the link title interpolated an undefined $p (it now carries
 * the decision title when one is known -- NOTE(review): confirm that
 * this is the intended tooltip); a stray closing </td> was emitted at
 * the end of each row.
 */
private function get_contributor_line($entry, $most_actions)
{
    $name = $this->get_name_link($entry['uniq']);
    $decisions = $entry['decisions'];
    // Zero totals are shown as blanks.
    $aye = $entry['aye'] ? $entry['aye'] : ' ';
    $nay = $entry['nay'] ? $entry['nay'] : ' ';
    $abs = $entry['abs'] ? $entry['abs'] : ' ';
    $res = $entry['res'] ? $entry['res'] : ' ';
    $lead = " <tr>" .
        "<td>$name</td>" .
        "<td>{$entry['count']}</td>" .
        "\n";
    $mix = array();
    $purl = self::POLICY_DECISIONSS;
    for ($i = 1; $i <= $most_actions; $i++) {
        $d = array_pop($decisions);
        if ($d) {
            $p = '';
            if (isset($this->decisions[$d]['title'])) {
                // double_encode off: the scraped title may already hold entities.
                $p = htmlspecialchars($this->decisions[$d]['title'], ENT_QUOTES, 'UTF-8', false);
            }
            $mix[] = "<td bgcolor=\"lightgreen\">" .
                "<a href=\"$purl$d\" title=\"$p\">*</a>" .
                "</td>";
        } else {
            $mix[] = "<td></td>";
        }
    }
    $final = "<td>$aye</td>" .
        "<td>$nay</td>" .
        "<td>$abs</td>" .
        "<td><b>$res</b></td>" .
        "</tr>\n";
    return $lead . join('', $mix) . $final;
}
/*
 * Emit the decisions table: number (linked to its anchor on the wiki
 * page), proposer and title of every decision.
 *
 * A decision whose proposer was not recognised has no 'name'/'url'
 * fields; those are shown as empty instead of raising undefined-index
 * notices.  The header row is now closed.
 */
private function table_decisions()
{
    $this->l("<table>");
    $this->l("<tr><th>Number</th><th>Proposor</th><th>Title</th></tr>");
    foreach ($this->decisions as $number => $decision) {
        $title = isset($decision['title']) ? $decision['title'] : '';
        $name = isset($decision['name']) ? $decision['name'] : '';
        $url = isset($decision['url']) ? $decision['url'] : '';
        $name = $this->to_name_link($name, $url);
        $pnumber = '<a href="' . self::POLICY_DECISIONSS . $number . "\">$number</a>";
        $this->l(" <tr><td>$pnumber</td><td>$name</td><td>$title</td></tr>");
    }
    $this->l("</table>");
}
/*
 * Turn a name into a link when a url is known.
 *
 * The plain name is returned when the url is empty or equal to the
 * name.  A url starting with '/' is taken as site-relative and gets
 * the wiki address put in front of it.
 */
private function to_name_link($name, $url)
{
    if (!$url || ($url == $name)) {
        return $name;
    }
    if (preg_match(",^/,", $url)) {
        $url = self::WIKI . $url;
    }
    return "<a href=\"$url\">$name</a>";
}
/*
 * Render a contributor identifier for display.
 *
 * An identifier starting with '/' is a site-relative wiki url: the
 * stored name is looked up and linked to the wiki.  Anything else is
 * returned unchanged.  An empty identifier no longer triggers an
 * "uninitialized string offset" warning.
 */
private function get_name_link($uniq)
{
    if ('/' !== substr($uniq, 0, 1)) {
        return $uniq;
    }
    $name = $this->get_name($uniq);
    return "<a href=\"" . self::WIKI . "$uniq\">$name</a>";
}
}
}
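/* local test code...
 * (run it from a separate script, or wrap it in a "namespace { }" block:
 * PHP allows no code after a bracketed namespace declaration)
$pd = new \Misc\PolicyDecisions();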
$pd->load_from_webpage();
// $pd->save_to_cache_files();
// $pd->read_from_cache_files();
$pd->html();
*/
?>