Occam's Razor Recent Visitor's Log
› ';
// --- MySQL connection settings ------------------------------------------------
$db_name     = 'hidden'; // MySQL database that holds the MovableType blog
$db_server   = 'hidden';
$db_username = 'hidden';
$db_password = 'hidden';

// --- Web server log -----------------------------------------------------------
$records_to_fetch = 500; // How many records are taken from the tail of the log file
$log_file_path    = '/home/potomact/domlogs/occams-razor.info.log';

// --- GeoIP lookup -------------------------------------------------------------
// $geoipcity_path is the directory holding geoipcity.inc; $geo_ip_free_path is the
// database file handed to geoip_open().
$geoipcity_path   = './';
$geo_ip_free_path = '/home/potomact/www/occams-razor/geoipfree/GeoLiteCity.dat';
include $geoipcity_path . 'geoipcity.inc';
$gi = geoip_open($geo_ip_free_path, GEOIP_STANDARD);

// Timezone abbreviation (date format 'T') appended to every timestamp that is printed
$timezone = date('T');
// Lookup table used to turn the two-digit month taken from a monthly archive URL
// into a readable month name.
$months = array(
    '01' => 'January',
    '02' => 'February',
    '03' => 'March',
    '04' => 'April',
    '05' => 'May',
    '06' => 'June',
    '07' => 'July',
    '08' => 'August',
    '09' => 'September',
    '10' => 'October',
    '11' => 'November',
    '12' => 'December',
);

// All patterns below are delimiter-less extended regular expressions (they are handed
// to the matcher as-is), so a literal dot has to be written as \. or placed inside a
// bracket expression; a bare . matches ANY character.

// The way my Apache weblog works I see a lot of ip's that are dressed up like:
// crawl-66-249-71-13.googlebot.com
// This pattern helps me determine if a real IP address is embedded in the IP field. If I can get
// a real IP, then I can use GeoLiteCity to determine the geographical location.
// The separator between octets is either a dash or a literal dot.
$ip_pattern = '[0-9]{1,3}[-.][0-9]{1,3}[-.][0-9]{1,3}[-.][0-9]{1,3}';

// Useful patterns that help determine type of entry from the resource field in the web log

// Matches any resource containing a slash; it is the last pattern tried, so it acts as
// the catch-all for "Main Index".
$index_pattern = '/';
// A resource that ends in a literal dot plus a 3-4 character extension (.gif, .ico,
// .css, .jpeg, ...). Paths without an extension, such as /archives/politics, no longer
// match by accident.
$not_a_page_pattern = '\.[a-zA-Z0-9]{3,4}$';
$monthly_archives_pattern = '/archives/[0-9]{4}/[0-9]{2}/?';
$category_archives_pattern = '/archives/[a-z]*/?';
// Individual entries are served as <basename>.html
$entry_pattern = '\.html';
$atom_pattern = 'atom\.xml';
$rdf_pattern = 'index\.rdf';
// Get the last X records from the log file then reverse the order of presentation, so last come first.
// The record count is forced to an integer and the path is shell-quoted so that a path
// containing spaces or shell metacharacters cannot break (or alter) the command line.
$command = 'tail -n ' . intval($records_to_fetch) . ' -q ' . escapeshellarg($log_file_path) . ' | tac';
$handle = popen($command, 'r');
if (!$handle)
{
    die("Cannot execute command " . $command . "\n");
}

// Connect to the MySQL server that hosts the MovableType database
$db_link = mysql_connect($db_server, $db_username, $db_password);
if (!$db_link)
{
    die('Not connected : ' . mysql_error());
}

// Make the MovableType MySQL database the database to use
$db_selected = mysql_select_db($db_name, $db_link);
if (!$db_selected)
{
    // No backslash before the apostrophe: inside double quotes \' is not an escape
    // sequence, so the backslash would be printed literally.
    die("Can't use $db_name : " . mysql_error());
}
// ---------------------------------------------------------------------------------
// Main loop: read the log records from the pipe (newest first, because the command
// pipes through `tac`) and print one line for every request that looks like a human
// page view.
//
// A record is skipped when it
//   * repeats the host and resource of the record read just before it,
//   * has an HTTP status other than 200 (OK) or 304 (No Change),
//   * looks like a robot / crawler / cache (by host name or user agent), or
//   * requests something that is not a blog page (images, icons and the like).
//
// Reads from outside this section: $handle, $db_link, $gi, $bullet, $timezone,
// $months and the *_pattern variables.
//
// NOTE(review): $display_name and $constructed_ip derive from log/database content and
// are printed unescaped. If this output is embedded in an HTML page, consider wrapping
// them in htmlspecialchars() -- confirm the output context first.
// ---------------------------------------------------------------------------------

// Layout of one log record. Fields 0-15 are named in the list() below; every
// conversion after those captures one whitespace-separated word of the user agent.
$log_format = "%s %s %s [%2s/%3s/%4s:%2s:%2s:%2s %5s] \"%s %s %s %s %s %s \"%s %s %s %s %s %s %s %s %s %s %s\"";
$log_field_count = substr_count($log_format, '%');

$last_ip = '';
$last_resource = '';

// fgets() returns false at end of input, so every record - including the very last
// one - is processed exactly once.
while (($record = fgets($handle)) !== false)
{
    $action = 'read the';

    // Grab record from the file and parse into usable fields. Pad to the full field
    // count so short or unparseable records yield nulls instead of undefined offsets.
    $fields = sscanf($record, $log_format);
    if (!is_array($fields))
    {
        $fields = array();
    }
    $fields = array_pad($fields, $log_field_count, null);
    list($ip, $client, $userid, $day, $month, $year, $hour, $min, $sec, $tz, $method, $resource, $protocol, $status, $size, $referer) = $fields;
    $agent = str_replace('"', '', implode(' ', array_slice($fields, 16)));

    // Don't show those hitting the same file repeatedly in close succession.
    // $ip and $resource are not modified below, so the "previous record" markers can
    // be updated right away, which lets the rest of the loop bail out with continue.
    $is_repeat = (($last_ip == $ip) && ($last_resource == $resource));
    $last_ip = $ip;
    $last_resource = $resource;
    if ($is_repeat)
    {
        continue;
    }

    // Don't show HTTP codes other than OK (200) or No Change (304)
    if (!(($status == '200') || ($status == '304')))
    {
        continue;
    }

    // Don't show obvious robots and crawlers
    $looks_automated = (stristr($agent, 'bot') !== false);
    foreach (array('crawl', 'search', 'cache') as $marker)
    {
        if ((stristr($ip, $marker) !== false) || (stristr($agent, $marker) !== false))
        {
            $looks_automated = true;
            break;
        }
    }
    if ($looks_automated)
    {
        continue;
    }

    // Is there an IP in the log for this record?
    // The configured patterns carry no delimiters, so '#' (which none of them
    // contains) is wrapped around them for preg_match().
    $ip_pattern_found = (preg_match('#' . $ip_pattern . '#', $ip, $raw_ip) === 1);
    if ($ip_pattern_found)
    {
        // Turn dashes into dots and normalise every octet to a plain integer
        // (e.g. "066" becomes "66").
        $ip_parts = array_map('intval', explode('.', str_replace('-', '.', $raw_ip[0])));
        if ((count($ip_parts) == 4) && (max($ip_parts) <= 255)) // Need a 4 part IP to be valid
        {
            $constructed_ip = implode('.', $ip_parts);
        }
        else
        {
            $ip_pattern_found = false;
        }
    }
    if (!$ip_pattern_found)
    {
        $constructed_ip = ($ip == '') ? '?' : $ip;
    }

    // Determine the kind of entry it is and build a readable name for it.
    // The order of the tests matters: the more specific patterns come first.
    if (preg_match('#' . $entry_pattern . '#', $resource) === 1) // Individual entry
    {
        $type = 'Individual Entry';
        $action .= ' entry';
        // The basename is what sits between the last slash and the 5 characters of ".html"
        $last_slash_pos = strrpos($resource, '/');
        $entry_name = substr($resource, $last_slash_pos + 1, (strlen($resource) - $last_slash_pos - 6));
        // Query the entry name so a usefully named link can be created. The name
        // comes from the request URL, so it is escaped before it goes into the SQL.
        $result = mysql_query("SELECT entry_title FROM mt_entry WHERE entry_basename = '" . mysql_real_escape_string($entry_name, $db_link) . "'", $db_link);
        if (!$result)
        {
            die('Invalid query: ' . mysql_error());
        }
        if (mysql_num_rows($result) == 1)
        {
            $row = mysql_fetch_assoc($result);
            $display_name = $row['entry_title'];
        }
        else
        {
            // No unique title found: fall back to the capitalised basename
            $display_name = ucwords($entry_name);
        }
        mysql_free_result($result);
    }
    else if (preg_match('#' . $atom_pattern . '#', $resource) === 1) // Atom newsfeed
    {
        $type = 'Atom Fetch';
        $action = 'fetched the';
        $display_name = 'Atom Feed';
    }
    else if (preg_match('#' . $rdf_pattern . '#', $resource) === 1) // RSS newsfeed
    {
        $type = 'RSS Fetch';
        $action = 'fetched the';
        $display_name = 'RSS Feed';
    }
    else if (preg_match('#' . $not_a_page_pattern . '#', $resource) === 1) // .gifs, .icos and the like
    {
        $type = 'Not a Page';
        $display_name = 'Not a Blog Page';
        continue; // never shown
    }
    else if (preg_match('#' . $monthly_archives_pattern . '#', $resource, $monthly_archive_name) === 1)
    {
        $type = 'Monthly Archive';
        // "/archives/" is 10 characters long: the year follows it, then "/" and the month
        $display_year = substr($monthly_archive_name[0], 10, 4);
        $display_month = substr($monthly_archive_name[0], 15, 2);
        $month_label = isset($months[$display_month]) ? $months[$display_month] : $display_month;
        $display_name = $month_label . ' ' . $display_year . ' Archive';
    }
    else if (preg_match('#' . $category_archives_pattern . '#', $resource) === 1)
    {
        $type = 'Category Archive';
        // Everything after the second slash is taken as the category name
        $category_name = substr($resource, strpos($resource, '/', 1) + 1);
        $category_name = str_replace('_', ' ', $category_name);
        $category_name = str_replace('/', '', $category_name);
        $display_name = ucwords($category_name) . ' Category Archive';
    }
    else if (preg_match('#' . $index_pattern . '#', $resource) === 1) // Main Index
    {
        $type = 'Main Index';
        $display_name = 'Main Index';
    }
    else
    {
        $type = 'Unknown';
        $display_name = 'Unknown';
    }

    // Work out how to describe where the visitor came from. Without a usable
    // location (no IP, no lookup result, or no country) the address itself is shown.
    $geo = null;
    if ($ip_pattern_found)
    {
        // Presumably the lookup yields a non-object when the address is unknown;
        // the !$geo test below covers that case.
        $geo = geoip_record_by_addr($gi, $constructed_ip);
    }
    if (!$geo || ($geo->country_name == ''))
    {
        $origin = 'at ' . $constructed_ip;
    }
    else if ($geo->country_name == 'United States')
    {
        // For US visitors prefer city + region, then region alone
        if ($geo->city <> '')
        {
            $origin = 'from ' . $geo->city . ', ' . $geo->region;
        }
        else if ($geo->region <> '')
        {
            $origin = 'from ' . $geo->region;
        }
        else
        {
            $origin = 'at ' . $constructed_ip . ' living in the ' . $geo->country_name;
        }
    }
    else
    {
        // For everybody else show city + country, or just the country
        if ($geo->city <> '')
        {
            $origin = 'from ' . $geo->city . ', ' . $geo->country_name;
        }
        else
        {
            $origin = 'from ' . $geo->country_name;
        }
    }

    // The line break inside the closing string literal is part of the emitted text
    // and is kept exactly as it was.
    print $bullet . $hour . ':' . $min . ':' . $sec . ' ' . $timezone . ' a visitor ' . $origin . ' ' . $action . " $display_name
\n";
}

// Release the pipe, the database connection and the GeoIP database
pclose($handle);
mysql_close($db_link);
geoip_close($gi);
?>