Occam's Razor Recent Visitor's Log

<?php
/**
 * Recent visitor's log.
 *
 * Reads the last $records_to_fetch lines of the Apache access log (newest
 * first), drops robots/crawlers, repeated hits on the same resource, resources
 * that are not blog pages and any response other than 200/304, and prints one
 * line per remaining hit: the time, where the visitor is from (GeoLiteCity
 * lookup, falling back to the raw address) and which blog page was requested.
 * Individual entries are shown by the title stored in the MovableType
 * `mt_entry` table.
 *
 * NOTE(review): the text this was reviewed from had lost its opening PHP tag,
 * the beginning of the first assignment, and apparently the HTML markup inside
 * the printed strings. Those spots are marked below - confirm them against the
 * deployed file.
 */

// NOTE(review): reconstructed from the fragment "› ';" and from the use of
// $bullet in the print statements - confirm the exact literal.
$bullet = '› ';

$db_server = 'hidden';
$db_username = 'hidden';
$db_password = 'hidden';
$db_name = 'hidden'; // Name of MySQL database containing MovableType blog
$geo_ip_free_path = '/home/potomact/www/occams-razor/geoipfree/GeoLiteCity.dat';
$log_file_path = '/home/potomact/domlogs/occams-razor.info.log';
$records_to_fetch = 500; // Gets this many records from the tail of the log file
$geoipcity_path = './';

include($geoipcity_path . "geoipcity.inc");
$gi = geoip_open($geo_ip_free_path, GEOIP_STANDARD);

$timezone = date('T');

// Used to turn the two-digit month of a monthly archive URL into a readable name
$months = array(
    '01' => 'January',
    '02' => 'February',
    '03' => 'March',
    '04' => 'April',
    '05' => 'May',
    '06' => 'June',
    '07' => 'July',
    '08' => 'August',
    '09' => 'September',
    '10' => 'October',
    '11' => 'November',
    '12' => 'December',
);

// The host field of the log often holds dressed-up names such as
//   crawl-66-249-71-13.googlebot.com
// This pattern finds a dotted or dashed IPv4 address embedded in that field so
// GeoLiteCity can be asked for a location. The separators are a character
// class: a bare "." would match any character.
$ip_pattern = '~[0-9]{1,3}[-.][0-9]{1,3}[-.][0-9]{1,3}[-.][0-9]{1,3}~';

// Patterns that classify the requested resource. All are PCRE (the POSIX
// ereg() family no longer exists in PHP 7+) and literal dots are escaped.
$index_pattern = '~/~';
$not_a_page_pattern = '~\.[a-zA-Z0-9]{2,4}$~'; // .gif, .ico, .css, .js, .jpeg and the like
$monthly_archives_pattern = '~/archives/([0-9]{4})/([0-9]{2})/?~';
$category_archives_pattern = '~/archives/[a-z]*/?~';
$entry_pattern = '~\.html(?:\?|$)~';
$atom_pattern = '~atom\.xml~';
$rdf_pattern = '~index\.rdf~';

// sscanf() layout of one access-log line. It yields 27 fields:
// 0 host, 1 client, 2 userid, 3 day, 4 month, 5 year, 6 hour, 7 min, 8 sec,
// 9 tz, 10 method, 11 resource, 12 protocol, 13 status, 14 size, 15 referer,
// 16..26 the first eleven words of the user agent.
$log_format = "%s %s %s [%2s/%3s/%4s:%2s:%2s:%2s %5s] \"%s %s %s %s %s %s \"%s %s %s %s %s %s %s %s %s %s %s\"";

/**
 * Tells whether a hit looks like it came from a robot, crawler or cache.
 *
 * @param string $host  host field of the log line
 * @param string $agent user agent with the quotes removed
 * @return bool
 */
function vl_looks_like_robot($host, $agent)
{
    foreach (array('crawl', 'search', 'cache') as $marker) {
        if (stripos($host, $marker) !== false) {
            return true;
        }
    }
    foreach (array('search', 'crawl', 'cache', 'bot') as $marker) {
        if (stripos($agent, $marker) !== false) {
            return true;
        }
    }
    return false;
}

/**
 * Extracts a dotted IPv4 address from the host field of a log line.
 *
 * @param string $host       host field, e.g. "crawl-66-249-71-13.googlebot.com"
 * @param string $ip_pattern PCRE pattern matching four dash/dot separated numbers
 * @return string|null normalised address ("66.249.71.13"), or null if none is embedded
 */
function vl_extract_ip($host, $ip_pattern)
{
    if (!preg_match($ip_pattern, $host, $match)) {
        return null;
    }
    // intval() drops leading zeros; array_map() is needed because assigning to
    // a by-value foreach variable changes nothing.
    $octets = array_map('intval', explode('.', str_replace('-', '.', $match[0])));
    if (count($octets) != 4) { // Need a 4 part IP to be valid
        return null;
    }
    foreach ($octets as $octet) {
        if ($octet > 255) {
            return null;
        }
    }
    return implode('.', $octets);
}

/**
 * Returns the basename of an individual entry URL, i.e. the part between the
 * last "/" and ".html", ignoring any query string.
 *
 * @param string $resource requested resource from the log line
 * @return string basename, or '' if the resource has no ".html"
 */
function vl_entry_basename($resource)
{
    $query_pos = strpos($resource, '?');
    $path = ($query_pos === false) ? $resource : (string) substr($resource, 0, $query_pos);
    $dot_html_pos = strrpos($path, '.html');
    if ($dot_html_pos === false) {
        return '';
    }
    $stem = (string) substr($path, 0, $dot_html_pos);
    $last_slash_pos = strrpos($stem, '/');
    return ($last_slash_pos === false) ? $stem : (string) substr($stem, $last_slash_pos + 1);
}

/**
 * Looks up the title of an entry by its basename.
 *
 * Dies with "Invalid query: ..." if the statement cannot be executed.
 *
 * @param mysqli_stmt $stmt     prepared "SELECT entry_title ... WHERE entry_basename = ?"
 * @param string      $basename entry basename taken from the requested URL
 * @return string|null the title when exactly one entry matches, otherwise null
 */
function vl_entry_title($stmt, $basename)
{
    // The basename comes straight from the access log, so it is bound as a
    // parameter instead of being concatenated into the SQL text.
    mysqli_stmt_bind_param($stmt, 's', $basename);
    if (!mysqli_stmt_execute($stmt)) {
        die('Invalid query: ' . mysqli_stmt_error($stmt));
    }
    mysqli_stmt_store_result($stmt);
    $title = null;
    if (mysqli_stmt_num_rows($stmt) == 1) {
        mysqli_stmt_bind_result($stmt, $found_title);
        mysqli_stmt_fetch($stmt);
        $title = $found_title;
    }
    mysqli_stmt_free_result($stmt);
    return $title;
}

/**
 * Builds the "from <place>" / "at <address>" part of an output line.
 *
 * @param object|null $geo      GeoIP record with a non-empty country_name, or null
 * @param string      $shown_ip address to show when no place is known
 * @return string
 */
function vl_visitor_phrase($geo, $shown_ip)
{
    if ($geo === null) {
        return 'at ' . $shown_ip;
    }
    if ($geo->country_name == 'United States') {
        if ($geo->city <> '') {
            return 'from ' . $geo->city . ', ' . $geo->region;
        }
        if ($geo->region <> '') {
            return 'from ' . $geo->region;
        }
        return 'at ' . $shown_ip . ' living in the ' . $geo->country_name;
    }
    if ($geo->city <> '') {
        return 'from ' . $geo->city . ', ' . $geo->country_name;
    }
    return 'from ' . $geo->country_name;
}

// Get the last X records from the log file then reverse the order of
// presentation, so last come first
$command = 'tail -n' . (int) $records_to_fetch . ' -q ' . escapeshellarg($log_file_path) . ' | tac';
$handle = popen($command, "r");
if (!($handle)) {
    die("Cannot execute command " . $command . "\n");
}

// Report failures through return values (PHP 8.1+ throws by default), so the
// die() messages below are what the user sees.
mysqli_report(MYSQLI_REPORT_OFF);

$db_link = mysqli_connect($db_server, $db_username, $db_password);
if (!$db_link) {
    die('Not connected : ' . mysqli_connect_error());
}

// Make the MovableType MySQL database the database to use
if (!mysqli_select_db($db_link, $db_name)) {
    die("Can't use $db_name : " . mysqli_error($db_link));
}

$title_stmt = mysqli_prepare($db_link, 'SELECT entry_title FROM mt_entry WHERE entry_basename = ?');
if (!$title_stmt) {
    die('Invalid query: ' . mysqli_error($db_link));
}

$last_ip = '';
$last_resource = '';

// The first line read is the latest hit
while (($line = fgets($handle)) !== false) {
    $fields = sscanf($line, $log_format);
    if (!is_array($fields)) {
        continue; // blank or unparsable line
    }

    $ip = (string) $fields[0];
    $hour = (string) $fields[6];
    $min = (string) $fields[7];
    $sec = (string) $fields[8];
    $resource = (string) $fields[11];
    $status = (string) $fields[13];
    $agent = str_replace('"', '', implode(' ', array_slice($fields, 16)));

    // Don't show those hitting the same file repeatedly in close succession.
    // The "previous hit" markers are updated for every parsed line, shown or not.
    $skip_it = (($last_ip === $ip) && ($last_resource === $resource));
    $last_ip = $ip;
    $last_resource = $resource;

    // Don't show obvious robots and crawlers or HTTP codes other than OK (200)
    // or No Change (304)
    if (!$skip_it) {
        $skip_it = vl_looks_like_robot($ip, $agent)
            || (($status !== '200') && ($status !== '304'));
    }
    if ($skip_it) {
        continue;
    }

    // Determine the kind of page requested; the first matching rule wins
    $action = 'read the';
    $entry_name = '';
    if (preg_match($entry_pattern, $resource)) {
        $type = 'Individual Entry';
        $action .= ' entry';
        $entry_name = vl_entry_basename($resource);
        $display_name = ucwords($entry_name); // replaced by the stored title below, if any
    } elseif (preg_match($atom_pattern, $resource)) {
        $type = 'Atom Fetch';
        $action = 'fetched the';
        $display_name = 'Atom Feed';
    } elseif (preg_match($rdf_pattern, $resource)) {
        $type = 'RSS Fetch';
        $action = 'fetched the';
        $display_name = 'RSS Feed';
    } elseif (preg_match($not_a_page_pattern, $resource)) {
        $type = 'Not a Page';
        $display_name = 'Not a Blog Page';
    } elseif (preg_match($monthly_archives_pattern, $resource, $monthly_archive_name)) {
        $type = 'Monthly Archive';
        $display_year = $monthly_archive_name[1];
        $display_month = $monthly_archive_name[2];
        $month_name = isset($months[$display_month]) ? $months[$display_month] : $display_month;
        $display_name = $month_name . ' ' . $display_year . ' Archive';
    } elseif (preg_match($category_archives_pattern, $resource)) {
        $type = 'Category Archive';
        $category_name = substr($resource, strpos($resource, '/', 1) + 1);
        $category_name = str_replace('_', ' ', $category_name);
        $category_name = str_replace('/', '', $category_name);
        $display_name = ucwords($category_name) . ' Category Archive';
    } elseif (preg_match($index_pattern, $resource)) {
        $type = 'Main Index';
        $display_name = 'Main Index';
    } else {
        $type = 'Unknown';
        $display_name = 'Unknown';
    }

    if ($type == 'Not a Page') {
        continue;
    }

    // Only hits that are actually shown cost a database query
    if ($type == 'Individual Entry') {
        $entry_title = vl_entry_title($title_stmt, $entry_name);
        if ($entry_title !== null) {
            $display_name = $entry_title;
        }
    }

    // Is there an IP in the log for this record? If so, try to locate it.
    $constructed_ip = vl_extract_ip($ip, $ip_pattern);
    $geo = null;
    if ($constructed_ip !== null) {
        $geo = geoip_record_by_addr($gi, $constructed_ip);
        if (!is_object($geo) || $geo->country_name == '') {
            $geo = null; // no usable location: fall back to showing the address
        }
    }
    if ($constructed_ip !== null) {
        $shown_ip = $constructed_ip;
    } else {
        $shown_ip = ($ip == '') ? '?' : $ip;
    }

    $text = $hour . ':' . $min . ':' . $sec . ' ' . $timezone . ' a visitor '
        . vl_visitor_phrase($geo, $shown_ip) . ' ' . $action . ' ' . $display_name;

    // Everything in $text except the fixed words comes from the access log,
    // the GeoIP database or the blog database, so it is escaped before being
    // printed. A single-byte charset is named so that bytes other than
    // & < > " ' pass through unchanged.
    // NOTE(review): assumes this output lands in an HTML page - confirm. The
    // original strings look like they carried markup around the page name
    // (a link and/or line break); if so restore it here, outside the escaped text.
    print $bullet . htmlspecialchars($text, ENT_QUOTES, 'ISO-8859-1') . "\n\n";
}

pclose($handle);
mysqli_stmt_close($title_stmt);
mysqli_close($db_link);
geoip_close($gi);
?>