| | <?php |
---|
| | |
---|
| | /* NJS 2006-04-28 |
---|
| | In earlier versions of this script, which eprints to count was |
---|
| | determined by comparing the request date of the eprint against the |
---|
| | "lastproc" date of this script (i.e., minimum time unit one day). |
---|
| | This was fine if you only ran the script once per day, but if you ran |
---|
| | it more than that, it counted multiple times requests whose |
---|
| | $request_date == $lastproc. For example, if you ran this script five |
---|
| | times per day, all the downloads that occurred during that day would |
---|
| | be counted EVERY TIME this script ran, thus overinflating your stats |
---|
| | by a factor of up to five :( |
---|
| | |
---|
| | The solution is to use the full time stamp for comparison rather than |
---|
| | just the date. This timestamp MUST include time zone information so |
---|
| | that things don't get screwed up by daylight saving time. As long as |
---|
| | this is done consistently, there's no need to do things like convert |
---|
| | to GMT, for example. |
---|
| | |
---|
| | The very first thing we need to do is grab the current time stamp |
---|
| | with time zone, which will later be stored in the database as the |
---|
| | "lastproc" time. This needs to happen first so that we don't "lose" |
---|
| | any requests that occur while the script is running. |
---|
| | */ |
---|
| | $start_time = date('Y-m-d H:i:s O'); |
---|
| | |
---|
| | |
---|
| | // NJS 2005-12-09 Switched to GeoIP from GeoIP:IPfree. |
---|
| | include("geoip.inc"); |
---|
| | |
---|
| |
---|
| | $connect = mysql_pconnect ($sqlserver,$sqluser,$sqlpass); |
---|
| | $db = mysql_select_db($sqldatabase,$connect) or die("Could not connect"); |
---|
| | |
---|
| | // First get the date of last update |
---|
| | $query = "select lastproc from lastproc order by timeinsert desc limit 1"; |
---|
| | // NJS 2006-04-28 Changed this from order by timeinsert to order by id. |
---|
| | // The id is always guaranteed to increase temporally, but is otherwise |
---|
| | // time-independent and thus not affected by things like daylight savings. |
---|
| | $query = "select lastproc from lastproc order by id desc limit 1"; |
---|
| | $result = mysql_query($query,$connect); |
---|
| | $num_rows = mysql_num_rows($result); |
---|
| | if ($num_rows > 0) { |
---|
| | $row = mysql_fetch_assoc($result); |
---|
| |
---|
| | $datetestA = 0; |
---|
| | } |
---|
| | |
---|
| | $connect2 = mysql_connect($sqlserver2,$sqluser2,$sqlpass2); |
---|
| | $counter = 1; |
---|
| | $counter = 0; |
---|
| | foreach($log_file as $archivename=>$archivelog) { |
---|
| | $logf = $log_dir . $archivelog; |
---|
| | $archive_name = $archivename; |
---|
| | $handle = fopen($logf, "r"); |
---|
| |
---|
| | |
---|
| | $date = $matches[2]; |
---|
| | $archive = $matches[3]; |
---|
| | $uniquebits = $buffer; |
---|
| | $date = preg_replace("/:.*/","",$date); |
---|
| | $date = preg_replace("/\//", " ", $date); |
---|
| | $when = getdate(strtotime($date)); |
---|
| | $request_date = $when["year"]."-".$when["mon"]."-".$when["mday"]; |
---|
| | $datetestB = strtotime($request_date); |
---|
| | /* NJS 2006-04-25 |
---|
| | IMPORTANT: if you run this script more than once per day, |
---|
| | it will count multiple times downloads whose |
---|
| | $request_date == $lastproc. For example, if you ran this |
---|
| | script five times per day, all the downloads that |
---|
| | occurred during that day would be counted EVERY TIME this |
---|
| | script ran, thus overinflating your stats by a factor of |
---|
| | up to five :( This happens because $lastproc has one day |
---|
| | as its base unit. |
---|
| | |
---|
| | If finer granularity for stats updates is desired, the |
---|
| | solution would be to use the full timestamp rather than |
---|
| | just the date. |
---|
| | /* NJS 2006-04-28 |
---|
| | Switched to timestamp rather than date-based comparison. |
---|
| | First, clean up the Apache request date into something |
---|
| | that strtotime understands. Note that the Apache log |
---|
| | dates include time zone info by default. |
---|
| | */ |
---|
| | $date = preg_replace("/:/"," ",$date,1); // Change first ":" to " ". |
---|
| | $date = preg_replace("/\//", " ", $date); // Change all "/" to " ". |
---|
| | $datetestB = strtotime($date); |
---|
| | // Convert to properly formatted date string. |
---|
| | $request_date = date('Y-m-d H:i:s O', $datetestB); |
---|
| | |
---|
| | if ($datetestB < $datetestA) |
---|
| | continue; |
---|
| | |
---|
| | // NJS 2005-11-25 Added regexp for EPrints short URLs. |
---|
| |
---|
| | Keep track of where we are. Should avoid duplication of results |
---|
| | if the script is run more than once on the same log file |
---|
| | */ |
---|
| | |
---|
| | $query = "INSERT into lastproc (lastproc) values('".$request_date."')"; |
---|
| | // NJS 2006-04-28 Switched value inserted to $start_time instead of $request_date. |
---|
| | $query = "INSERT into lastproc (lastproc) values('".$start_time."')"; |
---|
| | $result = mysql_query($query,$connect); |
---|
| | |
---|
| | #print "Records counted: $counter\n"; |
---|
| | #print "Last count: $request_date\n"; |
---|
| | mysql_close($connect2); |
---|
| | mysql_close($connect); |
---|
| | |
---|
/**
 * Look up the title corresponding to the specified eprint id.
 *
 * The "archive" table is consulted first. If it has no matching row the
 * item has probably been deleted, so the "deletion" table is tried next
 * and its title is returned with a " [deleted]" suffix. If neither table
 * has the id, the placeholder "Unknown item [<id>]" is returned.
 *
 * Uses the global $connect2 link and selects $db on it before querying.
 *
 * @param string $db       Name of the database to select on $connect2.
 * @param mixed  $eprintid Eprint id to look up; cast to an integer before
 *                         being placed in the SQL text.
 * @return string The title with surrounding whitespace trimmed and internal
 *                whitespace runs collapsed to single spaces, plus
 *                " [deleted]" for items found only in "deletion", or the
 *                "Unknown item [...]" placeholder.
 */
function getePrintName($db,$eprintid) {
	global $connect2;
	mysql_select_db($db,$connect2);

	// Force the id to an integer so the value cannot alter the SQL text.
	$id = (int) $eprintid;
	$suffix = '';

	$result3 = mysql_query("select title from archive where eprintid = $id",$connect2);
	// NJS 2006-04-25 Added check for empty result, probably a deleted item.
	// Look in the deletion table for details.
	// A failed query (false) is treated the same as "no rows" so that
	// mysql_num_rows() is never handed a non-resource.
	if (!$result3 || mysql_num_rows($result3) == 0) {
		$result3 = mysql_query("select title from deletion where eprintid = $id",$connect2);
		// If it's not in deletion, then we have no clue what it is.
		if (!$result3 || mysql_num_rows($result3) == 0) {
			return "Unknown item [$eprintid]";
		}
		$suffix = ' [deleted]';
	}

	// $result3 now refers to whichever table actually held the item.
	$row = mysql_fetch_assoc($result3);
	$title = preg_replace("/\s+/"," ",trim($row["title"]));
	return $title . $suffix;
}
---|
| | |
---|
| | ?> |
---|
| | |
---|
|