Newer
Older
Digital_Repository / Repositories / statistics / scripts / fix-titles_src.php
  1. <?php
  2.  
  3. /* NJS 2007-07-24
  4. The database structure changed between versions 2.x and 3.x of
  5. EPrints, so we now need to check the major version number and alter
  6. the queries appropriately. Use only the MAJOR version number (i.e.,
  7. 2 or 3, don't include the release number).
  8. */
  9. $eprints_version = ##EPRINTS_VERSION##;
  10.  
  11. /*
  12.  
  13. Apache log for ePrints uses this format:
  14. LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
  15.  
  16. If the log format differs the regular expression matching would need to be adjusted.
  17. Parse:
  18. ip
  19. date YYYY MM DD
  20. archive ID
  21.  
  22. */
  23.  
  24. // eprintstats db
  25. $sqlserver = 'localhost';
  26. $sqluser = 'eprintstatspriv';
  27. $sqlpass = 'AuldGrizzel';
  28. $sqldatabase = 'eprintstats';
  29.  
  30. /* NJS 2006-05-26
  31. SQL details of your ePrints installation(s). This has now been
  32. generalised to work with multiple archives. For each archive that you
  33. have, add an entry to this array in the following format:
  34.  
  35. 'archive_name' => array(
  36. 'sqlserver' => 'db_host',
  37. 'username' => 'archive_name',
  38. 'password' => 'password',
  39. ),
  40. */
  41. $eprintsdbs = array(
  42. 'otago_eprints' => array(
  43. 'sqlserver' => 'localhost',
  44. 'username' => 'otago_eprints',
  45. 'password' => 'DrSyntaxRidesAgain',
  46. ),
  47. 'cardrona' => array(
  48. 'sqlserver' => 'localhost',
  49. 'username' => 'cardrona',
  50. 'password' => 'chautquau',
  51. ),
  52. );
  53.  
  54. ###########################################
  55. ##
  56. ## No configuration required below here.
  57. ##
  58. ###########################################
  59.  
  60. $connect = mysql_pconnect($sqlserver,$sqluser,$sqlpass);
  61. $db = mysql_select_db($sqldatabase,$connect) or die("Could not connect");
  62.  
  63. // NJS 2006-06-14: Generalised connection list for multiple archives.
  64. $eprints_connections = array();
  65. foreach ($eprintsdbs as $archive_name => $details)
  66. {
  67. $eprints_connections[$archive_name] =
  68. mysql_connect($details['sqlserver'],$details['username'],$details['password']);
  69. }
  70.  
  71. /* Get list of all items in database. We only need the distinct archive name
  72. and eprint ID, because we'll be updating all duplicate instances with the
  73. same eprint title anyway. We could limit it to items with titles like
  74. "Unknown%", to make things faster, but that doesn't cover different titles
  75. for the same item (e.g., from corrected punctuation).
  76. */
  77. $query = "
  78. SELECT DISTINCT archive_name, archiveid
  79. FROM view
  80. ORDER BY archive_name, archiveid";
  81. $result = mysql_query( $query, $connect );
  82. $total_rows = ( $result ) ? mysql_num_rows( $result ) : 0;
  83. $count = 0;
  84. while ( $row = mysql_fetch_assoc( $result ) )
  85. {
  86. $archive_name = $row['archive_name'];
  87. $eprint_id = $row['archiveid'];
  88. printf( "Checking %s item %s (%d/%d): ", $archive_name, $eprint_id, ++$count, $total_rows );
  89. $eprint_name = getePrintName( $eprints_connections[$archive_name], $archive_name, $eprint_id, $eprints_version );
  90. $update = sprintf("
  91. UPDATE view
  92. SET eprint_name = '%s'
  93. WHERE archive_name = '%s'
  94. AND archiveid = %d",
  95. mysql_real_escape_string( $eprint_name ),
  96. $archive_name,
  97. $eprint_id
  98. );
  99. if ( mysql_query($update, $connect) )
  100. {
  101. $num_rows = mysql_affected_rows( $connect );
  102. printf( "OK (%d row%s updated).\n", $num_rows, ( $num_rows != 1 ) ? "s" : "" );
  103. }
  104. else
  105. {
  106. print( "failed!\n" );
  107. }
  108. }
  109.  
  110. foreach ($eprints_connections as $connection)
  111. {
  112. mysql_close($connection);
  113. }
  114. mysql_close($connect);
  115.  
  116. // Look up the title corresponding to the specified eprint id.
  117. function getePrintName($connection,$archive,$eprintid,$eprints_version) {
  118. // NJS 2006-06-14: DB connection now passed as an argument.
  119. $db = mysql_select_db($archive,$connection);
  120. // NJS 2007-07-24: Added check for EPrints version, as the
  121. // database structure changed between versions 2 and 3.
  122. if ( $eprints_version > 2 )
  123. {
  124. $query3 = "
  125. SELECT title
  126. FROM eprint
  127. WHERE eprintid = $eprintid
  128. AND eprint_status = 'archive'
  129. ";
  130. }
  131. else
  132. {
  133. $query3 = "
  134. SELECT title
  135. FROM archive
  136. WHERE eprintid = $eprintid
  137. ";
  138. }
  139. $result3 = mysql_query($query3,$connection);
  140. $title = '';
  141. $suffix = '';
  142. // NJS 2006-04-25 Added check for empty result, probably a deleted item.
  143. // Look in the deleted items for details.
  144. if (mysql_num_rows($result3) == 0) {
  145. // NJS 2007-07-24: Added check for EPrints version, as the
  146. // database structure changed between versions 2 and 3.
  147. if ( $eprints_version > 2 )
  148. {
  149. $query3 = "
  150. SELECT title
  151. FROM eprint
  152. WHERE eprintid = $eprintid
  153. AND eprint_status = 'deletion'
  154. ";
  155. }
  156. else
  157. {
  158. $query3 = "
  159. SELECT title
  160. FROM deletion
  161. WHERE eprintid = $eprintid
  162. ";
  163. }
  164. $result3 = mysql_query($query3,$connection);
  165. // If it's not in deletion, then we have no clue what it is.
  166. if (mysql_num_rows($result3) == 0) {
  167. $title = "Unknown item [$eprintid]";
  168. }
  169. else {
  170. $suffix = ' [deleted]';
  171. }
  172. }
  173. if ($title == '') {
  174. $row = mysql_fetch_assoc($result3);
  175. $row["title"] = trim($row["title"]);
  176. $row["title"] = preg_replace("/\s+/"," ",$row["title"]);
  177. $title = $row["title"];
  178. }
  179. return $title . $suffix;
  180. }
  181.  
  182. ?>
  183.