Newer
Older
Digital_Repository / Repositories / statistics / scripts / fix-titles_src.php
  1. <?php
  2.  
  3. if ( $argc != 1 )
  4. {
  5. if ( $argc != 3 )
  6. {
  7. print("Usage: php fix-titles.php [archivename eprintid]\n");
  8. return;
  9. }
  10. else
  11. {
  12. $archive_name = $argv[1];
  13. $eprint_id = $argv[2];
  14. }
  15. }
  16.  
  17. /* NJS 2007-07-24
  18. The database structure changed between versions 2.x and 3.x of
  19. EPrints, so we now need to check the major version number and alter
  20. the queries appropriately. Use only the MAJOR version number (i.e.,
  21. 2 or 3, don't include the release number).
  22. */
  23. $eprints_version = ##EPRINTS_VERSION##;
  24.  
  25. /*
  26.  
  27. Apache log for ePrints uses this format:
  28. LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
  29.  
  30. If the log format differs the regular expression matching would need to be adjusted.
  31. Parse:
  32. ip
  33. date YYYY MM DD
  34. archive ID
  35.  
  36. */
  37.  
  38. // eprintstats db
  39. $sqlserver = 'localhost';
  40. $sqluser = 'eprintstatspriv';
  41. $sqlpass = 'AuldGrizzel';
  42. $sqldatabase = 'eprintstats';
  43.  
  44. /* NJS 2006-05-26
  45. SQL details of your ePrints installation(s). This has now been
  46. generalised to work with multiple archives. For each archive that you
  47. have, add an entry to this array in the following format:
  48.  
  49. 'archive_name' => array(
  50. 'sqlserver' => 'db_host',
  51. 'dbname' => 'database_name',
  52. 'username' => 'archive_name',
  53. 'password' => 'password',
  54. ),
  55.  
  56. NJS 2009-04-29: Added support for different database name.
  57. Usually, archive_name, database_name and user_name are the same, but they
  58. don't have to be.
  59. */
  60. $eprintsdbs = array(
  61. 'otago_eprints' => array(
  62. 'sqlserver' => 'localhost',
  63. 'dbname' => 'otago_eprints_v3',
  64. 'username' => 'otago_eprints',
  65. 'password' => 'DrSyntaxRidesAgain',
  66. ),
  67. 'cardrona' => array(
  68. 'sqlserver' => 'localhost',
  69. 'dbname' => 'cardona_v3',
  70. 'username' => 'cardrona',
  71. 'password' => 'DrSyntaxRidesAgain',
  72. ),
  73. );
  74.  
  75. ###########################################
  76. ##
  77. ## No configuration required below here.
  78. ##
  79. ###########################################
  80.  
  81. $connect = mysql_pconnect($sqlserver,$sqluser,$sqlpass);
  82. $db = mysql_select_db($sqldatabase,$connect) or die("Could not connect to $sqldatabase database.\n");
  83.  
  84. // NJS 2006-06-14: Generalised connection list for multiple archives.
  85. $eprints_connections = array();
  86. foreach ($eprintsdbs as $archive => $details)
  87. {
  88. $eprints_connections[$archive] =
  89. mysql_connect($details['sqlserver'],$details['username'],$details['password'])
  90. or die( "Could not connect to EPrints database with user {$details['username']}.\n");
  91. }
  92.  
  93. /* Get list of all items in database. We only need the distinct archive name
  94. and eprint ID, because we'll be updating all duplicate instances with the
  95. same eprint title anyway. We could limit it to items with titles like
  96. "Unknown%", to make things faster, but that doesn't cover different titles
  97. for the same item (e.g., from corrected punctuation).
  98. */
  99. if ( $argc == 1 )
  100. {
  101. $query = "
  102. SELECT DISTINCT archive_name, archiveid
  103. FROM view
  104. ORDER BY archive_name, archiveid";
  105. }
  106. else
  107. {
  108. $query = "
  109. SELECT DISTINCT archive_name, archiveid
  110. FROM view
  111. WHERE archive_name = '$archive_name'
  112. AND archiveid = $eprint_id
  113. ORDER BY archive_name, archiveid";
  114. }
  115.  
  116. $result = mysql_query( $query, $connect );
  117. $total_rows = ( $result ) ? mysql_num_rows( $result ) : 0;
  118. $count = 0;
  119. while ( $row = mysql_fetch_assoc( $result ) )
  120. {
  121. $archive_name = $row['archive_name'];
  122. $eprint_id = $row['archiveid'];
  123. printf( "Checking %s item %s (%d/%d): ", $archive_name, $eprint_id, ++$count, $total_rows );
  124. $eprint_name = getePrintName( $eprints_connections[$archive_name], $eprintsdbs[$archive_name]['dbname'], $eprint_id, $eprints_version );
  125. $update = sprintf("
  126. UPDATE view
  127. SET eprint_name = '%s'
  128. WHERE archive_name = '%s'
  129. AND archiveid = %d",
  130. mysql_real_escape_string( $eprint_name ),
  131. $archive_name,
  132. $eprint_id
  133. );
  134. if ( mysql_query($update, $connect) )
  135. {
  136. $num_rows = mysql_affected_rows( $connect );
  137. printf( "OK (%d row%s updated).\n", $num_rows, ( $num_rows != 1 ) ? "s" : "" );
  138. }
  139. else
  140. {
  141. print( "failed!\n" );
  142. }
  143. }
  144.  
  145. foreach ($eprints_connections as $connection)
  146. {
  147. mysql_close($connection);
  148. }
  149. mysql_close($connect);
  150.  
  151. // Look up the title corresponding to the specified eprint id.
  152. function getePrintName($connection,$dbname,$eprintid,$eprints_version) {
  153. // NJS 2006-06-14: DB connection now passed as an argument.
  154. $db = mysql_select_db($dbname,$connection);
  155. // NJS 2007-07-24: Added check for EPrints version, as the
  156. // database structure changed between versions 2 and 3.
  157. if ( $eprints_version > 2 )
  158. {
  159. $query3 = "
  160. SELECT title
  161. FROM eprint
  162. WHERE eprintid = $eprintid
  163. AND eprint_status = 'archive'
  164. ";
  165. }
  166. else
  167. {
  168. $query3 = "
  169. SELECT title
  170. FROM archive
  171. WHERE eprintid = $eprintid
  172. ";
  173. }
  174. $result3 = mysql_query($query3,$connection);
  175. $title = '';
  176. $suffix = '';
  177. // NJS 2006-04-25 Added check for empty result, probably a deleted item.
  178. // Look in the deleted items for details.
  179. if (mysql_num_rows($result3) == 0) {
  180. // NJS 2007-07-24: Added check for EPrints version, as the
  181. // database structure changed between versions 2 and 3.
  182. if ( $eprints_version > 2 )
  183. {
  184. $query3 = "
  185. SELECT title
  186. FROM eprint
  187. WHERE eprintid = $eprintid
  188. AND eprint_status = 'deletion'
  189. ";
  190. }
  191. else
  192. {
  193. $query3 = "
  194. SELECT title
  195. FROM deletion
  196. WHERE eprintid = $eprintid
  197. ";
  198. }
  199. $result3 = mysql_query($query3,$connection);
  200. // If it's not in deletion, then we have no clue what it is.
  201. if (mysql_num_rows($result3) == 0) {
  202. $title = "Unknown item [$eprintid]";
  203. }
  204. else {
  205. $suffix = ' [deleted]';
  206. }
  207. }
  208. if ($title == '') {
  209. $row = mysql_fetch_assoc($result3);
  210. $row["title"] = trim($row["title"]);
  211. $row["title"] = preg_replace("/\s+/"," ",$row["title"]);
  212. $title = $row["title"];
  213. }
  214. return $title . $suffix;
  215. }
  216.  
  217. ?>
  218.