Newer
Older
Digital_Repository / Repositories / ADT / HarvestToADT.pl
  1. #!/usr/bin/perl
  2.  
  3. use DBI;
  4.  
  5. require Settings;
  6.  
  7. sub LookUpEprintsMaxID
  8. # Find out the maximum eprintid for the archive
  9. {
  10. my $dbh = DBI->connect("DBI:mysql:$DB_NAME:$DB_HOST", $DB_USER, $DB_PASS);
  11. my $query = $dbh->prepare("SELECT eprintid FROM archive ORDER BY eprintid DESC");
  12. $query->execute;
  13. my $numrows = $query->rows;
  14.  
  15. my $maxID = 0;
  16. if ($numrows > 0) {
  17. @row = $query->fetchrow_array;
  18. $maxID = $row[0];
  19. }
  20.  
  21. $query->finish;
  22. $dbh->disconnect;
  23.  
  24. return $maxID;
  25. }
  26.  
  27. sub IsAnRHDThesis
  28. # Determine if an eprint is a RHD thesis or not
  29. {
  30. local ($ID, *args) = @_;
  31. my $dbh = DBI->connect("DBI:mysql:$DB_NAME:$DB_HOST", $DB_USER, $DB_PASS);
  32. my $query = $dbh->prepare("SELECT thesis_type FROM archive WHERE eprintid ='$ID'");
  33. $query->execute;
  34. my $numrows = $query->rows;
  35.  
  36. my $isathesis = 0;
  37. if ($numrows > 0) {
  38. @row = $query->fetchrow_array;
  39. my $type = "\L$row[0]";
  40. if (($type =~ "phd") || ($type =~ "rmaster")) {
  41. $isathesis = 1;
  42. }
  43. }
  44.  
  45. $query->finish;
  46. $dbh->disconnect;
  47.  
  48. return $isathesis;
  49. }
  50.  
  51. sub LookUpEprintsDate
  52. # Lookup the year of publication for an eprint with an eprintID of $ID
  53. {
  54. local ($ID, *args) = @_;
  55. my $dbh = DBI->connect("DBI:mysql:$DB_NAME:$DB_HOST", $DB_USER, $DB_PASS);
  56. my $query = $dbh->prepare("SELECT date_effective FROM archive WHERE eprintid ='$ID'");
  57. $query->execute;
  58. my $numrows = $query->rows;
  59.  
  60. my $year = 0;
  61. if ($numrows > 0) {
  62. @row = $query->fetchrow_array;
  63. $year = $row[0];
  64. }
  65. return $year;
  66. };
  67.  
  68. require WriteADTpage;
  69.  
  70. # ------------------MAIN PROCESS-----------------
  71.  
  72. # If the ADT directory does not exist, create it
  73. if (!(-e $ADT_DIR)) {
  74. mkdir($ADT_DIR, 0755) || die "Failed to create adt directory\n";
  75. }
  76. chdir($ADT_DIR) || die "Failed to change working directory\n";
  77.  
  78. my $indexfile = ">" . $ADT_DIR . "/index.html";
  79. open(INDEXFILEHANDLE, $indexfile) or die "Failed to open/create file: $indexfile\n";
  80. print INDEXFILEHANDLE "<html><head><title>ADT theses</title></head><body><p>&nbsp;</p>\n";
  81.  
  82. # Look at all documents - if they exist and are a thesis then create a directory and an index page
  83. $MaxID = LookUpEprintsMaxID();
  84. for ($n = 1; $n <= $MaxID; $n++) {
  85. if (IsAnRHDThesis($n)) {
  86. $SubmittedDate = LookUpEprintsDate($n);
  87. $DirName = 'adt-' . $UNI_CODE . $SubmittedDate . '.' . sprintf("%04d", $n);
  88. # If the Eprint ADT directory does not exist, create it
  89. if (!(-e $DirName)) {
  90. mkdir($DirName, 0755) || die "Failed to create adt directory\n";
  91. }
  92. $FileName = $DirName . '/index.html';
  93. $ThesisURL = $BASE_URL . $ADT_URL . "/" . $FileName;
  94. if (!(WriteADTpage($FileName, $n))) {
  95. print "Missing metadata for eprint ID: $n\n";
  96. }
  97. else {
  98. print INDEXFILEHANDLE "<p><a href=$ThesisURL>$DirName</a></p>\n";
  99. }
  100. }
  101. }
  102.  
  103. print INDEXFILEHANDLE "</body></html>\n";
  104. close INDEXFILEHANDLE;