#!/usr/bin/perl
#
# HarvestToADT.pl
#
# Walks every eprint ID from 1 up to the highest ID in the "archive" table.
# For each record whose thesis type marks it as an RHD thesis, the script
# makes sure a per-thesis directory exists under $ADT_DIR, asks WriteADTpage
# to write that directory's index.html, and appends an entry for it to the
# top-level $ADT_DIR/index.html.

use strict;
use warnings;

use DBI;

# Configuration values; presumably assigned in package main by Settings
# (loaded below) -- TODO confirm. Declared with `our` so the unqualified
# names remain legal under strict.
our ($DB_NAME, $DB_HOST, $DB_USER, $DB_PASS);
our ($ADT_DIR, $ADT_URL, $BASE_URL, $UNI_CODE);

# Working state of the main loop. These deliberately stay package globals
# rather than lexicals: WriteADTpage lives in another file and may read them.
# NOTE(review): $ThesisURL is assigned but never read in this file -- confirm
# whether WriteADTpage (or markup that used to be in the index entry) uses it.
our ($MaxID, $n, $SubmittedDate, $DirName, $FileName, $ThesisURL);

require Settings;

sub _ConnectToEprintsDB
# Open a connection to the eprints database, dying with the driver's error
# message if the connection cannot be established.
{
    my $dbh = DBI->connect("DBI:mysql:$DB_NAME:$DB_HOST", $DB_USER, $DB_PASS)
        or die "Failed to connect to database: $DBI::errstr\n";
    return $dbh;
}

sub _FetchFirstRow
# Run $sql with the given bind values and return the first result row as a
# list (an empty list when there is no row). The statement handle and the
# connection are always released before returning.
{
    my ($sql, @bind) = @_;

    my $dbh   = _ConnectToEprintsDB();
    my $query = $dbh->prepare($sql);

    # A failed execute is treated the same as "no rows", so callers fall back
    # to their defaults. Rows are detected by fetching, not by $sth->rows,
    # which DBI does not guarantee for SELECT statements.
    my @row = $query->execute(@bind) ? $query->fetchrow_array : ();

    $query->finish;
    $dbh->disconnect;

    return @row;
}

sub LookUpEprintsMaxID
# Find out the maximum eprintid for the archive.
# Returns 0 when the archive table has no rows.
{
    # MAX() lets the server return one value instead of sorting every row.
    my @row = _FetchFirstRow("SELECT MAX(eprintid) FROM archive");

    return defined $row[0] ? $row[0] : 0;
}

sub IsAnRHDThesis
# Determine if an eprint is a RHD thesis or not.
# $ID is the eprintid to look up. Returns 1 when the record's thesistype
# contains "phd" or "rmaster" (case-insensitively), 0 otherwise, including
# when no such record exists.
{
    my ($ID) = @_;

    my @row = _FetchFirstRow(
        "SELECT thesistype FROM archive WHERE eprintid = ?", $ID);

    return 0 unless @row && defined $row[0];
    return (lc($row[0]) =~ /phd|rmaster/) ? 1 : 0;
}

sub LookUpEprintsDate
# Lookup the year of publication for an eprint with an eprintID of $ID.
# Returns 0 when no such record exists; otherwise returns the stored value
# as-is (which is undef if the column is NULL).
{
    my ($ID) = @_;

    my @row = _FetchFirstRow(
        "SELECT year FROM archive WHERE eprintid = ?", $ID);

    return @row ? $row[0] : 0;
}

require WriteADTpage;

# ------------------MAIN PROCESS-----------------

# If the ADT directory does not exist, create it
if (!(-e $ADT_DIR)) {
    mkdir($ADT_DIR, 0755) || die "Failed to create adt directory\n";
}
chdir($ADT_DIR) || die "Failed to change working directory\n";

# Three-argument open with a lexical handle: the path can no longer be
# misread as an open mode.
my $indexfile = $ADT_DIR . "/index.html";
open(my $index_fh, '>', $indexfile)
    or die "Failed to open/create file: $indexfile\n";

# NOTE(review): the text printed here and in the loop below looks like HTML
# whose tags were stripped somewhere upstream; the values are kept exactly as
# found -- confirm against the intended page template.
print {$index_fh} "ADT theses\n\n \n\n\n";

# Look at all documents - if they exist and are a thesis then create a
# directory and an index page
$MaxID = LookUpEprintsMaxID();
for ($n = 1; $n <= $MaxID; $n++) {
    next unless IsAnRHDThesis($n);

    $SubmittedDate = LookUpEprintsDate($n);
    $DirName = 'adt-' . $UNI_CODE . $SubmittedDate . '.' . sprintf("%04d", $n);

    # If the Eprint ADT directory does not exist, create it
    if (!(-e $DirName)) {
        mkdir($DirName, 0755) || die "Failed to create adt directory\n";
    }

    $FileName  = $DirName . '/index.html';
    $ThesisURL = $BASE_URL . $ADT_URL . "/" . $FileName;

    if (WriteADTpage($FileName, $n)) {
        print {$index_fh} "\n\n$DirName\n\n\n";
    }
    else {
        print "Missing metadata for eprint ID: $n\n";
    }
}

print {$index_fh} "\n";

# Buffered write errors only surface at close, so check it.
close($index_fh) or die "Failed to close file: $indexfile\n";