Newer
Older
Digital_Repository / Repositories / ADT / HarvestToADT.pl
nstanger on 16 Dec 2005 2 KB - Added base ADT harvesting source.
#!/usr/bin/perl

use DBI;

require Settings;

sub LookUpEprintsMaxID
# Find out the maximum eprintid for the archive.
#
# Takes no arguments. Connects to the MySQL database described by the
# $DB_NAME, $DB_HOST, $DB_USER and $DB_PASS globals (presumably provided by
# the required Settings file - confirm) and returns the largest eprintid in
# the archive table, or 0 when the table holds no rows.
# Dies with the DBI error text if the connection or the query fails.
{
	# RaiseError turns connect/prepare/execute failures into a die, so a
	# database problem cannot be mistaken for an empty archive.
	my $dbh = DBI->connect("DBI:mysql:$DB_NAME:$DB_HOST", $DB_USER, $DB_PASS,
		{ RaiseError => 1, PrintError => 0 });

	# Let the database compute the maximum instead of sorting and returning
	# every row just to look at the first one.
	my $query = $dbh->prepare("SELECT MAX(eprintid) FROM archive");
	$query->execute;

	# MAX() over an empty table yields one row containing NULL (undef).
	my ($maxID) = $query->fetchrow_array;
	$maxID = 0 unless defined $maxID;

	$query->finish;
	$dbh->disconnect;

	return $maxID;
}

sub IsAnRHDThesis
# Determine if an eprint is a RHD thesis or not.
#
# $ID - eprintid of the record to look up in the archive table.
#
# Returns 1 when the record's thesistype contains "phd" or "rmaster"
# (compared case-insensitively), otherwise 0 - including when no record
# with that eprintid exists.
# Dies with the DBI error text if the connection or the query fails.
{
	# A lexical copy of the argument; nothing outside this sub is modified.
	my ($ID) = @_;

	# RaiseError turns connect/prepare/execute failures into a die, so a
	# database problem cannot be mistaken for "not a thesis".
	my $dbh = DBI->connect("DBI:mysql:$DB_NAME:$DB_HOST", $DB_USER, $DB_PASS,
		{ RaiseError => 1, PrintError => 0 });

	# Bind the id through a placeholder rather than interpolating it into
	# the SQL text.
	my $query = $dbh->prepare("SELECT thesistype FROM archive WHERE eprintid = ?");
	$query->execute($ID);

	my $isathesis = 0;
	# Fetch directly: an empty list means no matching row. This avoids
	# relying on the row count of a SELECT before its rows are fetched.
	my @row = $query->fetchrow_array;
	if (@row) {
		# A NULL thesistype is treated as an empty string
		my $type = defined $row[0] ? lc $row[0] : '';
		if ($type =~ /phd|rmaster/) {
			$isathesis = 1;
		}
	}

	$query->finish;
	$dbh->disconnect;

	return $isathesis;
}

sub LookUpEprintsDate
# Lookup the year of publication for an eprint with an eprintID of $ID.
#
# $ID - eprintid of the record to look up in the archive table.
#
# Returns the value of the record's year column (undef if that column is
# NULL), or 0 when no record with that eprintid exists.
# Dies with the DBI error text if the connection or the query fails.
{
	# A lexical copy of the argument; nothing outside this sub is modified.
	my ($ID) = @_;

	# RaiseError turns connect/prepare/execute failures into a die, so a
	# database problem cannot be mistaken for a missing record.
	my $dbh = DBI->connect("DBI:mysql:$DB_NAME:$DB_HOST", $DB_USER, $DB_PASS,
		{ RaiseError => 1, PrintError => 0 });

	# Bind the id through a placeholder rather than interpolating it into
	# the SQL text.
	my $query = $dbh->prepare("SELECT year FROM archive WHERE eprintid = ?");
	$query->execute($ID);

	my $year = 0;
	# Fetch directly: an empty list means no matching row.
	my @row = $query->fetchrow_array;
	if (@row) {
		$year = $row[0];
	}

	# Release the statement and the connection explicitly; this sub is
	# called once per thesis from the main loop.
	$query->finish;
	$dbh->disconnect;

	return $year;
}

require WriteADTpage;

# ------------------MAIN PROCESS-----------------

# Build the ADT tree: one directory with an index.html per RHD thesis found
# in the archive, plus a top-level index.html linking to every thesis page
# that was written successfully.

# If the ADT directory does not exist, create it
if (!(-e $ADT_DIR)) {
	mkdir($ADT_DIR, 0755) || die "Failed to create adt directory $ADT_DIR: $!\n";
}
chdir($ADT_DIR) || die "Failed to change working directory to $ADT_DIR: $!\n";

# The working directory is now $ADT_DIR, so the top-level index is opened by
# its bare name; prefixing $ADT_DIR a second time would point at the wrong
# place whenever $ADT_DIR is a relative path.
my $indexfile = "index.html";
# Three-argument open keeps the write mode separate from the file name.
open(my $indexfh, '>', $indexfile) or die "Failed to open/create file: $ADT_DIR/$indexfile: $!\n";
print $indexfh "<html><head><title>ADT theses</title></head><body><p>&nbsp;</p>\n";

# Look at all documents - if they exist and are a thesis then create a directory and an index page
# NOTE(review): $MaxID, $n, $SubmittedDate, $DirName, $FileName and $ThesisURL
# are left as package globals on purpose. WriteADTpage is defined outside
# this file and may read some of them - confirm before making them lexical.
$MaxID = LookUpEprintsMaxID();
for ($n = 1; $n <= $MaxID; $n++) {
	if (IsAnRHDThesis($n)) {
		$SubmittedDate = LookUpEprintsDate($n);
		# Directory name: "adt-" + university code + year + "." + zero-padded eprint id
		$DirName = 'adt-' . $UNI_CODE . $SubmittedDate . '.' . sprintf("%04d", $n);
		# If the Eprint ADT directory does not exist, create it
		if (!(-e $DirName)) {
			mkdir($DirName, 0755) || die "Failed to create adt directory $DirName: $!\n";
		}
		$FileName = $DirName . '/index.html';
		$ThesisURL = $BASE_URL . $ADT_URL . "/" . $FileName;
		if (!(WriteADTpage($FileName, $n))) {
			# A false return from WriteADTpage is reported and the thesis is
			# left out of the top-level index.
			print "Missing metadata for eprint ID: $n\n";
		}
		else {
			# Quote the attribute value so the link survives any URL characters
			# that are not valid in an unquoted attribute.
			print $indexfh "<p><a href=\"$ThesisURL\">$DirName</a></p>\n";
		}
	}
}

print $indexfh "</body></html>\n";
# Buffered write errors only surface when the handle is closed, so check it.
close($indexfh) || die "Failed to close file: $ADT_DIR/$indexfile: $!\n";