GitBucket
4.21.2
Toggle navigation
Snippets
Sign in
Files
Branches
1
Releases
Issues
Pull requests
Labels
Priorities
Milestones
Wiki
Forks
nigel.stanger
/
INFORMS
Browse code
Fixed typo (#57)
master
1 parent
c286f2e
commit
58bb14ba3901bc0ce15694286d916d482aebde9b
Nigel Stanger
authored
on 9 Mar
Patch
Showing
1 changed file
load_enrolment_data.pl
Ignore Space
Show notes
View
load_enrolment_data.pl
#!/usr/local/bin/perl -w ################################################################################ # # Parse data extracted from the Business Objects Course Approval Status (CARPT004) # and Class List (TTRPT02) reports and load the data into the database. # # Usage: see print_usage() below. # # Requirements: # * The papers, course approval status and class list data files generated # by the clean_BO_reports.pl script from the CARPT004 and TTRPT02 Business # Objects reports. # # * A correctly set up results database in PostgreSQL (see results_db.sql). # # * DBI and DBD::Pg for accessing the database. # # * Standard header boilerplate for UTF-8 initialisation. # # Notes (TODO): # * Look at using Pod::Usage for help output. # ################################################################################ use strict; use utf8; use open IO => ':encoding(utf8)'; use open ':std'; use DBI qw( :sql_types ); use DBD::Pg qw( :pg_types ); use Getopt::Long; use File::Basename; # Process the command line switches. my $help = 0; my $classlist_filename = ''; my $courseapproval_filename = ''; my $paperdetails_filename = ''; my $mode = 'start'; my $database_name = 'informs'; GetOptions( 'help|h' => \$help, # TODO: add a --papers-only option? # 'papers|p=s' => \$paperdetails_filename, 'mode|m=s' => \$mode, 'database|d=s' => \$database_name, ); !$help or print_usage(); # --mode defaults to "start" and therefore existence doesn't need to be tested for, # but we do need to check for correct values. ( $mode eq 'start' ) || ( $mode eq 'cutoff' ) || ( $mode eq 'end') || ( $mode eq 'debug' ) or print_usage(1); $database_name or print_usage(1); # Try to connect to the database. my $DBConnection = DBI->connect( sprintf( 'dbi:Pg:dbname=%s', $database_name ), 'nstanger', 'bl0bby', { AutoCommit => 0, pg_enable_utf8 => 1 } ) or die "ERROR: couldn't connect to database '${database_name}'.\n" . $DBI::errstr . "\n"; # Set up some counters. my %students_checked = ( ); # List of students already checked. my $students_added = 0; # Number of students inserted into database. my $students_updated = 0; # Number of students whose details were updated. my $students_unchanged = 0; # Number of students whose details haven't changed. my $enrolments_processed = 0; # Number of enrolments we actually processed. my $enrolments_added = 0; # Number of new enrolments. my $enrolments_updated = 0; # Number of enrolments that were updated. my $enrolments_ignored = 0; # Number of enrolments ignored (e.g., interest only). my $papers_added = 0; # Number of papers inserted into database. # Miscellaneous SQL stuff. my $Result; # Lists of SQL statements, the data types of their parameters and their # prepared statement handles. my %SQL_statements = ( 'fetch_paperdetails' => { 'sql' => 'SELECT DISTINCT P.Paper_Code, P.Year, CA.Period, P.Paper_Title, P.Points, CA.Location FROM Stage_Paper_Details P INNER JOIN Stage_Course_Approval CA USING ( Year, Paper_Code )', 'parameter_types' => [], 'handle' => undef, }, 'check_paper_exists' => { 'sql' => 'SELECT P.Paper_ID FROM Paper P WHERE ( P.Paper_ID = ? )', 'parameter_types' => [ SQL_VARCHAR ], 'handle' => undef, }, 'insert_paper' => { 'sql' => 'INSERT INTO Paper ( Paper_Code, Year, Period_Code, Title, Points, Location_Code, Status_Code, Map_Code ) VALUES ( ?, ?, ?, ?, ?, ( SELECT L.Location_Code FROM Location L WHERE ( L.Location_Name = ? ) ), DEFAULT, DEFAULT )', 'parameter_types' => [ SQL_VARCHAR, SQL_SMALLINT, SQL_VARCHAR, SQL_VARCHAR, SQL_SMALLINT, SQL_VARCHAR ], 'handle' => undef, }, # The class list (TTRPT02) should be the canonical version of who's currently enrolled in the paper. # The course approval list (CARPT004) may have extra people who've since withdrawn, which will be # be picked up by the outer join. There may also be very late withdrawals that turn up in both. # # Special_Enrolment and Enrolment_Status are often null, so coalesce them to simplify Perl processing. # Also coalesce the Programme and Stage_Class_List columns, as they may be null due to the outer join # and will otherwise cause downstream problems with uninitialised variables later in the script. 'fetch_enrolment' => { 'sql' => "select distinct CA.Student_ID, CA.SPR_Code, CA.Surname, CA.Given_Name, coalesce(CL.Other_Names, '') as Other_Names, coalesce(CL.Gender, '!') as Gender, CA.Residency, coalesce(CA.Special_Enrolment, '') as Special_Enrolment, coalesce(CA.Enrolment_Status, '') as Enrolment_Status, (select Mode_Code from Attendance_Mode where Description = Attendance_Mode) as Mode_Code, coalesce(P.Programme_Code, '!!!!') as Programme_Code, (select Specialisation_Code from Specialisation where Title = CA.Major1) as Major1, (select Specialisation_Code from Specialisation where Title = CA.Major2) as Major2, (select Specialisation_Code from Specialisation where Title = CA.Major3) as Major3, (select Specialisation_Code from Specialisation where Title = CA.Minor1) as Minor1, (select Specialisation_Code from Specialisation where Title = CA.Minor2) as Minor2, (select Specialisation_Code from Specialisation where Title = CA.Minor3) as Minor3 from Stage_Class_List CL inner join Programme P on (CL.Programme = P.Title) right outer join Stage_Course_Approval CA using (Paper_Code, Student_ID) where (Paper_Code = ?) and (Period = ?) and (Year = ?)", 'parameter_types' => [ SQL_VARCHAR, SQL_VARCHAR, SQL_SMALLINT, ], 'handle' => undef, }, 'check_student_exists' => { 'sql' => 'SELECT S.Surname, S.Othernames, S.Gender, S.International FROM Student S WHERE ( S.Student_ID = ? )', 'parameter_types' => [ SQL_INTEGER ], 'handle' => undef, }, 'check_unknown_programme' => { 'sql' => 'select CA.Student_ID, CL.Programme from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CL.Programme not in ( select Title from Programme )', 'parameter_types' => [], 'handle' => undef, }, # bleh 'check_unknown_specialisation' => { 'sql' => "select CA.Student_ID, CA.major1 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.major1 <> '' and CA.major1 not in ( select Title from Specialisation ) union select CA.Student_ID, CA.major2 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.major2 <> '' and CA.major2 not in ( select Title from Specialisation ) union select CA.Student_ID, CA.major3 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.major3 <> '' and CA.major3 not in ( select Title from Specialisation ) union select CA.Student_ID, CA.minor1 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.minor1 <> '' and CA.minor1 not in ( select Title from Specialisation ) union select CA.Student_ID, CA.minor2 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.minor2 <> '' and CA.minor2 not in ( select Title from Specialisation ) union select CA.Student_ID, CA.minor3 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.minor3 <> '' and CA.minor3 not in ( select Title from Specialisation )", 'parameter_types' => [], 'handle' => undef, }, 'update_student' => { 'sql' => 'UPDATE Student SET Surname = ?, Othernames = ?, Gender = ? WHERE ( Student_ID = ? )', 'parameter_types' => [ SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_INTEGER ], 'handle' => undef, }, 'insert_student' => { 'sql' => 'INSERT INTO Student ( Student_ID, Surname, Othernames, Gender, International ) VALUES ( ?, ?, ?, ?, ? )', 'parameter_types' => [ SQL_INTEGER, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, PG_BOOL ], 'handle' => undef, }, 'check_enrolment_exists' => { 'sql' => 'SELECT E.Enrolled_At_Start, E.Enrolled_At_Cutoff, E.Enrolled_At_End, E.Terms_Carried_Over, E.Programme_Number, E.Mode_Code, E.Programme_Code, E.Major1, E.Major2, E.Major3, E.Minor1, E.Minor2, E.Minor3 FROM Enrolment E WHERE ( E.Student_ID = ? ) AND ( E.Paper_Code = ? ) AND ( E.Year = ? ) AND ( E.Period_Code = ? )', 'parameter_types' => [ SQL_INTEGER, SQL_VARCHAR, SQL_SMALLINT, SQL_VARCHAR ], 'handle' => undef, }, 'update_enrolment' => { 'sql' => 'UPDATE Enrolment SET Enrolled_At_Start = ?, Enrolled_At_Cutoff = ?, Enrolled_At_End = ?, Terms_Carried_Over = ?, Programme_Number = ?, Mode_Code = ?, Programme_Code = ?, Major1 = ?, Major2 = ?, Major3 = ?, Minor1 = ?, Minor2 = ?, Minor3 = ? WHERE ( Student_ID = ? ) AND ( Paper_Code = ? ) AND ( Year = ? ) AND ( Period_Code = ? )', 'parameter_types' => [ PG_BOOL, PG_BOOL, PG_BOOL, PG_BOOL, SQL_SMALLINT, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_INTEGER, SQL_VARCHAR, SQL_SMALLINT, SQL_VARCHAR ], 'handle' => undef, }, 'insert_enrolment' => { 'sql' => 'INSERT INTO Enrolment ( Student_ID, Paper_Code, Year, Period_Code, Enrolled_At_Start, Enrolled_At_Cutoff, Enrolled_At_End, Terms_Carried_Over, Programme_Number, Mode_Code, Programme_Code, Major1, Major2, Major3, Minor1, Minor2, Minor3 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )', 'parameter_types' => [ SQL_INTEGER, SQL_VARCHAR, SQL_SMALLINT, SQL_VARCHAR, PG_BOOL, PG_BOOL, PG_BOOL, PG_BOOL, SQL_SMALLINT, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR ], 'handle' => undef, }, ); foreach my $statement ( keys %SQL_statements ) { $SQL_statements{$statement}{'handle'} = $DBConnection->prepare( $SQL_statements{$statement}{'sql'} ); my @parameter_types = @{$SQL_statements{$statement}{'parameter_types'}}; for ( my $i = 0; $i < scalar( @{$SQL_statements{$statement}{'parameter_types'}} ); $i++ ) { $SQL_statements{$statement}{'handle'}->bind_param( $i + 1, undef, { TYPE => @{$SQL_statements{$statement}{'parameter_types'}}[$i] } ); } } # Go! $SQL_statements{'fetch_paperdetails'}{'handle'}->execute() or die "\nERROR: failed to retrieve paper details from staging tables (execute).\n" . $SQL_statements{'fetch_paperdetails'}{'handle'}->errstr . "\n"; my $paper_code; my $year_offered; my $period; my $paper_title; my $points; my $location_name; # The following are handled by SQL defaults: # my $paper_status = 'A'; # my $map_code = 1; $SQL_statements{'fetch_paperdetails'}{'handle'}->bind_columns( \$paper_code, \$year_offered, \$period, \$paper_title, \$points, \$location_name ); while ( my $paper_row = $SQL_statements{'fetch_paperdetails'}{'handle'}->fetch ) { # The complete ID for the paper, e.g., COMP111_S1_2006. my $paper_id = join( '_', $paper_code, $period, $year_offered ); # 2011-06-30 NJS: It no longer appears necessary to escape quote characters # by doubling them in strings destined for SQL. I can't find any specific # documentation to this effect, but DBD::Pg now appears to be doing this # automatically. Better keep this code here commented, just in case. # # Convert any ' characters in the names to '', otherwise SQL will barf. # $student_surname =~ s/'/''/g; # $student_othernames =~ s/'/''/g; print <<EOT ============================================================ Paper $paper_id ============================================================ EOT ; if ( $mode eq 'debug' ) { print <<EOT DEBUG: Paper code: $paper_code DEBUG: Year: $year_offered DEBUG: Period: $period DEBUG: Paper ID: $paper_id DEBUG: Title: $paper_title DEBUG: Points: $points DEBUG: Location (name): $location_name EOT ; } # Check whether the paper is already in the database. $SQL_statements{'check_paper_exists'}{'handle'}->execute( $paper_id ) or die "\nERROR: failed to check existence of paper (execute).\n" . $SQL_statements{'check_paper_exists'}{'handle'}->errstr . "\n"; $Result = $SQL_statements{'check_paper_exists'}{'handle'}->fetch; ( !$SQL_statements{'check_paper_exists'}{'handle'}->err ) or die "\nERROR: failed to check existence of paper (fetch).\n" . $SQL_statements{'check_paper_exists'}{'handle'}->errstr . "\n"; # Nope. if ( !defined( $Result ) ) { print "\nCould not locate paper $paper_id in the database.\n"; # The paper isn't already in the database, so insert it. $SQL_statements{'insert_paper'}{'handle'}->execute( $paper_code, $year_offered, $period, $paper_title, $points, $location_name ) or die "\nERROR: failed to insert paper into database.\n" . $SQL_statements{'insert_paper'}{'handle'}->errstr . "\n"; print "inserted\n"; $papers_added++; } # Process student enrolment details. $SQL_statements{'fetch_enrolment'}{'handle'}->execute( $paper_code, $period, $year_offered ) or die "\nERROR: failed to retrieve enrolment details for ${paper_id} (execute).\n" . $SQL_statements{'fetch_enrolment'}{'handle'}->errstr . "\n"; my $student_id; my $SPR_code; my $surname; my $first_name; my $othernames; my $gender; my $attendance_mode; my $programme_code; my $residency; my $special_enrolment; my $enrolment_status; my @majors = (undef, undef, undef); my @minors = (undef, undef, undef); $SQL_statements{'fetch_enrolment'}{'handle'}->bind_columns(\$student_id, \$SPR_code, \$surname, \$first_name, \$othernames, \$gender, \$residency, \$special_enrolment, \$enrolment_status, \$attendance_mode, \$programme_code, \$majors[0], \$majors[1], \$majors[2], \$minors[0], \$minors[1], \$minors[2]); while ( my $enrolment_row = $SQL_statements{'fetch_enrolment'}{'handle'}->fetch ) { $enrolments_processed++; # Not everyone has other names beyond the first. There might even # be people who don't have a first name, but I haven't hit any so far. # Let's be paranoid anyway :). $first_name = '' if ( !defined( $first_name ) ); $othernames = '' if ( !defined( $othernames ) ); $surname = '' if (!defined($surname)); $othernames = join( ' ', $first_name, $othernames ); $othernames =~ s/(^ +)|( +$)//g; $othernames =~ s/,//g; # The report separates them with commas?? Weird. $SPR_code = '' if (!defined($SPR_code)); my ( undef, $programme_number ) = split( /\//, $SPR_code ); $programme_code = "!!!!" if (!defined($programme_code)); if ($programme_code eq "!!!!") { print "\nWARNING: programme missing for student $student_id (possible missing data).\n\n"; } for (my $i = 1; $i <= 3; $i++) { print "\nWARNING: major $i specialisation missing for student $student_id (possible missing data).\n\n" if (defined($majors[$i-1]) && $majors[$i-1] eq "!!!!"); print "\nWARNING: minor $i specialisation missing for student $student_id (possible missing data).\n\n" if (defined($minors[$i-1]) && $minors[$i-1] eq "!!!!"); } # Various flags. $residency = '' if (!defined($residency)); my $international = ( $residency eq 'International Student' ); my $interest_only = ( $special_enrolment =~ /IO/ ); my $FEO = ( $special_enrolment =~ /FEO/ ); # We may find late withdrawals in both lists, but their enrolment status will start with 'W-', # or sometimes 'S-' (withdrawn under special/exceptional circumstances). my $withdrawn = ( $enrolment_status =~ /^[WS]-/ ); $gender = '!' if (!defined($gender)); $gender = substr( $gender, 0, 1 ); if ( $mode eq 'debug' ) { print <<EOT ---------------------------------------- DEBUG: Student ID: $student_id DEBUG: Surname: $surname DEBUG: Other names: $othernames DEBUG: Programme number: $programme_number DEBUG: Gender: $gender DEBUG: International: $international DEBUG: Interest only: $interest_only DEBUG: Final exam only: $FEO DEBUG: Enrolment status: $enrolment_status DEBUG: Withdrawn: $withdrawn DEBUG: Programme code: $programme_code EOT ; # Majors/minors may not be defined, which spews errors all over the debug output. foreach my $spectype ("Major", "Minor") { my @speclist = ($spectype eq "Major") ? @majors : @minors; for (my $i = 1; $i <= 3; $i++) { print "DEBUG: $spectype $i: "; if (defined($speclist[$i-1])) { print $speclist[$i-1]; } else { print "[undefined]"; } print "\n"; } } } # Keep a track of which students we've already checked, so that we don't # do any more than necessary (especially hit the database). if ( !defined( $students_checked{$student_id} ) ) { my ($old_surname, $old_othernames, $old_gender, $old_international) = ('', '', '', ''); $students_checked{$student_id} = 1; print "Student $student_id: $surname, $othernames..."; # Try inserting them into the database. First, check whether they're already in there... $SQL_statements{'check_student_exists'}{'handle'}->execute( $student_id ) or die "\nERROR: failed to check existence of student (execute).\n" . $SQL_statements{'check_student_exists'}{'handle'}->errstr . "\n"; $SQL_statements{'check_student_exists'}{'handle'}->bind_columns(\$old_surname, \$old_othernames, \$old_gender, \$old_international); $Result = $SQL_statements{'check_student_exists'}{'handle'}->fetch; ( !$SQL_statements{'check_student_exists'}{'handle'}->err ) or die "\nERROR: failed to check existence of student (fetch).\n" . $SQL_statements{'check_student_exists'}{'handle'}->errstr . "\n"; # Yep. if ( defined( $Result ) ) { # Check whether their name has changed. The $old_xxx variables are # automatically set by the fetch above. if ((($surname ne '') && ($othernames ne '') && ($gender ne '')) && (($surname ne $old_surname) || ($othernames ne $old_othernames) || ($gender ne $old_gender))) { # Update them if they have changed. Rather than mess around # trying to figure out which columns to update, just do the # lot. It's probably no less efficient, and a lot easier to # code. $SQL_statements{'update_student'}{'handle'}->execute($surname, $othernames, $gender, $student_id) or die "\nERROR: failed trying to update student details.\n" . $SQL_statements{'update_student'}{'handle'}->errstr . "\n"; print "existing entry updated\n"; if ( $mode eq 'debug' ) { print "DEBUG: Surname: old = $old_surname, new = $surname\n"; print "DEBUG: Othernames: old = $old_othernames, new = $othernames\n"; } $students_updated++; } else { print "already in database\n"; $students_unchanged++; } } else # The student isn't already in the database, so insert them. { $SQL_statements{'insert_student'}{'handle'}->execute( $student_id, $surname, $othernames, $gender, make_boolean( $international ) ) or die "\nERROR: failed to insert student into database.\n" . $SQL_statements{'insert_student'}{'handle'}->errstr . "\n"; print "inserted\n"; $students_added++; } } print " enrolling student $student_id in $paper_id..."; # Skip withdrawn and interest only students, UNLESS this is the "end" run. It's fairly common for students to withdraw under # exceptional circumstances after the initial "end" run. This means they will initially be listed as enrolled_at_end, but if # you run this again with an updated input file, ignoring withdrawn students will mean that enrolled_at_end won't change! if ( $withdrawn ) { print "withdrawn\n"; if ($mode ne 'end') { $enrolments_ignored++; next; } } elsif ( $interest_only ) { print "interest only\n"; $enrolments_ignored++; next; } # Notify FEO enrolments, but keep them, as they are actual enrolments. if ($FEO and !$withdrawn) { print "(final examination only) "; } # Now try enrolling them in the paper. If they're not already enrolled, # all of the enrolment flags will be false. If they *are* already # enrolled, we can get the existing enrolment flags from the database. # In either case, we just update the corresponding enrolment flag # according to the value of $mode. This means that we can just # insert/update *all* of the flag values, which saves us having # to dynamically generate SQL to update the correct flag. my %enrolment_flags = ( 'start' => 'false', 'cutoff' => 'false', 'end' => 'false', ); $SQL_statements{'check_enrolment_exists'}{'handle'}->execute( $student_id, $paper_code, $year_offered, $period ) or die "\nERROR: failed to check existence of enrolment (execute).\n" . $SQL_statements{'check_enrolment_exists'}{'handle'}->errstr . "\n"; my $existing_FEO; my $existing_programme_number; my $existing_attendance_mode; my $existing_programme_code; my @existing_majors = (undef, undef, undef); my @existing_minors = (undef, undef, undef); $SQL_statements{'check_enrolment_exists'}{'handle'}->bind_columns( \$enrolment_flags{'start'}, \$enrolment_flags{'cutoff'}, \$enrolment_flags{'end'}, \$existing_FEO, \$existing_programme_number, \$existing_attendance_mode, \$existing_programme_code, \$existing_majors[0], \$existing_majors[1], \$existing_majors[2], \$existing_minors[0], \$existing_minors[1], \$existing_minors[2] ); $Result = $SQL_statements{'check_enrolment_exists'}{'handle'}->fetch; ( !$SQL_statements{'check_enrolment_exists'}{'handle'}->err ) or die "\nERROR: failed to check existence of enrolment (fetch).\n" . $SQL_statements{'check_enrolment_exists'}{'handle'}->errstr . "\n"; if ( defined ( $Result ) ) { if ($withdrawn) { $enrolment_flags{$mode} = 'false'; # The computed values may be missing, and any changes aren't relevant for a withdrawn student anyway, # so just reset them to what's already in the database. $FEO = $existing_FEO; $programme_number = $existing_programme_number; $attendance_mode = $existing_attendance_mode; $programme_code = $existing_programme_code; @majors = @existing_majors; @minors = @existing_minors; } else { $enrolment_flags{$mode} = 'true'; } # print "DEBUG: enrolled at start = $enrolment_flags{'start'}\n"; # print "DEBUG: enrolled at cutoff = $enrolment_flags{'cutoff'}\n"; # print "DEBUG: enrolled at end = $enrolment_flags{'end'}\n"; # Update the withdrawal flags according to the value of $mode. # Also update the non-withdrawal flags and programme details, just in case something changes. $SQL_statements{'update_enrolment'}{'handle'}->execute( $enrolment_flags{'start'}, $enrolment_flags{'cutoff'}, $enrolment_flags{'end'}, make_boolean( $FEO ), $programme_number, $attendance_mode, $programme_code, $majors[0], $majors[1], $majors[2], $minors[0], $minors[1], $minors[2], $student_id, $paper_code, $year_offered, $period ) or die "\nERROR: failed trying to update enrolment details.\n" . $SQL_statements{'update_enrolment'}{'handle'}->errstr . "\n"; print "updated\n" if (!$withdrawn); $enrolments_updated++; } else { $enrolment_flags{$mode} = 'true' if (!$withdrawn); # The student isn't already enrolled, so insert a # new row and set the withdrawal flags appropriately. $SQL_statements{'insert_enrolment'}{'handle'}->execute( $student_id, $paper_code, $year_offered, $period, $enrolment_flags{'start'}, $enrolment_flags{'cutoff'}, $enrolment_flags{'end'}, make_boolean( $FEO ), $programme_number, $attendance_mode, $programme_code, $majors[0], $majors[1], $majors[2], $minors[0], $minors[1], $minors[2] ) or die "\nERROR: failed trying to insert enrolment row.\n" . $SQL_statements{'insert_enrolment'}{'handle'}->errstr . "\n"; print "added\n" if (!$withdrawn); $enrolments_added++; } } } # SANITY CHECKS # Unrecognised programme (not in database) my $student_id; my $programme; $SQL_statements{'check_unknown_programme'}{'handle'}->execute() or die "\nERROR: failed to check for unknown programmes in staging tables (execute).\n" . $SQL_statements{'check_unknown_programme'}{'handle'}->errstr . "\n"; $SQL_statements{'check_unknown_programme'}{'handle'}->bind_columns(\$student_id, \$programme); while ( my $paper_row = $SQL_statements{'check_unknown_programme'}{'handle'}->fetch ) { print "\nWARNING: programme " . $programme . " for student " . $student_id . " does not exist in database.\n" } # Unrecognised specialisation (not in database) my $specialisation; $SQL_statements{'check_unknown_specialisation'}{'handle'}->execute() or die "\nERROR: failed to check for unknown specialisations in staging tables (execute).\n" . $SQL_statements{'check_unknown_specialisation'}{'handle'}->errstr . "\n"; $SQL_statements{'check_unknown_specialisation'}{'handle'}->bind_columns(\$student_id, \$specialisation); while ( my $paper_row = $SQL_statements{'check_unknown_specialisation'}{'handle'}->fetch ) { print "\nWARNING: specialisation '" . $specialisation . "' for student " . $student_id . " does not exist in database.\n" } # Check for malformed Unicode? (issue #51) $DBConnection->commit; foreach my $statement ( keys %SQL_statements ) { $SQL_statements{$statement}{'handle'}->finish; } $DBConnection->disconnect; print <<EOT ============================================================ SUMMARY EOT ; printf "Papers added: %4d\n", $papers_added; print "Students:\n"; printf " New %4d\n", $students_added; printf " Updated %4d\n", $students_updated; printf " Unchanged %4d (=> total %4d)\n", $students_unchanged, ( $students_added + $students_updated + $students_unchanged ); print "Enrolments:\n"; printf " Processed %4d\n", $enrolments_processed; printf " New %4d\n", $enrolments_added; printf " Updated %4d\n", $enrolments_updated; printf " Ignored %4d (=> total %4d)\n", $enrolments_ignored, ( $enrolments_added + $enrolments_updated + $enrolments_ignored ); ##### END ##### sub get_input { my( $regexp, $prompt, $default, $suffix ) = @_; $prompt = "" if( !defined $prompt); for(;;) { print $prompt; if( defined $default ) { print " [$default]"; } print "$suffix "; my $in = <STDIN>; chomp $in; if( $in eq "" && defined $default ) { return $default; } if( $in=~m/^$regexp$/ ) { return $in; } else { print "Bad input, try again.\n"; } } } # Convert a boolean value into the equivalent string for SQL (i.e., true => # 'true', false => 'false', undef => undef => null in SQL). sub make_boolean { my $value = shift; if ( defined( $value ) ) { return 'true' if ( $value ); return 'false'; } return undef; } # Print command line usage. sub print_usage { my $exit_status = shift // 0; my $executable = fileparse( $0, '.pl' ); print <<EOD Usage: ${executable} [options] Loads a staged data set including paper details, student details, and enrolments in papers into the live tables. Options: --mode/-m <mode> process input file as specified by <mode>: start: load data; set enrolment flags for start of paper [default] cutoff: load data; set enrolment flags for withdrawal cutoff date end: load data; set enrolment flags for end of paper debug: load data; no enrolment flags, print debugging output --database/-d <db> name of database to load the data into [default 'informs'] --help/-h this text EOD ; exit $exit_status; }
#!/usr/local/bin/perl -w ################################################################################ # # Parse data extracted from the Business Objects Course Approval Status (CARPT004) # and Class List (TTRPT02) reports and load the data into the database. # # Usage: see print_usage() below. # # Requirements: # * The papers, course approval status and class list data files generated # by the clean_BO_reports.pl script from the CARPT004 and TTRPT02 Business # Objects reports. # # * A correctly set up results database in PostgreSQL (see results_db.sql). # # * DBI and DBD::Pg for accessing the database. # # * Standard header boilerplate for UTF-8 initialisation. # # Notes (TODO): # * Look at using Pod::Usage for help output. # ################################################################################ use strict; use utf8; use open IO => ':encoding(utf8)'; use open ':std'; use DBI qw( :sql_types ); use DBD::Pg qw( :pg_types ); use Getopt::Long; use File::Basename; # Process the command line switches. my $help = 0; my $classlist_filename = ''; my $courseapproval_filename = ''; my $paperdetails_filename = ''; my $mode = 'start'; my $database_name = 'informs'; GetOptions( 'help|h' => \$help, # TODO: add a --papers-only option? # 'papers|p=s' => \$paperdetails_filename, 'mode|m=s' => \$mode, 'database|d=s' => \$database_name, ); !$help or print_usage(); # --mode defaults to "start" and therefore existence doesn't need to be tested for, # but we do need to check for correct values. ( $mode eq 'start' ) || ( $mode eq 'cutoff' ) || ( $mode eq 'end') || ( $mode eq 'debug' ) or print_usage(1); $database_name or print_usage(1); # Try to connect to the database. my $DBConnection = DBI->connect( sprintf( 'dbi:Pg:dbname=%s', $database_name ), 'nstanger', 'bl0bby', { AutoCommit => 0, pg_enable_utf8 => 1 } ) or die "ERROR: couldn't connect to database '${database_name}'.\n" . $DBI::errstr . "\n"; # Set up some counters. my %students_checked = ( ); # List of students already checked. my $students_added = 0; # Number of students inserted into database. my $students_updated = 0; # Number of students whose details were updated. my $students_unchanged = 0; # Number of students whose details haven't changed. my $enrolments_processed = 0; # Number of enrolments we actually processed. my $enrolments_added = 0; # Number of new enrolments. my $enrolments_updated = 0; # Number of enrolments that were updated. my $enrolments_ignored = 0; # Number of enrolments ignored (e.g., interest only). my $papers_added = 0; # Number of papers inserted into database. # Miscellaneous SQL stuff. my $Result; # Lists of SQL statements, the data types of their parameters and their # prepared statement handles. my %SQL_statements = ( 'fetch_paperdetails' => { 'sql' => 'SELECT DISTINCT P.Paper_Code, P.Year, CA.Period, P.Paper_Title, P.Points, CA.Location FROM Stage_Paper_Details P INNER JOIN Stage_Course_Approval CA USING ( Year, Paper_Code )', 'parameter_types' => [], 'handle' => undef, }, 'check_paper_exists' => { 'sql' => 'SELECT P.Paper_ID FROM Paper P WHERE ( P.Paper_ID = ? )', 'parameter_types' => [ SQL_VARCHAR ], 'handle' => undef, }, 'insert_paper' => { 'sql' => 'INSERT INTO Paper ( Paper_Code, Year, Period_Code, Title, Points, Location_Code, Status_Code, Map_Code ) VALUES ( ?, ?, ?, ?, ?, ( SELECT L.Location_Code FROM Location L WHERE ( L.Location_Name = ? ) ), DEFAULT, DEFAULT )', 'parameter_types' => [ SQL_VARCHAR, SQL_SMALLINT, SQL_VARCHAR, SQL_VARCHAR, SQL_SMALLINT, SQL_VARCHAR ], 'handle' => undef, }, # The class list (TTRPT02) should be the canonical version of who's currently enrolled in the paper. # The course approval list (CARPT004) may have extra people who've since withdrawn, which will be # be picked up by the outer join. There may also be very late withdrawals that turn up in both. # # Special_Enrolment and Enrolment_Status are often null, so coalesce them to simplify Perl processing. # Also coalesce the Programme and Stage_Class_List columns, as they may be null due to the outer join # and will otherwise cause downstream problems with uninitialised variables later in the script. 'fetch_enrolment' => { 'sql' => "select distinct CA.Student_ID, CA.SPR_Code, CA.Surname, CA.Given_Name, coalesce(CL.Other_Names, '') as Other_Names, coalesce(CL.Gender, '!') as Gender, CA.Residency, coalesce(CA.Special_Enrolment, '') as Special_Enrolment, coalesce(CA.Enrolment_Status, '') as Enrolment_Status, (select Mode_Code from Attendance_Mode where Description = Attendance_Mode) as Mode_Code, coalesce(P.Programme_Code, '!!!!') as Programme_Code, (select Specialisation_Code from Specialisation where Title = CA.Major1) as Major1, (select Specialisation_Code from Specialisation where Title = CA.Major2) as Major2, (select Specialisation_Code from Specialisation where Title = CA.Major3) as Major3, (select Specialisation_Code from Specialisation where Title = CA.Minor1) as Minor1, (select Specialisation_Code from Specialisation where Title = CA.Minor2) as Minor2, (select Specialisation_Code from Specialisation where Title = CA.Minor3) as Minor3 from Stage_Class_List CL inner join Programme P on (CL.Programme = P.Title) right outer join Stage_Course_Approval CA using (Paper_Code, Student_ID) where (Paper_Code = ?) and (Period = ?) and (Year = ?)", 'parameter_types' => [ SQL_VARCHAR, SQL_VARCHAR, SQL_SMALLINT, ], 'handle' => undef, }, 'check_student_exists' => { 'sql' => 'SELECT S.Surname, S.Othernames, S.Gender, S.International FROM Student S WHERE ( S.Student_ID = ? )', 'parameter_types' => [ SQL_INTEGER ], 'handle' => undef, }, 'check_unknown_programme' => { 'sql' => 'select CA.Student_ID, CL.Programme from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CL.Programme not in ( select Title from Programme )', 'parameter_types' => [], 'handle' => undef, }, # bleh 'check_unknown_specialisation' => { 'sql' => "select CA.Student_ID, CA.major1 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.major1 <> '' and CA.major1 not in ( select Title from Specialisation ) union select CA.Student_ID, CA.major2 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.major2 <> '' and CA.major2 not in ( select Title from Specialisation ) union select CA.Student_ID, CA.major3 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.major3 <> '' and CA.major3 not in ( select Title from Specialisation ) union select CA.Student_ID, CA.minor1 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.minor1 <> '' and CA.minor1 not in ( select Title from Specialisation ) union select CA.Student_ID, CA.minor2 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.minor2 <> '' and CA.minor2 not in ( select Title from Specialisation ) union select CA.Student_ID, CA.minor3 as Specialisation from Stage_Course_Approval CA inner join Stage_Class_List CL using (Paper_Code, Student_ID) where CA.minor3 <> '' and CA.minor3 not in ( select Title from Specialisation )", 'parameter_types' => [], 'handle' => undef, }, 'update_student' => { 'sql' => 'UPDATE Student SET Surname = ?, Othernames = ?, Gender = ? WHERE ( Student_ID = ? )', 'parameter_types' => [ SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_INTEGER ], 'handle' => undef, }, 'insert_student' => { 'sql' => 'INSERT INTO Student ( Student_ID, Surname, Othernames, Gender, International ) VALUES ( ?, ?, ?, ?, ? )', 'parameter_types' => [ SQL_INTEGER, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, PG_BOOL ], 'handle' => undef, }, 'check_enrolment_exists' => { 'sql' => 'SELECT E.Enrolled_At_Start, E.Enrolled_At_Cutoff, E.Enrolled_At_End, E.Terms_Carried_Over, E.Programme_Number, E.Mode_Code, E.Programme_Code, E.Major1, E.Major2, E.Major3, E.Minor1, E.Minor2, E.Minor3 FROM Enrolment E WHERE ( E.Student_ID = ? ) AND ( E.Paper_Code = ? ) AND ( E.Year = ? ) AND ( E.Period_Code = ? )', 'parameter_types' => [ SQL_INTEGER, SQL_VARCHAR, SQL_SMALLINT, SQL_VARCHAR ], 'handle' => undef, }, 'update_enrolment' => { 'sql' => 'UPDATE Enrolment SET Enrolled_At_Start = ?, Enrolled_At_Cutoff = ?, Enrolled_At_End = ?, Terms_Carried_Over = ?, Programme_Number = ?, Mode_Code = ?, Programme_Code = ?, Major1 = ?, Major2 = ?, Major3 = ?, Minor1 = ?, Minor2 = ?, Minor3 = ? WHERE ( Student_ID = ? ) AND ( Paper_Code = ? ) AND ( Year = ? ) AND ( Period_Code = ? )', 'parameter_types' => [ PG_BOOL, PG_BOOL, PG_BOOL, PG_BOOL, SQL_SMALLINT, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_INTEGER, SQL_VARCHAR, SQL_SMALLINT, SQL_VARCHAR ], 'handle' => undef, }, 'insert_enrolment' => { 'sql' => 'INSERT INTO Enrolment ( Student_ID, Paper_Code, Year, Period_Code, Enrolled_At_Start, Enrolled_At_Cutoff, Enrolled_At_End, Terms_Carried_Over, Programme_Number, Mode_Code, Programme_Code, Major1, Major2, Major3, Minor1, Minor2, Minor3 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )', 'parameter_types' => [ SQL_INTEGER, SQL_VARCHAR, SQL_SMALLINT, SQL_VARCHAR, PG_BOOL, PG_BOOL, PG_BOOL, PG_BOOL, SQL_SMALLINT, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR, SQL_VARCHAR ], 'handle' => undef, }, ); foreach my $statement ( keys %SQL_statements ) { $SQL_statements{$statement}{'handle'} = $DBConnection->prepare( $SQL_statements{$statement}{'sql'} ); my @parameter_types = @{$SQL_statements{$statement}{'parameter_types'}}; for ( my $i = 0; $i < scalar( @{$SQL_statements{$statement}{'parameter_types'}} ); $i++ ) { $SQL_statements{$statement}{'handle'}->bind_param( $i + 1, undef, { TYPE => @{$SQL_statements{$statement}{'parameter_types'}}[$i] } ); } } # Go! $SQL_statements{'fetch_paperdetails'}{'handle'}->execute() or die "\nERROR: failed to retrieve paper details from staging tables (execute).\n" . $SQL_statements{'fetch_paperdetails'}{'handle'}->errstr . "\n"; my $paper_code; my $year_offered; my $period; my $paper_title; my $points; my $location_name; # The following are handled by SQL defaults: # my $paper_status = 'A'; # my $map_code = 1; $SQL_statements{'fetch_paperdetails'}{'handle'}->bind_columns( \$paper_code, \$year_offered, \$period, \$paper_title, \$points, \$location_name ); while ( my $paper_row = $SQL_statements{'fetch_paperdetails'}{'handle'}->fetch ) { # The complete ID for the paper, e.g., COMP111_S1_2006. my $paper_id = join( '_', $paper_code, $period, $year_offered ); # 2011-06-30 NJS: It no longer appears necessary to escape quote characters # by doubling them in strings destined for SQL. I can't find any specific # documentation to this effect, but DBD::Pg now appears to be doing this # automatically. Better keep this code here commented, just in case. # # Convert any ' characters in the names to '', otherwise SQL will barf. # $student_surname =~ s/'/''/g; # $student_othernames =~ s/'/''/g; print <<EOT ============================================================ Paper $paper_id ============================================================ EOT ; if ( $mode eq 'debug' ) { print <<EOT DEBUG: Paper code: $paper_code DEBUG: Year: $year_offered DEBUG: Period: $period DEBUG: Paper ID: $paper_id DEBUG: Title: $paper_title DEBUG: Points: $points DEBUG: Location (name): $location_name EOT ; } # Check whether the paper is already in the database. $SQL_statements{'check_paper_exists'}{'handle'}->execute( $paper_id ) or die "\nERROR: failed to check existence of paper (execute).\n" . $SQL_statements{'check_paper_exists'}{'handle'}->errstr . "\n"; $Result = $SQL_statements{'check_paper_exists'}{'handle'}->fetch; ( !$SQL_statements{'check_paper_exists'}{'handle'}->err ) or die "\nERROR: failed to check existence of paper (fetch).\n" . $SQL_statements{'check_paper_exists'}{'handle'}->errstr . "\n"; # Nope. if ( !defined( $Result ) ) { print "\nCould not locate paper $paper_id in the database.\n"; # The paper isn't already in the database, so insert it. $SQL_statements{'insert_paper'}{'handle'}->execute( $paper_code, $year_offered, $period, $paper_title, $points, $location_name ) or die "\nERROR: failed to insert paper into database.\n" . $SQL_statements{'insert_paper'}{'handle'}->errstr . "\n"; print "inserted\n"; $papers_added++; } # Process student enrolment details. $SQL_statements{'fetch_enrolment'}{'handle'}->execute( $paper_code, $period, $year_offered ) or die "\nERROR: failed to retrieve enrolment details for ${paper_id} (execute).\n" . $SQL_statements{'fetch_enrolment'}{'handle'}->errstr . "\n"; my $student_id; my $SPR_code; my $surname; my $first_name; my $othernames; my $gender; my $attendance_mode; my $programme_code; my $residency; my $special_enrolment; my $enrolment_status; my @majors = (undef, undef, undef); my @minors = (undef, undef, undef); $SQL_statements{'fetch_enrolment'}{'handle'}->bind_columns(\$student_id, \$SPR_code, \$surname, \$first_name, \$othernames, \$gender, \$residency, \$special_enrolment, \$enrolment_status, \$attendance_mode, \$programme_code, \$majors[0], \$majors[1], \$majors[2], \$minors[0], \$minors[1], \$minors[2]); while ( my $enrolment_row = $SQL_statements{'fetch_enrolment'}{'handle'}->fetch ) { $enrolments_processed++; # Not everyone has other names beyond the first. There might even # be people who don't have a first name, but I haven't hit any so far. # Let's be paranoid anyway :). $first_name = '' if ( !defined( $first_name ) ); $othernames = '' if ( !defined( $othernames ) ); $surname = '' if (!defined($surname)); $othernames = join( ' ', $first_name, $othernames ); $othernames =~ s/(^ +)|( +$)//g; $othernames =~ s/,//g; # The report separates them with commas?? Weird. $SPR_code = '' if (!defined($SPR_code)); my ( undef, $programme_number ) = split( /\//, $SPR_code ); $programme_code = "!!!!" if (!defined($programme_code)); if ($programme_code eq "!!!!") { print "\nWARNING: programme missing for student $student_id (possible missing data).\n\n"; } for (my $i = 1; $i <= 3; $i++) { print "\nWARNING: major $i specialisation missing for student $student_id (possible missing data).\n\n" if (defined($majors[$i-1]) && $majors[$i-1] eq "!!!!"); print "\nWARNING: minor $i specialisation missing for student $student_id (possible missing data).\n\n" if (defined($minors[$i-1]) && $minors[$i-1] eq "!!!!"); } # Various flags. $residency = '' if (!defined($residency)); my $international = ( $residency eq 'International Student' ); my $interest_only = ( $special_enrolment =~ /IO/ ); my $FEO = ( $special_enrolment =~ /FEO/ ); # We may find late withdrawals in both lists, but their enrolment status will start with 'W-', # or sometimes 'S-' (withdrawn under special/exceptional circumstances). my $withdrawn = ( $enrolment_status =~ /^[WS]-/ ); $gender = '!' if (!defined($gender)); $gender = substr( $gender, 0, 1 ); if ( $mode eq 'debug' ) { print <<EOT ---------------------------------------- DEBUG: Student ID: $student_id DEBUG: Surname: $surname DEBUG: Other names: $othernames DEBUG: Programme number: $programme_number DEBUG: Gender: $gender DEBUG: International: $international DEBUG: Interest only: $interest_only DEBUG: Final exam only: $FEO DEBUG: Enrolment status: $enrolment_status DEBUG: Withdrawn: $withdrawn DEBUG: Programme code: $programme_code EOT ; # Majors/minors may not be defined, which spews errors all over the debug output. foreach my $spectype ("Major", "Minor") { my @speclist = ($spectype eq "Major") ? @majors : @minors; for (my $i = 1; $i <= 3; $i++) { print "DEBUG: $spectype $i: "; if (defined($speclist[$i-1])) { print $speclist[$i-1]; } else { print "[undefined]"; } print "\n"; } } } # Keep a track of which students we've already checked, so that we don't # do any more than necessary (especially hit the database). if ( !defined( $students_checked{$student_id} ) ) { my ($old_surname, $old_othernames, $old_gender, $old_international) = ('', '', '', ''); $students_checked{$student_id} = 1; print "Student $student_id: $surname, $othernames..."; # Try inserting them into the database. First, check whether they're already in there... $SQL_statements{'check_student_exists'}{'handle'}->execute( $student_id ) or die "\nERROR: failed to check existence of student (execute).\n" . $SQL_statements{'check_student_exists'}{'handle'}->errstr . "\n"; $SQL_statements{'check_student_exists'}{'handle'}->bind_columns(\$old_surname, \$old_othernames, \$old_gender, \$old_international); $Result = $SQL_statements{'check_student_exists'}{'handle'}->fetch; ( !$SQL_statements{'check_student_exists'}{'handle'}->err ) or die "\nERROR: failed to check existence of student (fetch).\n" . $SQL_statements{'check_student_exists'}{'handle'}->errstr . "\n"; # Yep. if ( defined( $Result ) ) { # Check whether their name has changed. The $old_xxx variables are # automatically set by the fetch above. if ((($surname ne '') && ($othernames ne '') && ($gender ne '')) && (($surname ne $old_surname) || ($othernames ne $old_othernames) || ($gender ne $old_gender))) { # Update them if they have changed. Rather than mess around # trying to figure out which columns to update, just do the # lot. It's probably no less efficient, and a lot easier to # code. $SQL_statements{'update_student'}{'handle'}->execute($surname, $othernames, $gender, $student_id) or die "\nERROR: failed trying to update student details.\n" . $SQL_statements{'update_student'}{'handle'}->errstr . "\n"; print "existing entry updated\n"; if ( $mode eq 'debug' ) { print "DEBUG: Surname: old = $old_surname, new = $surname\n"; print "DEBUG: Othernames: old = $old_othernames, new = $othernames\n"; } $students_updated++; } else { print "already in database\n"; $students_unchanged++; } } else # The student isn't already in the database, so insert them. { $SQL_statements{'insert_student'}{'handle'}->execute( $student_id, $surname, $othernames, $gender, make_boolean( $international ) ) or die "\nERROR: failed to insert student into database.\n" . $SQL_statements{'insert_student'}{'handle'}->errstr . "\n"; print "inserted\n"; $students_added++; } } print " enrolling student $student_id in $paper_id..."; # Skip withdrawn and interest only students, UNLESS this is the "end" run. It's fairly common for students to withdraw under # exceptional circumstances after the initial "end" run. This means they will initially be listed as enrolled_at_end, but if # you run this again with an updated input file, ignoring withdrawn students will mean that enrolled_at_end won't change! if ( $withdrawn ) { print "withdrawn\n"; if ($mode ne 'end') { $enrolments_ignored++; next; } } elsif ( $interest_only ) { print "interest only\n"; $enrolments_ignored++; next; } # Notify FEO enrolments, but keep them, as they are actual enrolments. if ($FEO and !$withdrawn) { print "(final examination only) "; } # Now try enrolling them in the paper. If they're not already enrolled, # all of the enrolment flags will be false. If they *are* already # enrolled, we can get the existing enrolment flags from the database. # In either case, we just update the corresponding enrolment flag # according to the value of $mode. This means that we can just # insert/update *all* of the flag values, which saves us having # to dynamically generate SQL to update the correct flag. my %enrolment_flags = ( 'start' => 'false', 'cutoff' => 'false', 'end' => 'false', ); $SQL_statements{'check_enrolment_exists'}{'handle'}->execute( $student_id, $paper_code, $year_offered, $period ) or die "\nERROR: failed to check existence of enrolment (execute).\n" . $SQL_statements{'check_enrolment_exists'}{'handle'}->errstr . "\n"; my $existing_FEO; my $existing_programme_number; my $existing_attendance_mode; my $existing_programme_code; my @existing_majors = (undef, undef, undef); my @existing_minors = (undef, undef, undef); $SQL_statements{'check_enrolment_exists'}{'handle'}->bind_columns( \$enrolment_flags{'start'}, \$enrolment_flags{'cutoff'}, \$enrolment_flags{'end'}, \$existing_FEO, \$existing_programme_number, \$existing_attendance_mode, \$existing_programme_code, \$existing_majors[0], \$existing_majors[1], \$existing_majors[2], \$existing_minors[0], \$existing_minors[1], \$existing_minors[2] ); $Result = $SQL_statements{'check_enrolment_exists'}{'handle'}->fetch; ( !$SQL_statements{'check_enrolment_exists'}{'handle'}->err ) or die "\nERROR: failed to check existence of enrolment (fetch).\n" . $SQL_statements{'check_enrolment_exists'}{'handle'}->errstr . "\n"; if ( defined ( $Result ) ) { if ($withdrawn) { $enrolment_flags{$mode} = 'false'; # The computed values may be missing, and any changes aren't relevant for a withdrawn student anyway, # so just reset them to what's already in the database. $FEO = $existing_FEO; $programme_number = $existing_programme_number; $attendance_mode = $existing_attendance_mode; $programme_code = $existing_programme_code; @majors = @existing_majors; @minors = @existing_minors; } else { $enrolment_flags{$mode} = 'true'; } # print "DEBUG: enrolled at start = $enrolment_flags{'start'}\n"; # print "DEBUG: enrolled at cutoff = $enrolment_flags{'cutoff'}\n"; # print "DEBUG: enrolled at end = $enrolment_flags{'end'}\n"; # Update the withdrawal flags according to the value of $mode. # Also update the non-withdrawal flags and programme details, just in case something changes. $SQL_statements{'update_enrolment'}{'handle'}->execute( $enrolment_flags{'start'}, $enrolment_flags{'cutoff'}, $enrolment_flags{'end'}, make_boolean( $FEO ), $programme_number, $attendance_mode, $programme_code, $majors[0], $majors[1], $majors[2], $minors[0], $minors[1], $minors[2], $student_id, $paper_code, $year_offered, $period ) or die "\nERROR: failed trying to update enrolment details.\n" . $SQL_statements{'update_enrolment'}{'handle'}->errstr . "\n"; print "updated\n" if (!$withdrawn); $enrolments_updated++; } else { if ($!withdrawn) { $enrolment_flags{$mode} = 'true'; } # The student isn't already enrolled, so insert a # new row and set the withdrawal flags appropriately. $SQL_statements{'insert_enrolment'}{'handle'}->execute( $student_id, $paper_code, $year_offered, $period, $enrolment_flags{'start'}, $enrolment_flags{'cutoff'}, $enrolment_flags{'end'}, make_boolean( $FEO ), $programme_number, $attendance_mode, $programme_code, $majors[0], $majors[1], $majors[2], $minors[0], $minors[1], $minors[2] ) or die "\nERROR: failed trying to insert enrolment row.\n" . $SQL_statements{'insert_enrolment'}{'handle'}->errstr . "\n"; print "added\n" if (!$withdrawn); $enrolments_added++; } } } # SANITY CHECKS # Unrecognised programme (not in database) my $student_id; my $programme; $SQL_statements{'check_unknown_programme'}{'handle'}->execute() or die "\nERROR: failed to check for unknown programmes in staging tables (execute).\n" . $SQL_statements{'check_unknown_programme'}{'handle'}->errstr . "\n"; $SQL_statements{'check_unknown_programme'}{'handle'}->bind_columns(\$student_id, \$programme); while ( my $paper_row = $SQL_statements{'check_unknown_programme'}{'handle'}->fetch ) { print "\nWARNING: programme " . $programme . " for student " . $student_id . " does not exist in database.\n" } # Unrecognised specialisation (not in database) my $specialisation; $SQL_statements{'check_unknown_specialisation'}{'handle'}->execute() or die "\nERROR: failed to check for unknown specialisations in staging tables (execute).\n" . $SQL_statements{'check_unknown_specialisation'}{'handle'}->errstr . "\n"; $SQL_statements{'check_unknown_specialisation'}{'handle'}->bind_columns(\$student_id, \$specialisation); while ( my $paper_row = $SQL_statements{'check_unknown_specialisation'}{'handle'}->fetch ) { print "\nWARNING: specialisation '" . $specialisation . "' for student " . $student_id . " does not exist in database.\n" } # Check for malformed Unicode? (issue #51) $DBConnection->commit; foreach my $statement ( keys %SQL_statements ) { $SQL_statements{$statement}{'handle'}->finish; } $DBConnection->disconnect; print <<EOT ============================================================ SUMMARY EOT ; printf "Papers added: %4d\n", $papers_added; print "Students:\n"; printf " New %4d\n", $students_added; printf " Updated %4d\n", $students_updated; printf " Unchanged %4d (=> total %4d)\n", $students_unchanged, ( $students_added + $students_updated + $students_unchanged ); print "Enrolments:\n"; printf " Processed %4d\n", $enrolments_processed; printf " New %4d\n", $enrolments_added; printf " Updated %4d\n", $enrolments_updated; printf " Ignored %4d (=> total %4d)\n", $enrolments_ignored, ( $enrolments_added + $enrolments_updated + $enrolments_ignored ); ##### END ##### sub get_input { my( $regexp, $prompt, $default, $suffix ) = @_; $prompt = "" if( !defined $prompt); for(;;) { print $prompt; if( defined $default ) { print " [$default]"; } print "$suffix "; my $in = <STDIN>; chomp $in; if( $in eq "" && defined $default ) { return $default; } if( $in=~m/^$regexp$/ ) { return $in; } else { print "Bad input, try again.\n"; } } } # Convert a boolean value into the equivalent string for SQL (i.e., true => # 'true', false => 'false', undef => undef => null in SQL). sub make_boolean { my $value = shift; if ( defined( $value ) ) { return 'true' if ( $value ); return 'false'; } return undef; } # Print command line usage. sub print_usage { my $exit_status = shift // 0; my $executable = fileparse( $0, '.pl' ); print <<EOD Usage: ${executable} [options] Loads a staged data set including paper details, student details, and enrolments in papers into the live tables. Options: --mode/-m <mode> process input file as specified by <mode>: start: load data; set enrolment flags for start of paper [default] cutoff: load data; set enrolment flags for withdrawal cutoff date end: load data; set enrolment flags for end of paper debug: load data; no enrolment flags, print debugging output --database/-d <db> name of database to load the data into [default 'informs'] --help/-h this text EOD ; exit $exit_status; }
Show line notes below