• Added first draft of literature review.
• Added more notes.
• Added web output figure.
• Changed console output table into a figure.
1 parent 545c8b1 commit 49dcd901e8196fd14c28e2c3a19d3414423a95ec
Nigel Stanger authored on 24 Jul 2017
Showing 3 changed files
Koli_2017/Koli_2017_Stanger.bib
Doi = {10.1145/1385269.1385276},
Isbn = {978-1-60558-233-7},
Title = {Multi-{RQP}: {G}enerating test databases for the functional testing of {OLTP} applications}}
 
@article{Brusilovsky.P-2010a-Learning,
Articleno = {19},
Author = {Peter Brusilovsky and Sergey Sosnovsky and Michael V. Yudelson and Danielle H. Lee and Vladimir Zadorozhny and Xin Zhou},
Doi = {10.1145/1656255.1656257},
Journal = {ACM Transactions on Computing Education},
Month = jan,
Number = {4},
Title = {Learning {SQL} programming with interactive tools: {F}rom integration to personalization},
Volume = {9},
Year = {2010}}
 
@book{Cattell.R-2000a-ODMG3,
Address = {San Francisco, California, USA},
Author = {R. G. G. Cattell and Douglas K. Barry and Mark Berler and Jeff Eastman and David Jordan and Craig Russell and Olaf Schadow and Torsten Stanienda and Fernando Velez},
Publisher = {Morgan Kaufmann},
Title = {The Object Database Standard: {ODMG} 3.0},
Year = {2000}}
 
@inproceedings{Dekeyser.S-2007a-Computer,
Author = {Michael de Raadt and Stijn Dekeyser and Tien Yu Lee},
Booktitle = {Proceedings of the Eighteenth Australasian Database Conference (ADC 2007)},
Pages = {53--62},
Title = {Computer assisted assessment of {SQL} query skills},
Url = {http://dl.acm.org/citation.cfm?id=1273730.1273737},
Year = {2007}}
 
@article{Dietrich.S-1993a-An-educational,
Author = {Suzanne W. Dietrich},
Doi = {10.1080/0899340930040201},
Journal = {Computer Science Education},
Number = {2},
Pages = {157--184},
Title = {An educational tool for formal relational database query languages},
Volume = {4},
Year = {1993}}
 
@inproceedings{Dietrich.S-1997a-WinRDBI,
Author = {Suzanne W. Dietrich and Eric Eckert and Kevin Piscator},
Booktitle = {Proceedings of the 28th {SIGCSE} Technical Symposium on Computer Science Education},
Doi = {10.1145/268085.268131},
Title = {{WinRDBI}: {A} {Windows}-based relational database educational tool},
Year = {1997}}
 
@inproceedings{Mitrovic.A-1998a-Learning,
Author = {Antonija Mitrovic},
Crossref = {Lewis.J-1998a-SIGCSE},
Doi = {10.1145/274790.274318},
Pages = {307--311},
Title = {Learning {SQL} with a computerized tutor}}
 
@article{Ohlsson.S-1992a-Constraint-based,
Author = {Stellan Ohlsson},
Journal = {Journal of Artificial Intelligence in Education},
Number = {4},
Pages = {429--447},
Title = {Constraint-based student modelling},
Volume = {3},
Year = {1992}}
 
@article{Ohlsson.S-2016a-Constraint-based,
Author = {Stellan Ohlsson},
Doi = {10.1007/s40593-015-0075-7},
Journal = {International Journal of Artificial Intelligence in Education},
Number = {1},
Pages = {457--473},
Title = {Constraint-based modeling: {F}rom cognitive theory to computer tutoring -- and back again},
Volume = {26},
Year = {2016}}
 
@inproceedings{Prior.J-2004a-Backwash,
Author = {Julia Coleman Prior and Raymond Lister},
Crossref = {Boyle.R-2004a-ITiCSE},
Koli_2017/Koli_2017_Stanger.tex
\end{abstract}
 
\maketitle
 
\nocite{Bhangdiya.A-2015a-XDa-TA,Chandra.B-2015a-Data,Chandra.B-2016a-Partial,Dekeyser.S-2007a-Computer,Kearns.R-1997a-A-teaching,Prior.J-2004a-Backwash,Russell.G-2005a-Online,Gong.A-2015a-CS-121-Automation,Farre.C-2008a-SVTe,Dietrich.S-1997a-WinRDBI,Binnig.C-2008a-Multi-RQP,Chays.D-2008a-Query-based,Marcozzi.M-2012a-Test,Haller.K-2010a-Test,Vatanawood.W-2004a-Formal,Lukovic.I-2003a-Proceedings,Bench-Capon.T-1998a-Report,Spivey.J-1989a-An-introduction,Choppella.V-2006a-Constructing,Ambler.S-2006a-Database}
 
\section{Introduction}
 
Any introductory database course needs to cover several core concepts, including what a database is, what a logical data model is, and how to create and interact with a database. Typically such courses will focus on the Relational Model and its embodiment in SQL database management systems (DBMSs). This is partly because the Relational Model provides a sound theoretical framework for discussing key database concepts [cite], and partly because SQL DBMSs are still widely used. The shadow of SQL is so strong that even non-relational systems have adopted some form of SQL-like language in order to leverage existing knowledge (e.g., OQL \cite{Cattell.R-2000a-ODMG3}, HiveQL \cite{Apache-2017a-Hive}, and CQL \cite{Apache-2017a-CQL}).
 
Courses that teach SQL usually include one or more assessments that test students' SQL skills. These test students' ability to create a database using SQL data definition (DDL) statements, and to interact with that database using SQL data manipulation (DML) statements. Manually grading such code can be slow, tedious, and potentially error-prone. Automating the grading process enables faster turnaround times and greater consistency [cite]. If the grading can be done in real time, the grading tool could become part of a larger, interactive SQL learning environment (e.g., \cite{Kenny.C-2005a-Automated,Kleiner.C-2013a-Automated,Mitrovic.A-1998a-Learning,Russell.G-2004a-Improving,Sadiq.S-2004a-SQLator}).
 
While there have been many prior efforts to automatically grade SQL DML (see Section~\ref{sec-literature}), we have been unable to find any similar systems for automatically grading SQL DDL.
 
In our department, we offered typical introductory papers on database systems. INFO 212 was offered from 1997(?) to 2011, and was a dedicated semester-long course (13 weeks). It was replaced by INFO 214 in 2012, which included 6\(\frac{1}{2}\) weeks of core database material (the remainder of the paper covered data communications and networking). It was discontinued at the end of 2016.
\end{enumerate}
 
The third approach, used from 2009 until 2016 (?dates), is what inspired the work discussed in this paper. It is also the most amenable to automation, as much of the assessment specification is fixed in advance, leaving less room for deviation.
 
% Can be difficult for students to know whether they are on the right track with regards to a specification
 
One obvious approach to grading SQL DDL is syntax checking of \texttt{CREATE TABLE} statements. We feel that this is already effectively catered for by the syntax checking built into every SQL DBMS (although it is fair to say that the errors produced by such checkers can sometimes be obscure and unhelpful). While it might be feasible to build an SQL DDL syntax checker that provides more helpful feedback, this misses a key element of database design and implementation: that the database should meet the requirements of the problem being solved. A database schema is normally designed and implemented within the context of a specific set of requirements, so checking that the implemented SQL schema fulfils these requirements would seem to be a more helpful approach to learning the principles of database design and implementation. If a student's schema conforms to the behaviour expected from the original specification, then by definition the DDL syntax must be correct. This enables us to focus more on the student's understanding of the problem than on details of SQL syntax.
% weakness: we do not consider coding style
 
% Prior approaches to grading SQL DDL have focused on the \texttt{CREATE TABLE} syntax, but we have taken a different approach, where we verify that the implemented schema conforms to the behaviour expected from the original specification. If the student achieves this, then by definition the DDL syntax must be correct (weakness: we do not consider coding style). This enables us to focus less on the specifics of the syntax and more on whether students have implemnted the requirements correctly.
 
The requirements specification for the assessment is tightly defined, which means it can be readily codified in machine-readable form. Rather than attempt to parse and check the \texttt{CREATE TABLE} statements directly, we instead issue queries against the schema's metadata (catalog) and compare the results of these queries against the machine-readable version of the specification. The process then effectively becomes one of unit testing the schema against the original requirements. In our implementation, we used the PHPUnit database unit testing framework to carry out this process, albeit in a somewhat unorthodox way (see Section~\ref{sec-design}).
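To illustrate the general idea, the following sketch shows how one structural requirement might be checked from a PHPUnit test by querying the standard \texttt{INFORMATION\_SCHEMA} catalog views rather than parsing the student's DDL. The table, column, and connection details here are illustrative assumptions only, and our actual test classes are organised differently (see Section~\ref{sec-design}).

\begin{verbatim}
<?php
use PHPUnit\Framework\TestCase;

// Sketch only: verify that table Product has a unit_price
// column with an appropriate type and nullability, by querying
// the catalog instead of parsing the CREATE TABLE statement.
class ProductStructureTest extends TestCase
{
    private $pdo;

    protected function setUp()
    {
        // Connection details are illustrative.
        $this->pdo = new PDO(
            'mysql:host=localhost;dbname=student',
            'user', 'password');
    }

    public function testUnitPriceColumn()
    {
        $stmt = $this->pdo->prepare(
            "SELECT DATA_TYPE, IS_NULLABLE
             FROM INFORMATION_SCHEMA.COLUMNS
             WHERE TABLE_NAME = 'Product'
               AND COLUMN_NAME = 'unit_price'");
        $stmt->execute();
        $column = $stmt->fetch(PDO::FETCH_ASSOC);

        $this->assertNotFalse($column,
            'column unit_price does not exist');
        // Accept any of several reasonable numeric types.
        $this->assertContains($column['DATA_TYPE'],
            ['decimal', 'numeric']);
        $this->assertSame('NO', $column['IS_NULLABLE']);
    }
}
\end{verbatim}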
 
% original schema is codified in machine-readable form
% use a database unit testing framework (PHPUnit) to automate
 
\section{Prior work}
\label{sec-literature}
 
There have been many prior efforts to build learning systems for SQL. However, these have focused almost exclusively on SQL queries using the \texttt{SELECT} statement (i.e., DML) rather than schema definitions (DDL). This is unsurprising given the relative complexity of the \texttt{SELECT} statement compared to most other SQL statements.
 
\citeauthor{Kearns.R-1997a-A-teaching}'s \emph{esql} \cite{Kearns.R-1997a-A-teaching} supported students in learning the fundamental concepts underlying SQL. It could parse and execute \texttt{CREATE}, \texttt{DROP}, \texttt{ALTER}, \texttt{DELETE}, \texttt{INSERT}, and \texttt{SELECT} statements, but all of these except \texttt{SELECT} were simply passed through to the DBMS. The system enabled students to better understand the steps in the execution of a query by visualizing the intermediate tables generated by each step of the query. It did not provide feedback on students' attempts beyond basic syntax checking and displaying query results.
 
\citeauthor{Dietrich.S-1993a-An-educational}'s \emph{RDBI} \cite{Dietrich.S-1993a-An-educational} was a Prolog-based interpreter for relational algebra, tuple and domain relational calculus, and SQL. It focused primarily on queries, and used its own non-SQL data definition language. RDBI did not provide feedback on students' attempts beyond basic syntax checking and displaying query results.
 
\citeauthor{Mitrovic.A-1998a-Learning}'s \emph{SQL-Tutor} \cite{Mitrovic.A-1998a-Learning} was an intelligent teaching system that provided students with a guided discovery learning environment for SQL queries. It supported only the \texttt{SELECT} statement, and used constraint-based modeling \cite{Ohlsson.S-1992a-Constraint-based,Ohlsson.S-2016a-Constraint-based} to provide feedback to students on both syntactic and semantic SQL errors.
 
\citeauthor{Sadiq.S-2004a-SQLator}'s \emph{SQLator} \cite{Sadiq.S-2004a-SQLator} was a web-based interactive tool for learning SQL. Students were presented with a series of questions in English, and had to write SQL \texttt{SELECT} statements to answer these questions. SQLator used an ``equivalence engine'' to determine whether an SQL query fulfilled the requirements of the original English question. SQLator supported only the \texttt{SELECT} statement, and provided only basic feedback (correct or incorrect) to students. SQLator was able to automatically mark about a third of submitted queries as correct, thus improving the speed of grading.
 
\citeauthor{Prior.J-2004a-Backwash}'s \emph{AsseSQL} \cite{Prior.J-2004a-Backwash} was an online examination environment for evaluating students' ability to formulate SQL queries. Students would write and execute their queries, and the data set produced by their query would be compared against the correct data set. The answer would then be flagged as correct or incorrect as appropriate. AsseSQL supported only the \texttt{SELECT} statement.
 
\citeauthor{Russell.G-2004a-Improving}'s \emph{ActiveSQL}\footnote{\url{https://db.grussell.org/}} \cite{Russell.G-2004a-Improving,Russell.G-2005a-Online} was an online interactive learning environment for SQL that provided immediate feedback to students. ActiveSQL measured the accuracy of a query in a similar way to \citeauthor{Prior.J-2004a-Backwash}'s AsseSQL, but instead of a simple correct/incorrect answer, it computed an accuracy score based on the differences between the query output and the correct answer. It was also able to detect ``hard-coded'' queries that produced the desired result, but would fail if the data set changed \cite{Russell.G-2005a-Online}. ActiveSQL supported only the \texttt{SELECT} statement.
 
\citeauthor{Dekeyser.S-2007a-Computer}'s \emph{SQLify} \cite{Dekeyser.S-2007a-Computer} was another online SQL learning system that incorporated semantic feedback and automatic assessment. SQLify evaluated each query on an eight-level scale that covered query syntax, output schema, and query semantics. Instructors could use this information to award an overall grade. Again, SQLify supported only the \texttt{SELECT} statement.
 
\citeauthor{Brusilovsky.P-2010a-Learning}'s \emph{SQL Exploratorium} \cite{Brusilovsky.P-2010a-Learning} took an interesting approach to generating problems, using parameterised query templates to generate the questions given to students. Again, the SQL Exploratorium supported only the \texttt{SELECT} statement.
 
\citeauthor{Kleiner.C-2013a-Automated}'s \emph{aSQLg} \cite{Kleiner.C-2013a-Automated} was an automated assessment tool that provided feedback to students. This enabled students to improve their learning by making further submissions after incorporating this feedback. The aSQLg system checked queries for syntax, efficiency (cost), result correctness, and statement style. Again, aSQLg supported only the \texttt{SELECT} statement.
 
\citeauthor{Kenny.C-2005a-Automated} \cite{Kenny.C-2005a-Automated} described an SQL learning system similar to those already described, which also incorporated an assessment of a student's previous progress. This enabled a more personalized and adaptive approach to student learning, where feedback was tailored according to student progress. Again, this system supported only the \texttt{SELECT} statement.
 
\citeauthor{Bhangdiya.A-2015a-XDa-TA}'s \emph{XDa-TA}\footnote{\url{http://www.cse.iitb.ac.in/infolab/xdata/}} extended the idea of automated grading of SQL by adding the ability to generate data sets designed to catch common errors. These data sets were automatically derived from a set of correct SQL queries \cite{Bhangdiya.A-2015a-XDa-TA,Chandra.B-2015a-Data}. Later work \cite{Chandra.B-2016a-Partial} added support for awarding partial marks.
 
\citeauthor{Gong.A-2015a-CS-121-Automation}'s ``CS 121 Automation Tool'' \cite{Gong.A-2015a-CS-121-Automation} was a tool designed to semi-automate the grading of SQL assessments, again focusing on SQL DML statements. Interestingly, the system appears to be extensible and could thus potentially be modified to support grading of \texttt{CREATE TABLE} statements.
 
There is relatively little work on unit testing of databases. Most authors working in this area have focused on testing database \emph{applications} rather than the database itself (e.g., \cite{Binnig.C-2008a-Multi-RQP,Chays.D-2008a-Query-based,Marcozzi.M-2012a-Test,Haller.K-2010a-Test}). \citeauthor{Ambler.S-2006a-Database} discusses how to test the functionality of a database \cite{Ambler.S-2006a-Database}, while \citeauthor{Farre.C-2008a-SVTe} test the ``correctness'' of a schema \cite{Farre.C-2008a-SVTe}, focusing mainly on consistency of constraints. Neither considers whether the database schema meets the specified requirements.
 
To our knowledge there has been no work on automated grading of SQL \texttt{CREATE TABLE} statements. While dealing with these is simpler than dealing with \texttt{SELECT} statements, the ability to at least semi-automate the grading of SQL schema definitions should yield more consistent application of grading criteria and faster turnaround times.
 
Only a couple of the systems discussed in this section [which?] have considered a more ``functional'' approach to checking SQL code, i.e., verifying that the code written fulfils the requirements of the problem, rather than focusing on the code itself. Given the relatively static nature of an SQL schema, we feel this is the most appropriate way of approaching an automated grading system. This sounds like it should be a useful application of formal methods \cite{Spivey.J-1989a-An-introduction}, but work with formal methods and databases seems to have focused either on \emph{generating} a valid schema from a specification (e.g., \cite{Vatanawood.W-2004a-Formal,Lukovic.I-2003a-Proceedings,Choppella.V-2006a-Constructing}), or on verifying schema transformation and evolution \cite{Bench-Capon.T-1998a-Report}.
 
 
\section{System design}
\label{sec-design}
% System was implemented in PHP in order to speed development of the web interface, and because of the ready availability of the database unit testing framework PHPUnit (which includes a PHP port of the DbUnit testing framework for Java).
% Main program can be launched from either a console program or a web application. Console application uses a single database user: student's schema loaded into DBMS (assuming error-free), then console app is run. Web application: students supply their DBMS credentials and the system connects directly to their schema, with output appearing in the web browser.
% Project specification is encoded as a collection of PHP classes, one per table (each a subclass of the PHPUnit TestCase class). These classes encode the expected name, a range of possible data types, minimum and maximum lengths, nullability, etc., of the table and its columns. They also include specifications of simple constraints such as minimum and maximum values. Valid and invalid values can also be supplied.
% Each table also includes two sets of tests to run on the database, one to test the structural requirements of the table (columns, data types, etc.), the other to test the data requirements (constraints). Empty and known-valid fixtures are also included.
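Each table in the project specification is encoded as a PHP class. The following sketch gives the flavour of how expected structure and simple constraints might be declared; the table, the column names, and the exact field layout are illustrative assumptions rather than our actual class design.

\begin{verbatim}
<?php
use PHPUnit\Framework\TestCase;

// Sketch only: a per-table specification class. Shared test
// logic (not shown) walks these declarations, issuing catalog
// queries to check structure, and INSERT probes of the legal
// and illegal values to check constraints.
class ProductSpec extends TestCase
{
    // Expected table name.
    public $tableName = 'Product';

    // Expected columns: acceptable types, lengths,
    // nullability, and simple domain constraints.
    public $columns = [
        'product_id' => ['types' => ['decimal', 'numeric'],
                         'nullable' => false],
        'name'       => ['types' => ['varchar'],
                         'minLength' => 1, 'maxLength' => 50,
                         'nullable' => false],
        'unit_price' => ['types' => ['decimal', 'numeric'],
                         'nullable' => false, 'min' => 0,
                         'legalValues'   => [0, 9.99],
                         'illegalValues' => [-1]],
    ];
}
\end{verbatim}

Under this sketch, the structural tests would compare these declarations against the schema metadata, while the data tests would insert the legal and illegal values and check that the DBMS accepts or rejects each accordingly.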
 
 
\begin{figure}
\centering
\includegraphics[width=0.85\columnwidth, keepaspectratio]{images/BDL_ERD.pdf}
\caption{ERD of schema}
\end{figure}
 
 
% ANSI terminal colours for Terminal.app; see https://en.wikipedia.org/wiki/ANSI_escape_code#Colors
% grey 203, 204, 205
% green 37 188 36
\setlength{\dotvskip}{-1.25ex}
\newlength{\codeskip}
\setlength{\codeskip}{-0.5ex}
 
\begin{figure}
\ttfamily\scriptsize
% \hrule
\begin{tabbing}
0123\=\kill
\tcbox[colback=test grey]{NOTE: Checking structure of table Product.} \\[\codeskip]
TEST: [[ Product ]] \\
\> \textcolor{test green}{+ OK} \\[\dotvskip]
\hspace*{\dothskip}\vdots \\
\tcbox[colback=test red, coltext=test grey]{--- FAILED: 2 of 8 legal values tested were rejected by a CHECK constraint.}
\end{tabbing}
% \hrule
\vskip-1ex
\caption{Example of console output}
\end{figure}
 
\begin{figure}
\centering
\includegraphics[width=0.95\columnwidth,keepaspectratio]{images/web_output.png}
\caption{Example of web output}
\end{figure}
 
 
\begin{table}
\footnotesize
% \hrule
\caption{Example of table specification}
\end{table}
 
 
\begin{figure}
\sffamily
\begin{tikzpicture}[every node/.style={draw, minimum height=7.5mm, inner sep=1em}]
\node (console) {Console app};
\coordinate[below=3mm of console.south] (console port);
\end{tikzpicture}
\caption{System architecture}
\end{figure}
 
 
 
\section{Evaluation}
\label{sec-evaluation}
 
\section{Conclusions \& future work}
Koli_2017/images/web_output.png 0 → 100644