library(TeachingDemos) # define some colors dCol <- rgb(1.0,0,0,alpha=0.2) dBorder <- rgb(1.0,0,0,alpha=1.0) # function for producing a histogram histogram <- function(data, width=5, step=10, min=0, max=100, xlab=columnName, ylab=NULL, overridelabels=NULL, showcurve=FALSE, stats=TRUE, showbinwidth=FALSE, ylimit=NULL, curveAdjust=0.6, centresRight=TRUE, statsRight=FALSE, values=TRUE, ...) { # get number of results of data in specified column n <- length(data) # create histogram data h <- hist(data, breaks=seq(min, max, by=width), include.lowest=TRUE, right=centresRight, plot=FALSE) if(showcurve) { # calculate density for plotting a fitted curve den <- density(data, curveAdjust, kernel="cosine", na.rm=TRUE) } # calculate range for y-axis unless it is specified as a parameter if(is.null(ylimit)) { ylimit <- max(h$count) ylimit <- ceiling(ylimit / step) * step if(showcurve) { ylimit <- max(c(ylimit,width*n*den$y)) } } subText = "" if(showbinwidth) { subText = paste("(bin width of ", width,")",sep="") } # create initial plot if(is.null(ylab)) { ylab <- "Count" } plot(h,ylim=c(0, ylimit), xlab=xlab, ylab=ylab, sub=subText, axes=FALSE, ...) # plot some hrizontal grid lines abline(h=seq(0, ylimit, by=step), col="grey", lty="solid", lwd=0.5) # plot the histogram lines(h, ylim=c(0, ylimit), border="darkblue", col="light blue") mtext("Histogram", side=3, line=0.25) ## render the axis labels ## # cache the original graphics params and shrink the text a bit op <- par(cex=0.7, font.lab=2) # y-axis LEFT <- 2 axis(LEFT, at=seq(0, ylimit, by=step), las=1) # x-axis # are we overriding x labels? if (!is.null(overridelabels)) { axis(1, at=seq(min+0.5*width, max-0.5*width, by=width), labels=overridelabels) } else { axis(1, at=seq(min, max, by=width), labels=seq(min, max, by=width)) } # restore graphics params par(op) # plot density curve if required if(showcurve) { # scale the height of the curve and render it polygon(x=den$x, y=width*n*den$y, col=dCol, border=dBorder) } # render values over bars if(values) { op <- par(cex=0.7) # cache the graphics params plot(h, labels=TRUE, col="transparent", border="transparent", add=TRUE) par(op) # restore graphics params } if(stats) { op <- par(cex=0.7) avg <- mean(na.omit(data), trim=0) med <- median(na.omit(data)) mi <- min(na.omit(data)) ma <- max(na.omit(data)) maxCount <- length(which(round(data,3) == max)) subsStr <- paste("Submissions: ", length(na.omit(data)), sep="") avgStr <- paste("Mean: ", round(avg,1), " (", round((avg/max)*100,1),"%)", sep="") medStr <- paste("Median: ", round(med,1), " (", round((med/max)*100,1),"%)", sep="") minStr <- paste("Minimum: ", round(mi,1), " (",round((mi/max)*100,1),"%)", sep="") maxStr <- paste("Maximum: ", round(ma,1), " (",round((ma/max)*100,1),"%)", sep="") fullMarksStr <- paste("Full Marks: ", maxCount," (", round(maxCount/length(na.omit(data))*100,1),"%)", sep="") combinedStr <- paste(subsStr, avgStr, medStr, minStr, maxStr, fullMarksStr, sep="\n") strW <- strwidth(combinedStr) strH <- strheight(combinedStr) margin <- 2 xmargin <- margin * max / 100 ymargin <- margin * ylimit / 100 if(!statsRight) { rect(xleft=0, xright=strW+xmargin*2, ytop=ylimit, ybottom=ylimit-strH-ymargin*2, col="white") text(x=xmargin, y=ylimit-(strH / 2) - ymargin, combinedStr, adj=0) } else { rect(xleft=max-strW-(xmargin*2), max, ytop=ylimit, ybottom=ylimit-strH-ymargin*2, col="white") text(x=max-strW-xmargin, y=ylimit-(strH / 2) - ymargin, combinedStr, adj=0) } par(op) } } # function for producing a scatter plot summaryboxplot <- function(data, formula, drawOutlierLabels=F, extras=NULL, labs=NULL, ...) { # add padding if adding extras op <-if(!is.null(extras)) { par(mar=c(4+length(extras), 4, 4, 2) + 0.15) } else { par() } boxplot(formula=formula, data=data, axes=F, ylim=c(0,100), ...) abline(h=seq(0,100, by=5), col="grey", lty="solid", lwd=0.5) axis(2, at=seq(0,100, by=10), las=1) axis(1, labels=levels(labs), at=seq(1, length(levels(labs))), font=2, ...) if(!is.null(extras)) { for(i in 1:length(extras)) { mtext(line=i-1, text=names(extras[i]), side=1, padj=4, adj=1, at=0.55, cex=0.8, font=2); axis(line=i-1, side=1, padj=2, at=seq(1,nrow(extras[i])), labels=t(extras[i]), cex.axis=0.8, tick=F, ...) } } b <- boxplot(formula=formula, data=data, col="light blue", yaxt="n", xaxt="n", add=TRUE) for (g in unique(b$group)) { # for every group that has at least one outlier # get assessment label l <- b$name[g] # get outliers o <- b$out[which(b$group == g)] # get x,y coords of outliers and ID labels x0 <- rep(g, length(o)); x1 <- rep(g+0.1, length(o)) y0 <- o; y1 <- spread.labs(o, 1.25*strheight('A'), maxiter=6000, stepsize = 0.05) # plots IDs if(drawOutlierLabels) { # get IDs of outliers i <- data$ID[data$Assessment == l & data$Mark %in% o] text(x1, y1, i, adj=c(-0.1,0.5), col="black", cex=0.7) # plot lines linking IDs to points (swap y0 with y1 if the lines pointing to outliers are backwards) segments(x0+0.5*strwidth("X")+strwidth(" "), y0, x1-strwidth(""), y1, col="black", lty=1) } # replot outliers with coloured background points(x=x0, y=y0, bg="light blue", pch=21) } par(op) }