| | library(TeachingDemos) |
---|
| | |
---|
| | # define some colors |
---|
| | dCol <- rgb(1.0,0,0,alpha=0.2) |
---|
| | dBorder <- rgb(1.0,0,0,alpha=1.0) |
---|
| | |
---|
| | # function for producing a histogram |
---|
| | histogram <- function(data, width=5, step=10, min=0, max=100, xlab=columnName, ylab=NULL, overridelabels=NULL, showcurve=FALSE, stats=TRUE, showbinwidth=FALSE, ylimit=NULL, curveAdjust=0.6, centresRight=TRUE, statsRight=FALSE, values=TRUE, ...) { |
---|
| | |
---|
| | # get number of results of data in specified column |
---|
| | n <- length(data) |
---|
| | |
---|
| | # create histogram data |
---|
| | h <- hist(data, breaks=seq(min, max, by=width), include.lowest=TRUE, right=centresRight, plot=FALSE) |
---|
| | |
---|
| | if(showcurve) { |
---|
| | # calculate density for plotting a fitted curve |
---|
| | den <- density(data, curveAdjust, kernel="cosine", na.rm=TRUE) |
---|
| | } |
---|
| | |
---|
| | # calculate range for y-axis unless it is specified as a parameter |
---|
| | if(is.null(ylimit)) { |
---|
| | ylimit <- max(h$count) |
---|
| | ylimit <- ceiling(ylimit / step) * step |
---|
| | |
---|
| | if(showcurve) { |
---|
| | ylimit <- max(c(ylimit,width*n*den$y)) |
---|
| | } |
---|
| | } |
---|
| | |
---|
| | subText = "" |
---|
| | if(showbinwidth) { |
---|
| | subText = paste("(bin width of ", width,")",sep="") |
---|
| | } |
---|
| | |
---|
| | # create initial plot |
---|
| | |
---|
| | if(is.null(ylab)) { |
---|
| | ylab <- "Number of Students" |
---|
| | } |
---|
| | |
---|
| | plot(h,ylim=c(0, ylimit), xlab=xlab, ylab=ylab, sub=subText, axes=FALSE, ...) |
---|
| | |
---|
| | # plot some hrizontal grid lines |
---|
| | abline(h=seq(0, ylimit, by=step), col="grey", lty="solid", lwd=0.5) |
---|
| | |
---|
| | # plot the histogram |
---|
| | lines(h, ylim=c(0, ylimit), border="darkblue", col="light blue") |
---|
| | |
---|
| | mtext("Histogram", side=3, line=0.25) |
---|
| | |
---|
| | ## render the axis labels ## |
---|
| | |
---|
| | # cache the original graphics params and shrink the text a bit |
---|
| | op <- par(cex=0.7, font.lab=2) |
---|
| | |
---|
| | # y-axis |
---|
| | LEFT <- 2 |
---|
| | axis(LEFT, at=seq(0, ylimit, by=step), las=1) |
---|
| | |
---|
| | # x-axis |
---|
| | |
---|
| | # are we overriding x labels? |
---|
| | if (!is.null(overridelabels)) { |
---|
| | axis(1, at=seq(min+0.5*width, max-0.5*width, by=width), labels=overridelabels) |
---|
| | } else { |
---|
| | axis(1, at=seq(min, max, by=width), labels=seq(min, max, by=width)) |
---|
| | } |
---|
| | |
---|
| | # restore graphics params |
---|
| | par(op) |
---|
| | |
---|
| | # plot density curve if required |
---|
| | if(showcurve) { |
---|
| | # scale the height of the curve and render it |
---|
| | polygon(x=den$x, y=width*n*den$y, col=dCol, border=dBorder) |
---|
| | } |
---|
| | |
---|
| | # render values over bars |
---|
| | if(values) { |
---|
| | op <- par(cex=0.7) # cache the graphics params |
---|
| | plot(h, labels=TRUE, col="transparent", border="transparent", add=TRUE) |
---|
| | par(op) # restore graphics params |
---|
| | } |
---|
| | |
---|
| | if(stats) { |
---|
| | op <- par(cex=0.7) |
---|
| | avg <- mean(na.omit(data), trim=0) |
---|
| | med <- median(na.omit(data)) |
---|
| | mi <- min(na.omit(data)) |
---|
| | ma <- max(na.omit(data)) |
---|
| | |
---|
| | maxCount <- length(which(round(data,3) == max)) |
---|
| | |
---|
| | subsStr <- paste("Submissions: ", length(na.omit(data)), sep="") |
---|
| | avgStr <- paste("Mean: ", round(avg,1), " (", round((avg/max)*100,1),"%)", sep="") |
---|
| | medStr <- paste("Median: ", round(med,1), " (", round((med/max)*100,1),"%)", sep="") |
---|
| | minStr <- paste("Minimum: ", round(mi,1), " (",round((mi/max)*100,1),"%)", sep="") |
---|
| | maxStr <- paste("Maximum: ", round(ma,1), " (",round((ma/max)*100,1),"%)", sep="") |
---|
| | fullMarksStr <- paste("Full Marks: ", maxCount," (", round(maxCount/length(na.omit(data))*100,1),"%)", sep="") |
---|
| | |
---|
| | combinedStr <- paste(subsStr, avgStr, medStr, minStr, maxStr, fullMarksStr, sep="\n") |
---|
| | |
---|
| | strW <- strwidth(combinedStr) |
---|
| | strH <- strheight(combinedStr) |
---|
| | |
---|
| | margin <- 2 |
---|
| | xmargin <- margin * max / 100 |
---|
| | ymargin <- margin * ylimit / 100 |
---|
| | |
---|
| | if(!statsRight) { |
---|
| | rect(xleft=0, xright=strW+xmargin*2, ytop=ylimit, ybottom=ylimit-strH-ymargin*2, col="white") |
---|
| | text(x=xmargin, y=ylimit-(strH / 2) - ymargin, combinedStr, adj=0) |
---|
| | } else { |
---|
| | rect(xleft=max-strW-(xmargin*2), max, ytop=ylimit, ybottom=ylimit-strH-ymargin*2, col="white") |
---|
| | text(x=max-strW-xmargin, y=ylimit-(strH / 2) - ymargin, combinedStr, adj=0) |
---|
| | } |
---|
| | par(op) |
---|
| | } |
---|
| | |
---|
| | } |
---|
| | |
---|
| | # function for producing a scatter plot |
---|
| | |
---|
| | #pointlabels are labels to be rendered beside each point on the plot |
---|
| | scatter <- function(pointlabels=NULL, x, y, xlab, ylab, maxx=100, maxy=100, stats=TRUE, fit=TRUE, ...) { |
---|
| | |
---|
| | # replace nas with 0 |
---|
| | x[is.na(x)] <- 0 |
---|
| | y[is.na(y)] <- 0 |
---|
| | |
---|
| | # calculate correlation |
---|
| | r <- cor(x, y) |
---|
| | |
---|
| | r2 = round(r ** 2,2) |
---|
| | |
---|
| | # concat r string with rounded value |
---|
| | r <- substitute('r'^2*' = '*r2, list(r2 = r2)) |
---|
| | |
---|
| | |
---|
| | # plot x vs y as solid points (thats what the pch=19 means) points |
---|
| | plot(x, y, xlim=c(0,maxx), ylim=c(0,maxy), col="light blue", pch=19, xlab=xlab, ylab=ylab, xaxt='n',yaxt='n', ...) |
---|
| | |
---|
| | # plot some grid lines |
---|
| | abline(h=seq(0, maxy, 10), col="grey", lty="solid", lwd=0.5) |
---|
| | abline(v=seq(0, maxx, 10), col="grey", lty="solid", lwd=0.5) |
---|
| | abline(a=0, b=1, col="grey") |
---|
| | |
---|
| | # plot the graph |
---|
| | points(x, y, xlim=c(0,maxx), ylim=c(0,maxy), col="light blue", pch=19, xlab=xlab, ylab=ylab, ...) |
---|
| | |
---|
| | # cache the original graphics params and shrink the text a bit |
---|
| | op <- par(cex=0.7) |
---|
| | |
---|
| | # y-axis |
---|
| | LEFT <- 2 |
---|
| | axis(LEFT, at=seq(0, maxy, by=10), las=1) |
---|
| | axis(1, at=seq(0, maxx, by=10)) |
---|
| | |
---|
| | par(op) |
---|
| | |
---|
| | # replot the points with darker outline to create a border around the points |
---|
| | points(x,y, col="dark blue") |
---|
| | |
---|
| | # display point labels next to points if they were provided |
---|
| | if(!is.null(pointlabels)) { |
---|
| | # calculate a normalised offset for the x position of the labels |
---|
| | xlabelpos <- x-3.8*(maxx/100) |
---|
| | |
---|
| | # render the labels |
---|
| | text(xlabelpos,y,pointlabels, cex=0.5) |
---|
| | } |
---|
| | |
---|
| | if(stats) { |
---|
| | # render the r text in the bottom right |
---|
| | text(maxx-maxx*0.1,2,r) |
---|
| | } |
---|
| | |
---|
| | # change plot color |
---|
| | op <- par(col="red") |
---|
| | |
---|
| | co <- list() |
---|
| | co$x <- x |
---|
| | co$y <- y |
---|
| | |
---|
| | valid <- co$x > 10 & co$y>10 |
---|
| | |
---|
| | co$x <- co$x[valid] |
---|
| | co$y <- co$y[valid] |
---|
| | |
---|
| | if(fit) { |
---|
| | # render a lowess regression |
---|
| | lines(lowess(co)) |
---|
| | |
---|
| | } |
---|
| | |
---|
| | # restore original plot params |
---|
| | par(op) |
---|
| | } |
---|
| | |
---|
| | |
---|
| | # Sums two vectors in a coalescing fashion. |
---|
| | # If either value in x1 or x2 is NA that the result is the non-NA value. |
---|
| | # If both values in x1 or x2 are NA then the result is NA. |
---|
| | # If neither x1 or x2 is NA then the result is x1 + x2. |
---|
| | # coalescing_sum <- function (x1, x2) { |
---|
| | # |
---|
| | # # Iterative version. Keeping around as an example. |
---|
| | # |
---|
| | # results <- 1:length(x1) |
---|
| | # |
---|
| | # for(i in 1:length(x1)) { |
---|
| | # |
---|
| | # if(xor(is.na(x1[i]),is.na(x2[i]))) { |
---|
| | # results[i] <- ifelse(is.na(x1[i]), 0, x1[i]) + ifelse(is.na(x2[i]), 0, x2[i]) |
---|
| | # } else { |
---|
| | # results[i] <- x1[i] + x2[i] |
---|
| | # } |
---|
| | # |
---|
| | # } |
---|
| | # |
---|
| | # return(results) |
---|
| | # } |
---|
| | |
---|
| | # Sums two vectors in a coalescing fashion. |
---|
| | # If either value in x1 or x2 is NA that the result is the non-NA value. |
---|
| | # If both values in x1 or x2 are NA then the result is NA. |
---|
| | # If neither x1 or x2 is NA then the result is x1 + x2. |
---|
| | coalescing_sum <- function (x, y) { |
---|
| | |
---|
| | # keep track of which positions have both values as na |
---|
| | both.na <- is.na(x) & is.na(y) |
---|
| | |
---|
| | # zero nas |
---|
| | x[is.na(x)] <- 0 |
---|
| | y[is.na(y)] <- 0 |
---|
| | |
---|
| | # do the sum |
---|
| | z <- x + y; |
---|
| | |
---|
| | # replace positions with both na as na |
---|
| | z[both.na] <- NA |
---|
| | |
---|
| | return(z) |
---|
| | } |
---|
| | |
---|
| | summaryboxplot <- function(data, formula, drawOutlierLabels=F, extras=NULL, labs=NULL, ...) { |
---|
| | |
---|
| | # add padding if adding extras |
---|
| | op <-if(!is.null(extras)) { |
---|
| | par(mar=c(4+length(extras), 4, 4, 2) + 0.15) |
---|
| | } else { |
---|
| | par() |
---|
| | } |
---|
| | |
---|
| | boxplot(formula=formula, data=data, axes=F, ylim=c(0,100), ...) |
---|
| | abline(h=seq(0,100, by=5), col="grey", lty="solid", lwd=0.5) |
---|
| | axis(2, at=seq(0,100, by=10), las=1) |
---|
| | |
---|
| | # axis(1, labels=levels(labs), at=seq(1, length(levels(labs))), font=2, ...) |
---|
| | axis(1, labels=levels(data$Assessment), at=seq(1, length(levels(data$Assessment))), font=2, ...) |
---|
| | |
---|
| | if(!is.null(extras)) { |
---|
| | for(i in 1:length(extras)) { |
---|
| | mtext(line=i-1, text=names(extras[i]), side=1, padj=4, adj=1, at=0.55, cex=0.8, font=2); |
---|
| | axis(line=i-1, side=1, padj=2, at=seq(1,nrow(extras[i])), labels=t(extras[i]), cex.axis=0.8, tick=F, ...) |
---|
| | } |
---|
| | } |
---|
| | |
---|
| | b <- boxplot(formula=formula, data=data, col="light blue", yaxt="n", xaxt="n", add=TRUE) |
---|
| | |
---|
| | for (g in unique(b$group)) { # for every group that has at least one outlier |
---|
| | |
---|
| | # get assessment label |
---|
| | l <- b$name[g] |
---|
| | |
---|
| | # get outliers |
---|
| | o <- b$out[which(b$group == g)] |
---|
| | |
---|
| | |
---|
| | # get x,y coords of outliers and ID labels |
---|
| | x0 <- rep(g, length(o)); |
---|
| | x1 <- rep(g+0.1, length(o)) |
---|
| | y0 <- o; |
---|
| | y1 <- spread.labs(o, 1.25*strheight('A'), maxiter=6000, stepsize = 0.05) |
---|
| | |
---|
| | # plots IDs |
---|
| | if(drawOutlierLabels) { |
---|
| | # get IDs of outliers |
---|
| | i <- data$ID[data$Assessment == l & data$Mark %in% o] |
---|
| | |
---|
| | text(x1, y1, i, adj=c(-0.1,0.5), col="black", cex=0.7) |
---|
| | |
---|
| | # plot lines linking IDs to points (swap y0 with y1 if the lines pointing to outliers are backwards) |
---|
| | segments(x0+0.5*strwidth("X")+strwidth(" "), y0, x1-strwidth(""), y1, col="black", lty=1) |
---|
| | } |
---|
| | |
---|
| | # replot outliers with coloured background |
---|
| | points(x=x0, y=y0, bg="light blue", pch=21) |
---|
| | } |
---|
| | par(op) |
---|
| | } |
---|
| | |
---|
|