Newer
Older
templates / r / graph-functions.r
library(TeachingDemos)

# define some colors
dCol <- rgb(1.0,0,0,alpha=0.2)
dBorder <- rgb(1.0,0,0,alpha=1.0)

# function for producing a histogram
histogram <- function(data, width=5, step=10, min=0, max=100, xlab=columnName, ylab=NULL, overridelabels=NULL, showcurve=FALSE, stats=TRUE, showbinwidth=FALSE, ylimit=NULL, curveAdjust=0.6, centresRight=TRUE, statsRight=FALSE, values=TRUE, ...) {

	# get number of results of data in specified column
	n <- length(data)

	# create histogram data
	h <- hist(data, breaks=seq(min, max, by=width), include.lowest=TRUE, right=centresRight, plot=FALSE)

	if(showcurve) {
		# calculate density for plotting a fitted curve
		den <- density(data, curveAdjust, kernel="cosine", na.rm=TRUE)
	}

	# calculate range for y-axis unless it is specified as a parameter
	if(is.null(ylimit)) {
		ylimit <- max(h$count)
		ylimit <- ceiling(ylimit / step) * step

		if(showcurve) {
			ylimit <- max(c(ylimit,width*n*den$y))
		}
	}

	subText = ""
	if(showbinwidth) {
		subText = paste("(bin width of ", width,")",sep="")
	}

	# create initial plot
	
	if(is.null(ylab)) {
	  ylab <- "Number of Students"
	}
	
	plot(h,ylim=c(0, ylimit), xlab=xlab, ylab=ylab, sub=subText, axes=FALSE, ...)

 	# plot some hrizontal grid lines
	abline(h=seq(0, ylimit, by=step), col="grey", lty="solid", lwd=0.5)

	# plot the histogram
	lines(h, ylim=c(0, ylimit), border="darkblue", col="light blue")

	mtext("Histogram", side=3, line=0.25)

	## render the axis labels  ##

	# cache the original graphics params and shrink the text a bit
	op <- par(cex=0.7, font.lab=2)

	# y-axis
	LEFT <- 2
	axis(LEFT, at=seq(0, ylimit, by=step), las=1)
	
	# x-axis

	# are we overriding x labels?
	if (!is.null(overridelabels)) {
		axis(1, at=seq(min+0.5*width, max-0.5*width, by=width), labels=overridelabels)
	} else {
		axis(1, at=seq(min, max, by=width), labels=seq(min, max, by=width))
	}

	# restore graphics params
	par(op)

	# plot density curve if required
	if(showcurve) {
		# scale the height of the curve and render it
		polygon(x=den$x, y=width*n*den$y, col=dCol, border=dBorder)
	}

	# render values over bars
	if(values) {
  	op <- par(cex=0.7)  # cache the graphics params
  	plot(h, labels=TRUE, col="transparent", border="transparent", add=TRUE)
  	par(op) # restore graphics params
  }

	if(stats) {
	  op <- par(cex=0.7)
		avg <- mean(na.omit(data), trim=0)		
		med <- median(na.omit(data))
		mi <- min(na.omit(data))
    ma <- max(na.omit(data))
		
		maxCount <- length(which(round(data,3) == max))
		
		subsStr <- paste("Submissions: ", length(na.omit(data)), sep="")
		avgStr <- paste("Mean: ", round(avg,1), " (", round((avg/max)*100,1),"%)", sep="")
		medStr <- paste("Median: ", round(med,1), " (", round((med/max)*100,1),"%)", sep="")
		minStr <- paste("Minimum: ", round(mi,1), " (",round((mi/max)*100,1),"%)", sep="")
    maxStr <- paste("Maximum: ", round(ma,1), " (",round((ma/max)*100,1),"%)", sep="")
		fullMarksStr <- paste("Full Marks: ", maxCount," (", round(maxCount/length(na.omit(data))*100,1),"%)", sep="")
		
		combinedStr <- paste(subsStr, avgStr, medStr, minStr, maxStr, fullMarksStr, sep="\n")

		strW <- strwidth(combinedStr)
		strH <- strheight(combinedStr)

		margin <- 2
		xmargin <- margin * max / 100
		ymargin <- margin * ylimit / 100
		
		if(!statsRight) {
			rect(xleft=0, xright=strW+xmargin*2, ytop=ylimit, ybottom=ylimit-strH-ymargin*2, col="white") 		
			text(x=xmargin, y=ylimit-(strH / 2) - ymargin, combinedStr, adj=0)
		} else {
			rect(xleft=max-strW-(xmargin*2), max, ytop=ylimit, ybottom=ylimit-strH-ymargin*2, col="white") 		
			text(x=max-strW-xmargin, y=ylimit-(strH / 2) - ymargin, combinedStr, adj=0)
		}
    par(op)
	}

}

# function for producing a scatter plot

#pointlabels are labels to be rendered beside each point on the plot
scatter <- function(pointlabels=NULL, x, y, xlab, ylab, maxx=100, maxy=100, fit=FALSE, ...) {

	# plot x vs y as solid points (thats what the pch=19 means) points
	plot(x, y, xlim=c(0,maxx), ylim=c(0,maxy), col="light blue", pch=19, xlab=xlab, ylab=ylab, xaxt='n',yaxt='n', ...)	

 	# plot some grid lines
	abline(h=seq(0, maxy, 10), col="light grey", lty="solid", lwd=0.5)
	abline(v=seq(0, maxx, 10), col="light grey", lty="solid", lwd=0.5)
#	abline(a=0, b=1, col="light grey", lwd=0.5)

	# plot the graph
	points(x, y, xlim=c(0,maxx), ylim=c(0,maxy), col="light blue", pch=19, xlab=xlab, ylab=ylab, ...)

	# cache the original graphics params and shrink the text a bit
	op <- par(cex=0.7)

	# y-axis
	LEFT <- 2
	axis(LEFT, at=seq(0, maxy, by=10), las=1)
	axis(1, at=seq(0, maxx, by=10))
	
	par(op)

	mtext("Scatter Plot", side=3, line=0.25)
	
	# replot the points with darker outline to create a border around the points
	points(x,y, col="dark blue")
	
	# display point labels next to points if they were provided
	if(!is.null(pointlabels)) {
		# calculate a normalised offset for the x position of the labels
		xlabelpos <- x-3.8*(maxx/100)
		
		# render the labels
		text(xlabelpos,y,pointlabels, cex=0.5)
	}

	# change plot color
	op <- par(col="red")

	co <- list()
	co$x <- x
	co$y <- y

	valid <- is.na(co$x) | is.na(co$y)
	valid <- !valid

	co$x <- co$x[valid]		
	co$y <- co$y[valid]
	
	if(fit) {
		# render a lowess regression
		lines(lowess(co))
    #abline(reggie)
	}

	# restore original plot params
	par(op)
}


# Sums two vectors in a coalescing fashion.
# If either value in x1 or x2 is NA that the result is the non-NA value.
# If both values in x1 or x2 are NA then the result is NA.
# If neither x1 or x2 is NA then the result is x1 + x2.
# coalescing_sum <- function (x1, x2) {
# 
# # Iterative version.  Keeping around as an example.
# 
# 	results <- 1:length(x1)
# 
# 	for(i in 1:length(x1)) {
# 
# 		if(xor(is.na(x1[i]),is.na(x2[i]))) {
# 			results[i] <- ifelse(is.na(x1[i]), 0, x1[i]) + ifelse(is.na(x2[i]), 0, x2[i])
# 		} else {
# 			results[i] <- x1[i] + x2[i]
# 		}
# 
# 	}
# 
# 	return(results)
# }

# Sums two vectors in a coalescing fashion.
# If either value in x1 or x2 is NA that the result is the non-NA value.
# If both values in x1 or x2 are NA then the result is NA.
# If neither x1 or x2 is NA then the result is x1 + x2.
coalescing_sum <- function (x, y) {
  
  # keep track of which positions have both values as na
  both.na <- is.na(x) & is.na(y)
  
  # zero nas
  x[is.na(x)] <- 0
  y[is.na(y)] <- 0
  
  # do the sum
  z <- x + y;
  
  # replace positions with both na as na
  z[both.na] <- NA

  return(z)
}

summaryboxplot <- function(data, formula, drawOutlierLabels=F, extras=NULL, labs=NULL, ...) {
  
  # add padding if adding extras
  op <-if(!is.null(extras)) {
    par(mar=c(4+length(extras), 4, 4, 2) + 0.15)
  } else {
    par()
  }

  boxplot(formula=formula, data=data, axes=F, ylim=c(0,100), ...)
  abline(h=seq(0,100, by=5), col="grey", lty="solid", lwd=0.5)
  axis(2, at=seq(0,100, by=10), las=1)

  axis(1, labels=levels(labs), at=seq(1, length(levels(labs))), font=2, ...)

  if(!is.null(extras)) {
    for(i in 1:length(extras)) {      
      mtext(line=i-1, text=names(extras[i]), side=1, padj=4, adj=1, at=0.55, cex=0.8, font=2);
      axis(line=i-1, side=1, padj=2, at=seq(1,nrow(extras[i])), labels=t(extras[i]), cex.axis=0.8, tick=F, ...)    
    }
  }

	b <- boxplot(formula=formula, data=data, col="light blue", yaxt="n", xaxt="n", add=TRUE)

	for (g in unique(b$group)) { # for every group that has at least one outlier
		
		# get assessment label
		l <- b$name[g]
		
		# get outliers
		o <- b$out[which(b$group == g)]
		
	
		# get x,y coords of outliers and ID labels
		x0 <- rep(g, length(o));
		x1 <- rep(g+0.1, length(o))
		y0 <- o; 
		y1 <- spread.labs(o, 1.25*strheight('A'), maxiter=6000, stepsize = 0.05)
	
		# plots IDs
		if(drawOutlierLabels) {
		  # get IDs of outliers
		  i <- data$ID[data$Assessment == l & data$Mark %in% o]	
      
      text(x1, y1, i, adj=c(-0.1,0.5), col="black", cex=0.7)
	
			# plot lines linking IDs to points (swap y0 with y1 if the lines pointing to outliers are backwards)
			segments(x0+0.5*strwidth("X")+strwidth(" "), y0, x1-strwidth(""), y1, col="black", lty=1)
		}
		
		# replot outliers with coloured background
		points(x=x0, y=y0, bg="light blue", pch=21)
	}
	par(op)	
}