options(width=240) # put in the parameters grid.title <- "Test grid" top.rat <- 6 btm.rat <-1 n.cons <- 8 n.elem <- 7 # next is the tail proportions of correlation and distance matrices to print # e.g. .2 will get you a print out of the top and bottom 20% in order frac <- .2 # the next section strikes me as rather ugly but it works # it reads the grid data in from a file as a list # then converts them to a matrix of the right dimensions # could probably do better coming in through a data frame mat <- scan("d:\\grids\\progs\\s-plus\\ingrid.dat") mat <- as.matrix(mat) dim(mat) <- c(n.elem,n.cons) mat <- t(mat) # end of mucky input bit ## functions short.labels <- function(n) { # returns labels for a lower triangular matrix in format x*y # where x and y are numbers of column and row respectively lab <- 0 # clearing things k <- 1 for (i in 1:(n-1)) { for (j in (i+1):n) { tmp <- paste(i,"*",sep="") lab[k] <- paste(tmp,j,sep="") k <- k+1 } } lab } long.labels <- function(labels) { # returns labels for a lower triangular matrix in format label1*label2 # where labels are taken from vector labels # and refer to the column and row respectively lab <- 0 # clearing things if (!is.vector(labels)) return("labels is not a vector. It must be!") n <- length(labels) k <- 1 for (i in 1:(n-1)) { for (j in (i+1):n) { tmp <- paste(labels[i],"*",sep="") lab[k] <- paste(tmp,labels[j],sep="") k <- k+1 } } lab } ssq <- function(x) sum(x^2) rms <- function(x) sqrt(mean(x^2)) which.ge <- function(x,crit) { # returns index number of values in x greater than or equal to crit which.x <- seq(along=x)[x >= crit] which.x } extend <- function(array,extend) { # returns array of same length as array with values changed # ... increased by extend if positive # ... minus extend if negative new.array <- array for (i in 1:length(array)) { if (array[i] >= 0) new.array[i] <- array[i] + extend else new.array[i] <- array[i] - extend } new.array } ## end of functions mat # this creates labels cons.lab <- c( "Domineering", "Sexually attractive", "Easily controlled", "Rejecting", "Loving", "Neglecting", "Sexually intimidating", "another fictious construct label" ) elem.lab <- c( "U to moth", "Moth to U", "U to fath", "Fath to U", "U to part", "Part to U", "U to vict" ) dimnames(mat) <- list(cons.lab,elem.lab) # now calculate some more parameters mid.rat <- (top.rat + btm.rat)/2 range <- top.rat - btm.rat g <- range/2 # maximum possible number of principal components: if (n.cons < (n.elem-1)) n.comp <- n.cons else n.comp <- n.elem-1 # generate labels for components comp.lab <- paste("PC",1:n.comp,sep="") cat(file="d:\\grids\\progs\\s-plus\\ingrid.out","Analysis of grid: ", grid.title,"\n") cat(file="d:\\grids\\progs\\s-plus\\ingrid.out",append=T,"Grid has ",n.cons," constructs and ",n.elem ,"elements\n") cat(file="d:\\grids\\progs\\s-plus\\ingrid.out",append=T,"That means the maximum number of components is ",n.comp,"\n") cat(file="d:\\grids\\progs\\s-plus\\ingrid.out",append=T,"Maximum rating allowed on the grid is ",top.rat,"\n") cat(file="d:\\grids\\progs\\s-plus\\ingrid.out",append=T,"Minimum rating allowed on the grid is ",btm.rat,"\n") obs.max <- max(mat) obs.min <- min(mat) cat(file="d:\\grids\\progs\\s-plus\\ingrid.out",append=T,"The highest rating actually used was ",obs.max,"\n") cat(file="d:\\grids\\progs\\s-plus\\ingrid.out",append=T,"and the lowest was ",obs.min,"\n") cat(file="d:\\grids\\progs\\s-plus\\ingrid.out",append=T,sep="\n","The grid is as follows.\n") mat # calculate easy construct parameters av.c <- apply(mat,1,mean) max.c <- apply(mat,1,max) min.c <- apply(mat,1,min) var.c <- apply(mat,1,var) sd.c <- sqrt(var.c) # calculate the deviation matrix by removing the construct means dev.mat <- mat - av.c # get total sum of squares tot.var <- ssq(dev.mat) # get the same for each construct ssq.c <- apply(dev.mat,1,ssq) per.c <- 100*ssq.c/tot.var cons.res <- cbind(av.c,max.c,min.c,var.c,sd.c,ssq.c,per.c) round(cons.res,3) # represent them in reverse order by variance cons.var.order <- rev(order(var.c)) round(cons.res[cons.var.order,],3) # get vector of construct means minus rating mid-point bia.c <- av.c - mid.rat # Patrick's bias measure is RMS of those differences # divided by half the possible range of ratings bias <- rms(bia.c)/abs(g) # his variability measure is the RMS of the # deviations from the construct means (mean divisor # the n_cons*(nelem-1) d.o.f.) divided by the # full range between the rating extremes d.o.f <- n.cons*(n.elem-1) vari <- sqrt(tot.var/d.o.f)/abs(g) tmp <- c(tot.var,bias,vari) names(tmp) <- c("Total variation","Bias","Variability") round(tmp,3) # now get element and construct cross-products xprod <- t(dev.mat) %*% dev.mat cxprod <- dev.mat %*% t(dev.mat) # and convert the latter to a correlation matrix ccorr <- cor(t(mat)) # this is my attempt at intensity which doesn't match that produced by INGRIDA intens <- ssq(ccorr) intens <- 100*intens/(n.cons*n.cons) # Beware, intensity doesn't match that in INGRIDA! round(intens,3) # My version is 100 times the sum of squared construct # intercorrelations divided by the square of N_CONS # It may be that they excluded the leading diagonal (which # would result in a slightly lower value). Or it may be # that they used Spearman correlations or perhaps absolute # correlations. *** I need to check this *** dimnames(ccorr) <- list(cons.lab,cons.lab) cat("Correlation matrix\n") round(ccorr,2) # now trap the correlation values of 1 on the diagonal # (and off-diagonal) before they cause complaints when # extracting arccos angular distances cang <- ccorr tmp <- which.ge(ccorr,1) cang[tmp] <- .99999999 mult <- 90/asin(1) cang <- mult*acos(cang) cat("The following re-expresses those correlations as angular\n") cat(" distances, i.e. r=0 -> ang=90 or 270; r=1 -> ang=0;\n") cat(" r=-1 -> ang=180\n") round(cang,1) # very helpful to find the strongest negative and positive correlations # first extract the lower triangular matrices lt.ccorr <- lower.tri(ccorr) lt.ccorr.values <- ccorr[lt.ccorr] lt.cang.values <- cang[lt.ccorr] # now sort out long and short labels for these ccorr.lab <- short.labels(n.cons) ccorr.long.lab <- long.labels(cons.lab) # sort out how many to extract tail <- floor(frac*length(lt.ccorr.values)) perc <- round(frac*100,2) cat("The following are the ",tail," (",perc,"%) strongest negative correlations\n") raw.order <- order(lt.ccorr.values) neg.tail <- raw.order[1:tail] tmp <- data.frame(cbind(ccorr.lab[neg.tail], ccorr.long.lab[neg.tail], round(lt.ccorr.values[neg.tail],2), round(lt.cang.values[neg.tail],1))) names(tmp) <- c("","","Pearson r","Angular dist.") tmp cat("The following are the ",tail," (",perc,"%) strongest positive correlations\n") raw.order <- rev(order(lt.ccorr.values)) pos.tail <- raw.order[1:tail] tmp <- data.frame(cbind(ccorr.lab[pos.tail], ccorr.long.lab[pos.tail], round(lt.ccorr.values[pos.tail],2), round(lt.cang.values[pos.tail],1))) names(tmp) <- c("","","Pearson r","Angular dist.") tmp cat("The following are the ",tail," (",perc,"%) smallest absolute correlations\n") abs.order <- order(abs(lt.ccorr.values)) neg.tail <- abs.order[1:tail] tmp <- data.frame(cbind(ccorr.lab[neg.tail], ccorr.long.lab[neg.tail], round(lt.ccorr.values[neg.tail],2), round(lt.cang.values[neg.tail],1))) names(tmp) <- c("","","Pearson r","Angular dist.") tmp cat("The following are the ",tail," (",perc,"%) strongest absolute correlations\n") abs.order <- rev(order(abs(lt.ccorr.values))) pos.tail <- abs.order[1:tail] tmp <- data.frame(cbind(ccorr.lab[pos.tail], ccorr.long.lab[pos.tail], round(lt.ccorr.values[pos.tail],2), round(lt.cang.values[pos.tail],1))) names(tmp) <- c("","","Pearson r","Angular dist.") tmp # calculate the element parameters av.elem <- apply(dev.mat,2,sum) mean.e <- apply(mat,2,mean) var.elem <- apply(dev.mat,2,ssq) per.elem <- 100*var.elem/tot.var cat("Mean is the mean element rating. This is not provided\n") cat(" in INGRID and can be pretty meaningless but is included\n") cat(" here partly to underline the distinction between this and...\n") cat("Sum.dev is the sum of deviations from the construct means\n") cat(" for the element in question (N.B. NOT the mean rating of\n") cat(" of the element on all constructs). This follows INGRID\n") cat("SSQ is the sum of squared deviations from construct\n") cat(" means for the element\n") cat("Perc. expresses the latter as a percentage of the total\n") tmp <- cbind(mean.e,av.elem,var.elem,per.elem) tmp <- round(tmp,2) tmp <- data.frame(tmp) names(tmp) <- c("Mean","Sum.dev","SSQ","Perc.") tmp # get matrix of squared Euclidean inter-element distances sqeuc <- xprod for (i in 1:ncol(xprod)) { for (j in 1:ncol(xprod)) { if (i == j) sqeuc[i,j] <- 0 else sqeuc[i,j] <- xprod[i,i] + xprod[j,j] - 2*xprod[i,j] } } # take element square roots to get Euclidean dists. euc <- sqrt(sqeuc) cat("The following is the matrix of Euclidean inter-element\n") cat(" distances. A Euclidean distance is the square root\n") cat(" of the sum of squared differences between the element\n") cat(" ratings on all the constructs\n") # and work out the expected Euclidean distance round(euc,1) s <- sum(diag(xprod)) cat("The following is Patrick's 'expected distance'\n") cat(" This is the square root of: twice the trace of the\n") cat(" element deviation cross-product matrix divided by\n") cat(" one less than the number of elements.\n") expdist <- sqrt(2*s/(ncol(xprod)-1)) round(expdist,2) # and get the Slater distances cat("The following is Patrick's interelement distances\n") cat(" It's the Euclidean distances divided by the\n") cat(" 'expected' distances. This serves to correct the\n") cat(" values to take into account the number of elements\n") cat(" and (I think) their deviations from the construct\n") cat(" means, i.e. to correct somewhat for the rating scale\n") cat(" and how it is being used by the person (I think!)\n") slatdist <- euc/expdist round(slatdist,2) # get matrices ready for the eigenvalues and vects. # these will contain some negative roots if n_cons # is less than n_elem-1 eigsys <- eigen(xprod) eigval <- eigsys$values[1:n.comp] ### optional plot *** move to new line of its own to get scree plot: plot(eigval,type="l") perc.val <- 100*eigval/tot.var cum.val <- perc.val for (i in 1:n.comp) { cum.val[i] <- sum(perc.val[(1:i)]) } cat("The following gives the eigenvalues for each component\n") cat(" and, usually more comprehensible, the percentage of\n") cat(" the total variation in the grid each contains.\n") cat(" The rescaling is trivial as the eigenvalues just sum\n") cat(" to the total variation in the grid.\n") tmp <- cbind(eigval,perc.val,cum.val) tmp <- round(tmp,2) tmp <- data.frame(cbind(comp.lab,tmp)) names(tmp) <- c("PC","Eigenvalue","Perc.","Cumulative %") tmp # element vectors and construct loadings come out directly eigvec <- eigsys$vectors[,1:n.comp] c.load <- dev.mat %*% eigvec # whereas element loadings and construct vectors are # obtained by multiplying element vectors and construct # loadings by appropriate multipliers for each PC # deal with any negative eigenvalues eigval[eigval<0] <- 0 # now create multipliers to convert element vectors to loadings # and construct loadings to vectors mult <- sqrt(eigval) c.mult <- 1/mult e.load <- eigvec e.resi <- eigvec c.vect <- c.load c.resi <- c.vect for (i in 1:n.comp) { e.load[,i] <- e.load[,i]*mult[i] c.vect[,i] <- c.load[,i]*c.mult[i] # and work out the residuals by cumulative # subtraction from element and construct sosq var.elem <- var.elem - e.load[,i]^2 e.resi[,i] <- var.elem ssq.c <- ssq.c - c.load[,i]^2 c.resi[,i] <- ssq.c } cat("The following gives the element vectors. These are\n") cat(" standardised i.e. don't reflect the variance\n") cat(" in the component hence I think the loadings,\n") cat(" which are in the next matrix printed, are more\n") cat(" useful a reflection of the Principal Component\n") cat(" Analysis.\n") dimnames(eigvec) <- list(elem.lab,comp.lab) round(eigvec,2) dimnames(e.load) <- list(elem.lab,comp.lab) round(e.load,2) cat("The following is th element residuals, i.e. the \n") cat(" amount of the squared deviations from the construct\n") cat(" means left to be accounted for after extraction\n") cat(" of the components up that that PC\n") dimnames(e.resi) <- list(elem.lab,comp.lab) round(e.resi,2) cat("The next three matrices give the construct vectors,\n") cat(" loadings and residuals, as for the elements.\n") dimnames(c.vect) <- list(cons.lab,comp.lab) dimnames(c.load) <- list(cons.lab,comp.lab) dimnames(c.resi) <- list(cons.lab,comp.lab) round(c.vect,2) round(c.load,2) round(c.resi,2) ## res.mat <- dev.mat ## dimnames(res.mat) <- list(cons.lab,elem.lab) ## # now work out the actual residual grids for each PC ## cat("Residual unexplained deviation grid when no PCs considered\n") ## round(res.mat,2) ## for (i in 1:n.comp) { ## for (j in 1:n.cons) { ## for (k in 1:n.elem) { ## res.mat[j,k] <- res.mat[j,k] - c.vect[j,i]*e.load[k,i] ## } ##} ## cat("Residual unexplained in deviation grid after ",i," PCs considered\n") ## round(res.mat,2) ##} # put up labels for plots e.mark <- seq(1:n.elem) # need something more tricky for the constructs as using letters # need to allow for ridiculous numbers needing upper and lower case # and even for more than 52 constructs if (n.cons > 52) { c.mark <- paste("C",1:n.cons,sep="") # simply paste together as C1,C2.. } else { if (n.cons <= 26) { c.mark <- LETTERS[1:n.cons] # upper case } else { rest <- n.cons - 26 c.mark <- c(LETTERS,letters[1:rest]) } } all.pts.x <- c(e.load[,1],c.load[,1]) all.pts.y <- c(e.load[,2],c.load[,2]) all.pts.r <- sqrt(all.pts.x^2 + all.pts.y^2) max.x <- max(abs(all.pts.x)) max.y <- max(abs(all.pts.y)) max.r <- max(all.pts.r) max.axis <- ceiling(max.r) extension <- max.axis/20 circle.x <- seq(-max.r,max.r,length=100) circle.y1 <- sqrt(max.r^2 - circle.x^2) circle.y2 <- -sqrt(max.r^2 - circle.x^2) par(fin=c(8,8),pty="s") plot(c(-max.axis,max.axis,0,0),c(0,0,-max.axis,max.axis),xlim=c(-max.axis,max.axis), ylim=c(-max.axis,max.axis),type="n",xlab="PC1",ylab="PC2",axes=F) title(grid.title) axis(1, pos=0,at = c(-max.axis,max.axis)) axis(2, pos=0,at = c(-max.axis,max.axis)) lines(circle.x,circle.y1) lines(circle.x,circle.y2) arrows(0,0,c.load[,1],c.load[,2]) arrow.lab.x <- extend(c.load[,1],extension) arrow.lab.y <- extend(c.load[,2],extension) text(arrow.lab.x,arrow.lab.y,c.mark) points(e.load[,1],e.load[,2],pch=16) point.lab.x <- extend(e.load[,1],extension) point.lab.y <- extend(e.load[,2],extension) text(point.lab.x,point.lab.y,e.mark)