install.packages('ape', repos='http://cran.wustl.edu/') install.packages('seqinr', repos='http://cran.wustl.edu/') library(ape) library(seqinr) ## A chi-square test of all of the sites does not show differences ## in base frequencies. See below. data<- read.alignment('algae.fasta.txt', format="fasta") datamatrix<-as.matrix(data) chisqtable <- as.matrix(array(0, dim=c(nrow(datamatrix), 4))) row.names(chisqtable)=row.names(datamatrix) for(j in 1:ncol(datamatrix)){ for(i in 1:nrow(datamatrix)){ if(datamatrix[i,j]=="a"){chisqtable[i,1] = chisqtable[i,1]+1} if(datamatrix[i,j]=="c"){chisqtable[i,2] = chisqtable[i,2]+1} if(datamatrix[i,j]=="g"){chisqtable[i,3] = chisqtable[i,3]+1} if(datamatrix[i,j]=="u"){chisqtable[i,4] = chisqtable[i,4]+1} }} chisq.test(chisqtable) ## Checking Pairs of Species for(j in 2:nrow(chisqtable)){ for(i in 1:(j-1)){ print(c(i,j)) print(chisq.test(chisqtable[c(i,j),])) }} ## The results change when we limit the analysis to the sites that vary ## The sites that vary differ in their base composition among species. variable_sites=c() for(j in 1:ncol(datamatrix)){ switch=0 top=datamatrix[1,j] for(i in 2:nrow(datamatrix)){ if(datamatrix[i,j] != top){switch = 1}} if(switch==1){ variable_sites=append(variable_sites, c(j))} } reduceddatamatrix <- datamatrix[,variable_sites] chisqtable <- as.matrix(array(0, dim=c(nrow(reduceddatamatrix), 4))) row.names(chisqtable)=row.names(reduceddatamatrix) for(j in 1:ncol(reduceddatamatrix)){ for(i in 1:nrow(reduceddatamatrix)){ if(reduceddatamatrix[i,j]=="a"){chisqtable[i,1] = chisqtable[i,1]+1} if(reduceddatamatrix[i,j]=="c"){chisqtable[i,2] = chisqtable[i,2]+1} if(reduceddatamatrix[i,j]=="g"){chisqtable[i,3] = chisqtable[i,3]+1} if(reduceddatamatrix[i,j]=="u"){chisqtable[i,4] = chisqtable[i,4]+1} }} chisq.test(chisqtable)