#hcmv <- read.table("D:\\Claudio\\BioGenomics\\hcmv.data", header = TRUE) #ls() print("first 10 locations of palindromes for cmv data") print(hcmv[1:10,]) readline() par(mfrow=c(3,1)) plot(hcmv[1:100,],rep(0,100),pch="|") plot(hcmv[101:200,],rep(0,100),pch="|") plot(hcmv[201:296,],rep(0,96),pch="|") readline() hcmv4k <- cut(as.integer(hcmv[,1]),seq(0,228000,4000), labels=as.character(seq(0,224000,4000))) print("palindrome counts in the first 57 nonoverlaping intervals") print("of 4000 base pairs of CMV DNA") print(table(hcmv4k)) readline() print("distribution of palindrome counts for intervals of") print("4000 base pairs for the 294 palindromes in the first") print("228,000 base pairs of CMV DNA") print(table(table(hcmv4k))) readline() print("expected number of intervals with 0-2 palindromes") print(57*(dpois(0,5.16)+dpois(1,5.16)+dpois(2,5.16))) readline() print("expected and observed") print( 57*c((dpois(0,5.16)+dpois(1,5.16)+dpois(2,5.16)),dpois(3,5.16),dpois(4,5.16),dpois(5,5.16),dpois(6,5.16),dpois(7,5.16),dpois(8,5.16),1-ppois(8,5.16))) print(table(table(hcmv4k))) readline() print("goodness of fit = Sigma (O-E)^2 / E") print( sum( ((as.integer(table(cut(table(hcmv4k),c(0,2:8,15)))) - 57*c((dpois(0,5.16)+dpois(1,5.16)+dpois(2,5.16)),dpois(3,5.16), dpois(4,5.16),dpois(5,5.16),dpois(6,5.16),dpois(7,5.16), dpois(8,5.16),1-ppois(8,5.16)))^2) / (57*c((dpois(0,5.16)+dpois(1,5.16)+dpois(2,5.16)),dpois(3,5.16), dpois(4,5.16),dpois(5,5.16),dpois(6,5.16),dpois(7,5.16), dpois(8,5.16),1-ppois(8,5.16)))) ) print("significance") print(pchisq(1.0, 6, ncp=0, lower.tail = FALSE)) readline() print("observed palindrome counts for ten consecutive segments") print("of CMV DNA. compare to expected under uniform dist. = 29.6") print(table(cut(as.integer(hcmv[,1]),seq(0,229350,by=22935), labels=as.character(seq(0,216415,22935))))) readline()