Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(cvq2)
- #library(leaps)
- # cia is karto jau pKd skirtumas imtas:
- selectivity<-read.table("selectivityCA12.csv", sep=",", header=TRUE)
- qsar12_1<-data.frame(K<-selectivity$CA12.1)
- qsar12_1$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
- #random numbers:
- #> test <- sort(round(runif(10, 1, 40)))
- #> test
- test <- c(2, 3, 8, 9, 10, 16, 18, 19, 29, 35)
- #tada:
- train <- c(1, 4, 5, 6, 7, 11, 12, 13, 14, 15, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40)
- r2=NULL
- r2good=NULL
- for (i in 1:1317 ) {
- fit.single<-lm(qsar12_1$K[train]~qsar12_1$x[train,i])
- r2[i]<-summary(fit.single)$r.squared
- if(r2[i]>0.3) {
- r2good[i]<-r2[i]
- }
- }
- #leaps<-regsubsets(qsar12_1$K[train]~qsar12_1$x[train,c(1:31)], data=qsar12_1, nvmax=5)
- #plot(leaps, scale="r2")
- qsar_12_1_train<-lm(qsar12_1$K[train] ~ qsar12_1$x[train, 105] + qsar12_1$x[train, 333] + qsar12_1$x[train, 1205])
- print(summary(qsar_12_1_train))
- qsar_12_1_test_pred_values<-coef(qsar_12_1_train)[1]+coef(qsar_12_1_train)[2]*qsar12_1$x[test, 105]+coef(qsar_12_1_train)[3]*qsar12_1$x[test, 333]+coef(qsar_12_1_train)[4]*qsar12_1$x[test, 1205]
- qsar_12_1_test<-lm(qsar_12_1_test_pred_values ~ qsar12_1$K[test])
- print(summary(qsar_12_1_test))
- x<-cbind(qsar12_1$x[train,c(105,333,1205)], qsar12_1$K[train])
- colnames(x)[4]<-"y"
- qsar_12_1_q2<-cvq2(x)
- print(qsar_12_1_q2)
- #==========================================
- qsar12_2<-data.frame(K<-selectivity$CA12.2)
- qsar12_2$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
- r2=NULL
- r2good=NULL
- for (i in 1:1317 ) {
- fit.single<-lm(qsar12_2$K[train]~qsar12_2$x[train,i])
- r2[i]<-summary(fit.single)$r.squared
- if(r2[i]>0.3) {
- r2good[i]<-r2[i]
- }
- }
- #kaip ir be leaps variantas:
- #tada imti didziausia is r2good ir salinti koreliacijas su kitais, rasti kuris nekoreliuoja
- #r2very_good istrintos koreliacijos su 1231 kuris labai geras sitame....
- #arba r2good kas gero tada dar ziureti su leaps
- qsar_12_2_train<-lm(qsar12_2$K[train] ~ qsar12_2$x[train, 293] +qsar12_2$x[train, 308] + qsar12_2$x[train, 311])
- print(summary(qsar_12_2_train))
- qsar_12_2_test_pred_values<-coef(qsar_12_2_train)[1]+coef(qsar_12_2_train)[2]*qsar12_2$x[test, 293]+coef(qsar_12_2_train)[3]*qsar12_2$x[test, 308]+coef(qsar_12_2_train)[4]*qsar12_2$x[test, 311]
- qsar_12_2_test<-lm(qsar_12_2_test_pred_values ~ qsar12_2$K[test])
- print(summary(qsar_12_2_test))
- x<-cbind(qsar12_2$x[train,c(293,308,311)], qsar12_2$K[train])
- colnames(x)[4]<-"y"
- qsar_12_2_q2<-cvq2(x)
- print(qsar_12_2_q2)
- #=============================================
- qsar_12_3_train<-lm(qsar$K[train] ~ qsar$x[train, 100] + qsar$x[train, 275] + qsar$x[train, 300])
- print(summary(qsar_12_3_train))
- qsar_12_3_test_pred_values<-coef(qsar_12_3_train)[1]+coef(qsar_12_3_train)[2]*qsar$x[test, 100]+coef(qsar_12_3_train)[3]*qsar$x[test, 275]+coef(qsar_12_3_train)[4]*qsar$x[test, 300]
- qsar_12_3_test<-lm(qsar_12_3_test_pred_values ~ qsar$K[test])
- print(summary(qsar_12_3_test))
- x<-cbind(qsar$x[train,c(100,275,300)], qsar$K[train])
- colnames(x)[4]<-"y"
- qsar_12_3_q2<-cvq2(x)
- print(qsar_12_3_q2)
- #===============================================
- #grafiko asys nuo/iki:
- minK<-5.8
- maxK<-9
- qsar1<-coef(qsar_12_1_train)[1]+coef(qsar_12_1_train)[2]*qsar$x[, 275]+coef(qsar_12_1_train)[3]*qsar$x[, 1074]+coef(qsar_12_1_train)[4]*qsar$x[, 1107]
- qsar2<-coef(qsar_12_2_train)[1]+coef(qsar_12_2_train)[2]*qsar$x[, 649]+coef(qsar_12_2_train)[3]*qsar$x[, 1074]+coef(qsar_12_2_train)[4]*qsar$x[, 1259]
- qsar3<-coef(qsar_12_3_train)[1]+coef(qsar_12_3_train)[2]*qsar$x[, 100]+coef(qsar_12_3_train)[3]*qsar$x[, 275]+coef(qsar_12_3_train)[4]*qsar$x[, 300]
- grafikui<-cbind(qsar$K, qsar1, qsar2, qsar3)
- colnames(grafikui)[1]<-"pKd"
- mod_qsar1<-lm(grafikui[train,1]~grafikui[train,2])
- mod_qsar2<-lm(grafikui[train,1]~grafikui[train,3])
- mod_qsar3<-lm(grafikui[train,1]~grafikui[train,4])
- mod_qsar1t<-lm(grafikui[test,1]~grafikui[test,2])
- mod_qsar2t<-lm(grafikui[test,1]~grafikui[test,3])
- mod_qsar3t<-lm(grafikui[test,1]~grafikui[test,4])
- png("Edita2013_grafikas2.png", width=600, height=900)
- par(mfrow=c(3,2), mar=c(1,1,0,0), oma=c(6,6,0,0), cex.axis=2)
- plot(grafikui[train,1], grafikui[train,2], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
- abline(mod_qsar1)
- title('QSAR1', line = -3, cex.main=3)
- axis(1,col.axis = "transparent", tck = 0.02)
- plot(grafikui[test,1], grafikui[test,2], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), xlab=NA, ylab=NA, xaxt="n", yaxt="n")
- abline(mod_qsar1t)
- title('QSAR1 test set', line = -3, cex.main=3)
- axis(1,col.axis = "transparent", tck = 0.02)
- axis(2,col.axis = "transparent", tck = 0.02)
- plot(grafikui[train,1], grafikui[train,3], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
- abline(mod_qsar2)
- title('QSAR2', line = -3, cex.main=3)
- plot(grafikui[test,1], grafikui[test,3], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n", yaxt="n")
- abline(mod_qsar2t)
- title('QSAR2 test set', line = -3, cex.main=3)
- axis(1,col.axis = "transparent", tck = 0.02)
- axis(2,col.axis = "transparent", tck = 0.02)
- plot(grafikui[train,1], grafikui[train,4], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE)
- abline(mod_qsar3)
- title('QSAR3', line = -3, cex.main=3)
- plot(grafikui[test,1], grafikui[test,4], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), yaxt="n", cex.lab=3)
- abline(mod_qsar3t)
- title('QSAR3 test set', line = -3, cex.main=3)
- axis(2,col.axis = "transparent", tck = 0.02)
- mtext('pKd (experimental)', SOUTH<-1, line=2.5, cex=2, outer=TRUE)
- mtext('pKd (calculated)', WEST<-2, line=2.5, cex=2, outer=TRUE)
- dev.off()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement