Advertisement
ProzacR

Edita2013_visom_CA_QSAR

Aug 27th, 2015
479
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 11.89 KB | None | 0 0
  1. library(cvq2)
  2. library(leaps)
  3.  
  4.  
  5. # cia is karto jau pKd imta:
  6. visu_CA_pKd<-read.table("visu_CA_pKd.csv", sep=",", header=TRUE)
  7. qsar1<-data.frame(K<-visu_CA_pKd$CAI)
  8. qsar1$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  9.  
  10.  
  11. #random numbers:
  12. #> test <- sort(round(runif(10, 1, 40)))
  13. #> test
  14. test <- c(2, 3, 8, 9, 10, 16, 18, 19, 29, 35)
  15. #tada:
  16. train <- c(1, 4, 5, 6, 7, 11, 12, 13, 14, 15, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40)
  17.  
  18. r2=NULL
  19. r2good=NULL
  20. for (i in 1:1317 ) {
  21. fit.single<-lm(qsar1$K[train]~qsar1$x[train,i])
  22. r2[i]<-summary(fit.single)$r.squared
  23. if(r2[i]>0.3) {
  24. r2good[i]<-r2[i]
  25. }
  26. }
  27.  
  28. good_deskr_nr<-NULL
  29. for (i in 1:1317 ) {
  30.      if(r2[i]>0.3) {
  31.           good_deskr_nr<-c(good_deskr_nr, i)
  32.  }
  33. }
  34. #leaps<-regsubsets(qsar1$K[train]~qsar1$x[train,good_deskr_nr], data=qsar1, nvmax=3)
  35. #plot(leaps, scale="r2")
  36.  
  37. qsar_1_train<-lm(qsar1$K[train] ~ qsar1$x[train, 283] + qsar1$x[train, 290] + qsar1$x[train, 314])
  38. print(summary(qsar_1_train))
  39. qsar_1_test_pred_values<-coef(qsar_1_train)[1]+coef(qsar_1_train)[2]*qsar1$x[test, 283]+coef(qsar_1_train)[3]*qsar1$x[test, 290]+coef(qsar_1_train)[4]*qsar1$x[test, 314]
  40. qsar_1_test<-lm(qsar_1_test_pred_values ~ qsar1$K[test])
  41. print(summary(qsar_1_test))
  42. x<-cbind(qsar1$x[train,c(283,290,314)], qsar1$K[train])
  43. colnames(x)[4]<-"y"
  44. qsar_1_q2<-cvq2(x)
  45. print(qsar_1_q2)
  46.  
  47. #==========================================
  48.  
  49. qsar2<-data.frame(K<-visu_CA_pKd$CAII)
  50. qsar2$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  51.  
  52. r2=NULL
  53. r2good=NULL
  54. for (i in 1:1317 ) {
  55. fit.single<-lm(qsar2$K[train]~qsar2$x[train,i])
  56. r2[i]<-summary(fit.single)$r.squared
  57. if(r2[i]>0.3) {
  58. r2good[i]<-r2[i]
  59. }
  60. }
  61.  
  62. #kaip ir be leaps variantas:
  63. #tada imti didziausia is r2good ir salinti koreliacijas su kitais, rasti kuris nekoreliuoja
  64. #r2very_good istrintos koreliacijos su 1231 kuris labai geras  sitame....
  65.  
  66. #arba r2good kas gero tada dar ziureti su leaps
  67. #cia panaudot sena gal geriau
  68.  
  69. qsar_2_train<-lm(qsar2$K[train] ~ qsar2$x[train, 275] +qsar2$x[train, 1074] + qsar2$x[train, 1107])
  70. print(summary(qsar_2_train))
  71. qsar_2_test_pred_values<-coef(qsar_2_train)[1]+coef(qsar_2_train)[2]*qsar2$x[test, 275]+coef(qsar_2_train)[3]*qsar2$x[test, 1074]+coef(qsar_2_train)[4]*qsar2$x[test, 1107]
  72. qsar_2_test<-lm(qsar_2_test_pred_values ~ qsar2$K[test])
  73. print(summary(qsar_2_test))
  74. x<-cbind(qsar2$x[train,c(275,1074,1107)], qsar2$K[train])
  75. colnames(x)[4]<-"y"
  76. qsar_2_q2<-cvq2(x)
  77. print(qsar_2_q2)
  78.  
  79. #=============================================
  80.  
  81. qsar6<-data.frame(K<-visu_CA_pKd$CAVI)
  82. qsar6$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  83.  
  84. r2=NULL
  85. r2good=NULL
  86. for (i in 1:1317 ) {
  87. fit.single<-lm(qsar6$K[train]~qsar6$x[train,i])
  88. r2[i]<-summary(fit.single)$r.squared
  89. if(r2[i]>0.3) {
  90. r2good[i]<-r2[i]
  91. }
  92. }
  93.  
  94. good_deskr_nr<-NULL
  95. for (i in 1:1317 ) {
  96.  if(r2[i]>0.3) {
  97.  good_deskr_nr<-c(good_deskr_nr, i)
  98.  }
  99. }
  100.  
  101. #leaps<-regsubsets(qsar6$K[train]~qsar6$x[train,good_deskr_nr[1:25]], data=qsar6, nvmax=3)
  102. #plot(leaps, scale="r2")
  103.  
  104. qsar_6_train<-lm(qsar6$K[train] ~ qsar6$x[train, 305] + qsar6$x[train, 1091] + qsar6$x[train, 1205])
  105. print(summary(qsar_6_train))
  106. qsar_6_test_pred_values<-coef(qsar_6_train)[1]+coef(qsar_6_train)[2]*qsar6$x[test, 305]+coef(qsar_6_train)[3]*qsar6$x[test, 1091]+coef(qsar_6_train)[4]*qsar6$x[test, 1205]
  107. qsar_6_test<-lm(qsar_6_test_pred_values ~ qsar6$K[test])
  108. print(summary(qsar_6_test))
  109. x<-cbind(qsar6$x[train,c(305,1091,1205)], qsar6$K[train])
  110. colnames(x)[4]<-"y"
  111. qsar_6_q2<-cvq2(x)
  112. print(qsar_6_q2)
  113.  
  114. #===============================================
  115.  
  116. qsar7<-data.frame(K<-visu_CA_pKd$CAVII)
  117. qsar7$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  118.  
  119. r2=NULL
  120. r2good=NULL
  121. for (i in 1:1317 ) {
  122. fit.single<-lm(qsar7$K[train]~qsar7$x[train,i])
  123. r2[i]<-summary(fit.single)$r.squared
  124. if(r2[i]>0.3) {
  125. r2good[i]<-r2[i]
  126. }
  127. }
  128.  
  129. good_deskr_nr<-NULL
  130. for (i in 1:1317 ) {
  131.  if(r2[i]>0.3) {
  132.  good_deskr_nr<-c(good_deskr_nr, i)
  133.  }
  134. }
  135. #leaps<-regsubsets(qsar7$K[train]~qsar7$x[train,good_deskr_nr[1:25]], data=qsar7, nvmax=3)
  136. #plot(leaps, scale="r2")
  137.  
  138. qsar_7_train<-lm(qsar7$K[train] ~ qsar7$x[train, 113] + qsar7$x[train, 153] + qsar7$x[train, 283])
  139. print(summary(qsar_7_train))
  140. qsar_7_test_pred_values<-coef(qsar_7_train)[1]+coef(qsar_7_train)[2]*qsar7$x[test, 113]+coef(qsar_7_train)[3]*qsar7$x[test, 153]+coef(qsar_7_train)[4]*qsar7$x[test, 283]
  141. qsar_7_test<-lm(qsar_7_test_pred_values ~ qsar7$K[test])
  142. print(summary(qsar_7_test))
  143. x<-cbind(qsar7$x[train,c(113,153,283)], qsar7$K[train])
  144. colnames(x)[4]<-"y"
  145. qsar_7_q2<-cvq2(x)
  146. print(qsar_7_q2)
  147.  
  148. #===============================================
  149. qsar13<-data.frame(K<-visu_CA_pKd$CAXIII)
  150. qsar13$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  151.  
  152. r2=NULL
  153. r2good=NULL
  154. for (i in 1:1317 ) {
  155. fit.single<-lm(qsar13$K[train]~qsar13$x[train,i])
  156. r2[i]<-summary(fit.single)$r.squared
  157. if(r2[i]>0.3) {
  158. r2good[i]<-r2[i]
  159. }
  160. }
  161.  
  162. good_deskr_nr<-NULL
  163. for (i in 1:1317 ) {
  164.  if(r2[i]>0.3) {
  165.  good_deskr_nr<-c(good_deskr_nr, i)
  166.  }
  167. }
  168. #leaps<-regsubsets(qsar13$K[train]~qsar13$x[train,good_deskr_nr], data=qsar13, nvmax=3)
  169. #plot(leaps, scale="r2")
  170.  
  171. qsar_13_train<-lm(qsar13$K[train] ~ qsar13$x[train, 365] + qsar13$x[train, 649] + qsar13$x[train, 807])
  172. print(summary(qsar_13_train))
  173. qsar_13_test_pred_values<-coef(qsar_13_train)[1]+coef(qsar_13_train)[2]*qsar13$x[test, 365]+coef(qsar_13_train)[3]*qsar13$x[test, 649]+coef(qsar_13_train)[4]*qsar13$x[test, 807]
  174. qsar_13_test<-lm(qsar_13_test_pred_values ~ qsar13$K[test])
  175. print(summary(qsar_13_test))
  176. x<-cbind(qsar13$x[train,c(365,649,807)], qsar13$K[train])
  177. colnames(x)[4]<-"y"
  178. qsar_13_q2<-cvq2(x)
  179. print(qsar_13_q2)
  180.  
  181. #===============================================
  182. qsar12<-data.frame(K<-visu_CA_pKd$CAXII)
  183. qsar12$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  184.  
  185. r2=NULL
  186. r2good=NULL
  187. for (i in 1:1317 ) {
  188. fit.single<-lm(qsar12$K[train]~qsar12$x[train,i])
  189. r2[i]<-summary(fit.single)$r.squared
  190. if(r2[i]>0.3) {
  191. r2good[i]<-r2[i]
  192. }
  193. }
  194.  
  195. good_deskr_nr<-NULL
  196. for (i in 1:1317 ) {
  197.  if(r2[i]>0.3) {
  198.  good_deskr_nr<-c(good_deskr_nr, i)
  199.  }
  200. }
  201. #leaps<-regsubsets(qsar12$K[train]~qsar12$x[train,good_deskr_nr], data=qsar12, nvmax=3)
  202. #plot(leaps, scale="r2")
  203.  
  204. qsar_12_train<-lm(qsar12$K[train] ~ qsar12$x[train, 101] + qsar12$x[train, 111] + qsar12$x[train, 275])
  205. print(summary(qsar_12_train))
  206. qsar_12_test_pred_values<-coef(qsar_12_train)[1]+coef(qsar_12_train)[2]*qsar12$x[test, 101]+coef(qsar_12_train)[3]*qsar12$x[test, 111]+coef(qsar_12_train)[4]*qsar12$x[test, 275]
  207. qsar_12_test<-lm(qsar_12_test_pred_values ~ qsar12$K[test])
  208. print(summary(qsar_12_test))
  209. x<-cbind(qsar12$x[train,c(101,111,275)], qsar12$K[train])
  210. colnames(x)[4]<-"y"
  211. qsar_12_q2<-cvq2(x)
  212. print(qsar_12_q2)
  213.  
  214. #===============================================
  215.  
  216.  
  217.  
  218.  
  219. #grafiko asys nuo/iki:
  220. minK<--7.4
  221. maxK<-1.8
  222.  
  223. qsar1_sv<-coef(qsar_1_train)[1]+coef(qsar_1_train)[2]*qsar1$x[, 283]+coef(qsar_1_train)[3]*qsar1$x[, 290]+coef(qsar_1_train)[4]*qsar1$x[, 314]
  224. qsar2_sv<-coef(qsar_2_train)[1]+coef(qsar_2_train)[2]*qsar2$x[, 275]+coef(qsar_2_train)[3]*qsar2$x[, 1074]+coef(qsar_2_train)[4]*qsar2$x[, 1107]
  225. qsar6_sv<-coef(qsar_6_train)[1]+coef(qsar_6_train)[2]*qsar6$x[, 305]+coef(qsar_6_train)[3]*qsar6$x[, 1091]+coef(qsar_6_train)[4]*qsar6$x[, 1205]
  226. qsar7_sv<-coef(qsar_7_train)[1]+coef(qsar_7_train)[2]*qsar7$x[, 113]+coef(qsar_7_train)[3]*qsar7$x[, 153]+coef(qsar_7_train)[4]*qsar7$x[, 283]
  227. qsar13_sv<-coef(qsar_13_train)[1]+coef(qsar_13_train)[2]*qsar13$x[, 365]+coef(qsar_13_train)[3]*qsar13$x[, 649]+coef(qsar_13_train)[4]*qsar13$x[, 807]
  228. qsar12_sv<-coef(qsar_12_train)[1]+coef(qsar_12_train)[2]*qsar12$x[, 101]+coef(qsar_12_train)[3]*qsar12$x[, 111]+coef(qsar_12_train)[4]*qsar12$x[, 275]
  229.  
  230.  
  231.  
  232. #grafikui<-cbind(qsar$K, qsar1, qsar2, qsar3)
  233. #colnames(grafikui)[1]<-"pKd"
  234. mod_qsar1<-lm(qsar1$K[train]~qsar1_sv[train])
  235. mod_qsar2<-lm(qsar2$K[train]~qsar2_sv[train])
  236. mod_qsar6<-lm(qsar6$K[train]~qsar6_sv[train])
  237. mod_qsar7<-lm(qsar7$K[train]~qsar7_sv[train])
  238. mod_qsar13<-lm(qsar13$K[train]~qsar13_sv[train])
  239. mod_qsar12<-lm(qsar12$K[train]~qsar12_sv[train])
  240.  
  241. mod_qsar1t<-lm(qsar1$K[test]~qsar1_sv[test])
  242. mod_qsar2t<-lm(qsar2$K[test]~qsar2_sv[test])
  243. mod_qsar6t<-lm(qsar6$K[test]~qsar6_sv[test])
  244. mod_qsar7t<-lm(qsar7$K[test]~qsar7_sv[test])
  245. mod_qsar13t<-lm(qsar13$K[test]~qsar13_sv[test])
  246. mod_qsar12t<-lm(qsar12$K[test]~qsar12_sv[test])
  247.  
  248.  
  249. png("Edita2013_visom_CA_train.png", width=600, height=900)
  250. par(mfrow=c(3,2), mar=c(1,1,0,0), oma=c(6,6,0,0), cex.axis=2)
  251. plot(qsar1$K[train], qsar1_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
  252. abline(mod_qsar1)
  253. title('QSAR CAI train set', line = -3, cex.main=3)
  254. axis(1,col.axis = "transparent", tck = 0.02)
  255.  
  256. plot(qsar2$K[train], qsar2_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), xlab=NA, ylab=NA, xaxt="n", yaxt="n")
  257. abline(mod_qsar2)
  258. title('QSAR CAII train set', line = -3, cex.main=3)
  259. axis(1,col.axis = "transparent", tck = 0.02)
  260. axis(2,col.axis = "transparent", tck = 0.02)
  261.  
  262. plot(qsar6$K[train], qsar6_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
  263. abline(mod_qsar6)
  264. title('QSAR CAVI train set', line = -3, cex.main=3)
  265.  
  266. plot(qsar7$K[train], qsar7_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n", yaxt="n")
  267. abline(mod_qsar7)
  268. title('QSAR CAVII train set', line = -3, cex.main=3)
  269. axis(1,col.axis = "transparent", tck = 0.02)
  270. axis(2,col.axis = "transparent", tck = 0.02)
  271.  
  272. plot(qsar12$K[train], qsar12_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE)
  273. abline(mod_qsar12)
  274. title('QSAR CAXII train set', line = -3, cex.main=3)
  275.  
  276. plot(qsar13$K[train], qsar13_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), yaxt="n", cex.lab=3)
  277. abline(mod_qsar13)
  278. title('QSAR CAXIII train set', line = -3, cex.main=3)
  279. axis(2,col.axis = "transparent", tck = 0.02)
  280.  
  281. mtext('pKd difference (experimental)', SOUTH<-1, line=2.5, cex=2, outer=TRUE)
  282. mtext('pKd difference (calculated)', WEST<-2, line=2.5, cex=2, outer=TRUE)
  283.  
  284. dev.off()
  285.  
  286.  
  287. png("Edita2013_visom_CA_test.png", width=600, height=900)
  288. par(mfrow=c(3,2), mar=c(1,1,0,0), oma=c(6,6,0,0), cex.axis=2)
  289. plot(qsar1$K[test], qsar1_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
  290. abline(mod_qsar1t)
  291. title('QSAR CAI test set', line = -3, cex.main=3)
  292. axis(1,col.axis = "transparent", tck = 0.02)
  293.  
  294. plot(qsar2$K[test], qsar2_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), xlab=NA, ylab=NA, xaxt="n", yaxt="n")
  295. abline(mod_qsar2t)
  296. title('QSAR CAII test set', line = -3, cex.main=3)
  297. axis(1,col.axis = "transparent", tck = 0.02)
  298. axis(2,col.axis = "transparent", tck = 0.02)
  299.  
  300. plot(qsar6$K[test], qsar6_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
  301. abline(mod_qsar6t)
  302. title('QSAR CAVI test set', line = -3, cex.main=3)
  303.  
  304. plot(qsar7$K[test], qsar7_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n", yaxt="n")
  305. abline(mod_qsar7t)
  306. title('QSAR CAVII test set', line = -3, cex.main=3)
  307. axis(1,col.axis = "transparent", tck = 0.02)
  308. axis(2,col.axis = "transparent", tck = 0.02)
  309.  
  310. plot(qsar12$K[test], qsar12_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE)
  311. abline(mod_qsar12t)
  312. title('QSAR CAXII test set', line = -3, cex.main=3)
  313.  
  314. plot(qsar13$K[test], qsar13_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), yaxt="n", cex.lab=3)
  315. abline(mod_qsar13t)
  316. title('QSAR CAXIII test set', line = -3, cex.main=3)
  317. axis(2,col.axis = "transparent", tck = 0.02)
  318.  
  319. mtext('pKd difference (experimental)', SOUTH<-1, line=2.5, cex=2, outer=TRUE)
  320. mtext('pKd difference (calculated)', WEST<-2, line=2.5, cex=2, outer=TRUE)
  321.  
  322. dev.off()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement