Advertisement
ProzacR

intrinsiniai_QSAR_2.R

May 10th, 2016
451
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 6.21 KB | None | 0 0
  1. #QSAR su intrinciniais parametrais
  2. #
  3. # VR
  4.  
  5. #pradiniai duomenys:
  6. deskriptoriai<-as.matrix(read.csv(file='sutvarkyti_descriptoriai.csv'))
  7. matavimai<-read.csv(file='Intinciniai_2_pradinis_can_dedupe.csv')
  8.  
  9. #random numbers:
  10. #test<-sort(sample(1:86, 16, replace=F))
  11. test <- c(5, 14, 16, 21, 26, 39, 46, 59, 60, 62, 64, 65, 75, 79, 82, 85)
  12. train <- c(1:86)
  13. #r-bloggers.com:
  14. outersect <- function(x, y) {
  15.        sort(c(setdiff(x, y),
  16.            setdiff(y, x)))
  17.      }
  18. train<-outersect(test, train)
  19.  
  20. library(cvq2)
  21.  
  22. #Q2TEST f-ja kaip ir PHASE Q2 lygiai tokia pati
  23. q2test<-function(activity, predicted_activity) {
  24.                     prediction_error_sq<-(predicted_activity-activity)^2
  25.                 avg_activity<-mean(activity)
  26.                         sigma_y_sq<-(activity-avg_activity)^2
  27.                         q2test_val<-1-sum(prediction_error_sq)/sum(sigma_y_sq)
  28.                                 return(q2test_val)
  29. }
  30.  
  31.  
  32.  
  33. #Genetic Algorithm (GA) for Variable Selection from High-Dimensional Data:
  34. library(gaselect)
  35. ctrl <- genAlgControl(populationSize = 64000, numGenerations = 2000, minVariables = 3, maxVariables = 4, verbosity = 1)
  36. evaluatorRDCV <- evaluatorPLS(numReplications = 2, innerSegments = 5, outerSegments = 3,
  37.                   numThreads = 3)
  38. #pirma 2–3 deskriptoriai, paskui 3–4 t.t., kol Q2test>0.4
  39.  
  40. #CA1:
  41. #resultRDCV.CA1 <- genAlg(matavimai$CA1[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 123)
  42. #subsets(resultRDCV.CA1, 1:5)
  43.  
  44. qsar_1_train<-lm(matavimai$CA1[train] ~ deskriptoriai[train, "SPAM"] + deskriptoriai[train, "E1m"] + deskriptoriai[train, "E2s"])
  45. print(summary(qsar_1_train))
  46. qsar_1_test_pred_values<-coef(qsar_1_train)[1] + coef(qsar_1_train)[2]*deskriptoriai[test, "SPAM"] + coef(qsar_1_train)[3]*deskriptoriai[test, "E1m"] + coef(qsar_1_train)[4]*deskriptoriai[test, "E2s"]
  47. qsar_1_test<-lm(qsar_1_test_pred_values ~ matavimai$CA1[test])
  48. print(summary(qsar_1_test))
  49. x<-cbind(deskriptoriai[train, "SPAM"], deskriptoriai[train, "E1m"], deskriptoriai[train,"E2s"], matavimai$CA1[train])
  50. colnames(x)<-c('x1', 'x2', 'x3', 'y')
  51. qsar_1_q2<-cvq2(x)
  52. print(qsar_1_q2)
  53. print(q2test(matavimai$CA1[test], qsar_1_test_pred_values))
  54. #O.K.
  55.  
  56.  
  57. #dabar:
  58. #CA2:
  59. #resultRDCV.CA2 <- genAlg(matavimai$CA2[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 777)
  60. #subsets(resultRDCV.CA2, 1:5)
  61.  
  62. qsar_2_train<-lm(matavimai$CA2[train] ~ deskriptoriai[train, "PCR"] + deskriptoriai[train, "MATS1p"] + deskriptoriai[train, "DISPe"] + deskriptoriai[train, "R6e"])
  63. print(summary(qsar_2_train))
  64. qsar_2_test_pred_values<-coef(qsar_2_train)[1] + coef(qsar_2_train)[2]*deskriptoriai[test, "PCR"] + coef(qsar_2_train)[3]*deskriptoriai[test, "MATS1p"] + coef(qsar_2_train)[4]*deskriptoriai[test, "DISPe"] + coef(qsar_2_train)[5]*deskriptoriai[test, "R6e"]
  65. qsar_2_test<-lm(qsar_2_test_pred_values ~ matavimai$CA2[test])
  66. print(summary(qsar_2_test))
  67. x<-cbind(deskriptoriai[train, "PCR"], deskriptoriai[train, "MATS1p"], deskriptoriai[train,"DISPe"], deskriptoriai[train,"R6e"], matavimai$CA2[train])
  68. colnames(x)<-c('x1', 'x2', 'x3', 'x4', 'y')
  69. qsar_2_q2<-cvq2(x)
  70. print(qsar_2_q2)
  71. print(q2test(matavimai$CA2[test], qsar_2_test_pred_values))
  72. #3 deskr.: ir su train set nlb. R2=0.66-0.64
  73. #4 deskr.: O.K.
  74.  
  75.  
  76. #CA7:
  77. #resultRDCV.CA7 <- genAlg(matavimai$CA7[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 777)
  78. #subsets(resultRDCV.CA7, 1:5)
  79.  
  80. qsar_3_train<-lm(matavimai$CA7[train] ~ deskriptoriai[train, "PW4"] + deskriptoriai[train, "R3v"] + deskriptoriai[train, "R8v."] + deskriptoriai[train, "ALOGP2"])
  81. print(summary(qsar_3_train))
  82. qsar_3_test_pred_values<-coef(qsar_3_train)[1] + coef(qsar_3_train)[2]*deskriptoriai[test, "PW4"] + coef(qsar_3_train)[3]*deskriptoriai[test, "R3v"] + coef(qsar_3_train)[4]*deskriptoriai[test, "R8v."] + coef(qsar_3_train)[5]*deskriptoriai[test, "ALOGP2"]
  83. qsar_3_test<-lm(qsar_3_test_pred_values ~ matavimai$CA7[test])
  84. print(summary(qsar_3_test))
  85. x<-cbind(deskriptoriai[train, "PW4"], deskriptoriai[train, "R3v"], deskriptoriai[train,"R8v."], deskriptoriai[train,"ALOGP2"], matavimai$CA7[train])
  86. colnames(x)<-c('x1', 'x2', 'x3', 'x4', 'y')
  87. qsar_3_q2<-cvq2(x)
  88. print(qsar_3_q2)
  89. print(q2test(matavimai$CA7[test], qsar_3_test_pred_values))
  90. #3 deskr.: ir su train set nlb. R2=0.66
  91. #4 deskr.: nope
  92.  
  93.  
  94. #CA12:
  95. #resultRDCV.CA12 <- genAlg(matavimai$CA12[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 777)
  96. #subsets(resultRDCV.CA12, 1:5)
  97.  
  98. qsar_4_train<-lm(matavimai$CA12[train] ~ deskriptoriai[train, "RDF080m"] + deskriptoriai[train, "RDF135v"] + deskriptoriai[train, "HATS6u"] + deskriptoriai[train, "HATS6"])
  99. print(summary(qsar_4_train))
  100. qsar_4_test_pred_values<-coef(qsar_4_train)[1] + coef(qsar_4_train)[2]*deskriptoriai[test, "RDF080m"] + coef(qsar_4_train)[3]*deskriptoriai[test, "RDF135v"] + coef(qsar_4_train)[4]*deskriptoriai[test, "HATS6u"] + coef(qsar_4_train)[5]*deskriptoriai[test, "HATS6"]
  101. qsar_4_test<-lm(qsar_4_test_pred_values ~ matavimai$CA12[test])
  102. print(summary(qsar_4_test))
  103. x<-cbind(deskriptoriai[train, "RDF080m"], deskriptoriai[train, "RDF135v"], deskriptoriai[train,"HATS6u"], deskriptoriai[train,"HATS6"], matavimai$CA12[train])
  104. colnames(x)<-c('x1', 'x2', 'x3', 'x4', 'y')
  105. qsar_4_q2<-cvq2(x)
  106. print(qsar_4_q2)
  107. print(q2test(matavimai$CA12[test], qsar_4_test_pred_values))
  108. #3 deskr.: gal ir panasu, bet su train set R2=0.51
  109.  
  110.  
  111. #CA13:
  112. #resultRDCV.CA13 <- genAlg(matavimai$CA13[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 123)
  113. #subsets(resultRDCV.CA13, 1:5)
  114.  
  115. qsar_5_train<-lm(matavimai$CA13[train] ~ deskriptoriai[train, "DISPe"] + deskriptoriai[train, "Mor04m"] + deskriptoriai[train, "R6e"])
  116. print(summary(qsar_5_train))
  117. qsar_5_test_pred_values<-coef(qsar_5_train)[1] + coef(qsar_5_train)[2]*deskriptoriai[test, "DISPe"] + coef(qsar_5_train)[3]*deskriptoriai[test, "Mor04m"] + coef(qsar_5_train)[4]*deskriptoriai[test, "R6e"]
  118. qsar_5_test<-lm(qsar_5_test_pred_values ~ matavimai$CA13[test])
  119. print(summary(qsar_5_test))
  120. x<-cbind(deskriptoriai[train, "DISPe"], deskriptoriai[train, "Mor04m"], deskriptoriai[train,"R6e"], matavimai$CA13[train])
  121. colnames(x)<-c('x1', 'x2', 'x3', 'y')
  122. qsar_5_q2<-cvq2(x)
  123. print(qsar_5_q2)
  124. print(q2test(matavimai$CA13[test], qsar_5_test_pred_values))
  125. #O.K.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement