Advertisement
ProzacR

QSAR su intrinciniais parametrais

Apr 5th, 2016
427
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 5.98 KB | None | 0 0
  # QSAR with intrinsic parameters
  #
  # VR

  # Input data: measured intrinsic parameters and the descriptor table
  # (the latter converted to a matrix so columns can be indexed by name).
  matavimai <- read.csv(file = "Intinciniai_matavimai.csv")
  deskriptoriai <- as.matrix(read.csv(file = "sutvarkyti_descriptoriai.csv"))

  # Earlier split, drawn at random out of 28 rows (kept for reference):
  # test <- sort(round(runif(8, 1, 28)))
  # test
  # test <- c(1, 9, 10, 13, 14, 19, 22, 26)
  # and then:
  # train <- c(2, 3, 4, 5, 6, 7, 8, 11, 12, 15, 16, 17, 18, 20, 21, 23, 24,
  #            25, 27, 28)

  # Maybe better to simply drop the 3 non-fluorinated compounds:
  # test <- sort(round(runif(5, 1, 25)))
  # Row indices of the held-out test set and of the training set; the values
  # are fixed here so the split is reproducible.
  train <- c(
    1, 2, 3, 4, 5, 6, 7, 9, 10, 12,
    14, 15, 16, 18, 19, 20, 21, 22, 24, 25
  )
  test <- c(8, 11, 13, 17, 23)
  20.  
  21.  
  22. library(cvq2)
  23.  
  # Q2 for an external test set. According to the original author's note this
  # is exactly the same definition as the PHASE Q2 statistic.
  #
  # Arguments:
  #   activity            numeric vector of observed values.
  #   predicted_activity  numeric vector of predictions for the same samples,
  #                       in the same order (a single value is also accepted
  #                       and is compared against every observation).
  #
  # Returns:
  #   1 - sum((predicted - observed)^2) / sum((observed - mean(observed))^2)
  #   The denominator is the spread of `activity` about its own mean, so the
  #   result is non-finite when all observed values are equal.
  #
  # Stops with an error when an argument is not numeric or when the lengths
  # do not match; without that check R would silently recycle the shorter
  # vector and return a meaningless value.
  q2test <- function(activity, predicted_activity) {
    stopifnot(
      is.numeric(activity),
      is.numeric(predicted_activity),
      length(predicted_activity) %in% c(1L, length(activity))
    )

    residual_ss <- sum((predicted_activity - activity)^2)
    total_ss <- sum((activity - mean(activity))^2)

    1 - residual_ss / total_ss
  }
  32.  
  33.  
  34.  
  35. #Genetic Algorithm (GA) for Variable Selection from High-Dimensional Data:
  36. library(gaselect)
  # Genetic-algorithm search settings: 5000 candidate subsets per generation,
  # 750 generations, each subset holding 3 to 4 variables.
  ctrl <- genAlgControl(
    populationSize = 5000,
    numGenerations = 750,
    minVariables = 3,
    maxVariables = 4,
    verbosity = 1
  )

  # PLS-based fitness evaluator: 2 replications, 5 inner and 3 outer
  # segments, run on 3 threads.
  # NOTE(review): "RDCV" presumably stands for repeated double
  # cross-validation -- confirm against the gaselect documentation.
  evaluatorRDCV <- evaluatorPLS(
    numReplications = 2,
    innerSegments = 5,
    outerSegments = 3,
    numThreads = 3
  )
  41.  
  # Fit, validate and report one multiple-linear-regression QSAR model.
  #
  # Arguments:
  #   response          name of the column of `measurements` to model.
  #   descriptor_names  names of the columns of `descriptors` used as
  #                     predictors.
  #   descriptors       descriptor matrix (defaults to script-level
  #                     `deskriptoriai`).
  #   measurements      data frame of measured values (defaults to
  #                     script-level `matavimai`).
  #   train_idx         row indices used for fitting (defaults to `train`).
  #   test_idx          row indices used for testing (defaults to `test`).
  #
  # Prints, in this order: the summary of the training fit, the summary of
  # the regression of predicted on observed test values, the cvq2() result
  # for the training rows, and q2test() for the test rows.
  #
  # Returns (invisibly) a list with elements train_model, test_pred,
  # test_model, cv_data and q2_cv.
  #
  # The model is fitted on a data frame so that predict() can be used for the
  # test rows instead of assembling the prediction from positional coef()
  # indices; as a side effect the printed coefficient labels are the plain
  # descriptor names.
  fit_qsar <- function(response, descriptor_names,
                       descriptors = deskriptoriai,
                       measurements = matavimai,
                       train_idx = train,
                       test_idx = test) {
    stopifnot(
      is.character(response),
      length(response) == 1L,
      response %in% names(measurements),
      all(descriptor_names %in% colnames(descriptors))
    )

    train_x <- descriptors[train_idx, descriptor_names, drop = FALSE]
    test_x <- descriptors[test_idx, descriptor_names, drop = FALSE]
    train_y <- measurements[[response]][train_idx]
    observed <- measurements[[response]][test_idx]

    # Training fit.
    train_df <- as.data.frame(train_x)
    train_df$y <- train_y
    train_model <- lm(
      reformulate(descriptor_names, response = "y"),
      data = train_df
    )
    print(summary(train_model))

    # Predictions for the held-out rows, then predicted-vs-observed fit.
    predicted <- unname(predict(train_model, newdata = as.data.frame(test_x)))
    test_model <- lm(predicted ~ observed)
    print(summary(test_model))

    # cvq2() gets the training rows as a matrix whose columns are named
    # x1..xk followed by y, exactly as the script did before.
    cv_data <- cbind(train_x, train_y)
    colnames(cv_data) <- c(paste0("x", seq_along(descriptor_names)), "y")
    q2_cv <- cvq2(cv_data)
    print(q2_cv)

    print(q2test(observed, predicted))

    invisible(list(
      train_model = train_model,
      test_pred = predicted,
      test_model = test_model,
      cv_data = cv_data,
      q2_cv = q2_cv
    ))
  }

  # GintrCAI:
  # With the faster GA setting the selected descriptors were:
  # "WD.unity", "E1v", "maxssCH2", "TDB5u"
  # resultRDCV.GintrCAI <- genAlg(matavimai$GintrCAI[train],
  #   deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV,
  #   seed = 123)
  # subsets(resultRDCV.GintrCAI, 1:5)

  qsar_1 <- fit_qsar("GintrCAI", c("WD.unity", "E1v", "maxssCH2", "TDB5u"))
  qsar_1_train <- qsar_1$train_model
  qsar_1_test_pred_values <- qsar_1$test_pred
  qsar_1_test <- qsar_1$test_model
  qsar_1_q2 <- qsar_1$q2_cv

  # HintrCAI:
  # resultRDCV.HintrCAI <- genAlg(matavimai$HintrCAI[train],
  #   deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV,
  #   seed = 123)
  # subsets(resultRDCV.HintrCAI, 1:5)

  qsar_2 <- fit_qsar("HintrCAI", c("Wlambda2.unity", "S9", "MATSm4", "MoRSEV15"))
  qsar_2_train <- qsar_2$train_model
  qsar_2_test_pred_values <- qsar_2$test_pred
  qsar_2_test <- qsar_2$test_model
  qsar_2_q2 <- qsar_2$q2_cv

  # TsintrCAI:
  # even longer computations
  ctrl <- genAlgControl(
    populationSize = 50000,
    numGenerations = 750,
    minVariables = 3,
    maxVariables = 4,
    verbosity = 1
  )

  # resultRDCV.TsintrCAI <- genAlg(matavimai$TsintrCAI[train],
  #   deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV,
  #   seed = 123)
  # subsets(resultRDCV.TsintrCAI, 1:5)

  qsar_3 <- fit_qsar("TsintrCAI", c("MOMI.Z", "S9", "MoRSEV15", "hmin"))
  qsar_3_train <- qsar_3$train_model
  qsar_3_test_pred_values <- qsar_3$test_pred
  qsar_3_test <- qsar_3$test_model
  qsar_3_q2 <- qsar_3$q2_cv

  # GintrCAII:
  # yet other settings:
  ctrl <- genAlgControl(
    populationSize = 50000,
    numGenerations = 1000,
    minVariables = 2,
    maxVariables = 3,
    verbosity = 1
  )
  # Probably, to avoid overfitting, fewer descriptors should be taken; if it
  # works out with this many, that is apparently fine.
  # resultRDCV.GintrCAII <- genAlg(matavimai$GintrCAII[train],
  #   deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV,
  #   seed = 123)
  # subsets(resultRDCV.GintrCAII, 1:5)

  qsar_4 <- fit_qsar("GintrCAII", c("RPCS", "nRotBt", "RDF55s"))
  qsar_4_train <- qsar_4$train_model
  qsar_4_test_pred_values <- qsar_4$test_pred
  qsar_4_test <- qsar_4$test_model
  qsar_4_q2 <- qsar_4$q2_cv

  # The script used to leave the last cvq2() input matrix in `x`; keep that
  # variable available for any code that follows.
  x <- qsar_4$cv_data
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement