Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# QSAR with intrinsic parameters
#
# VR

# Input data: descriptor table (coerced to a matrix) and the measurements.
deskriptoriai <- as.matrix(read.csv("sutvarkyti_descriptoriai.csv"))
matavimai <- read.csv("Intinciniai_matavimai.csv")

# Earlier split, drawn from random numbers over 28 rows (kept for reference):
# test <- sort(round(runif(8, 1, 28)))
# test
# test <- c(1, 9, 10, 13, 14, 19, 22, 26)
# then:
# train <- c(2, 3, 4, 5, 6, 7, 8, 11, 12, 15, 16, 17, 18, 20, 21, 23, 24, 25, 27, 28)

# Maybe it is better to simply drop the 3 non-fluorinated ones:
# test <- sort(round(runif(5, 1, 25)))

# Row indices of the held-out test set and of the training set; the training
# indices listed here are 1..25 with the five test indices left out.
test <- c(8, 11, 13, 17, 23)
train <- c(1:7, 9, 10, 12, 14:16, 18:22, 24, 25)
- library(cvq2)
# Q2 for an external test set: 1 - sum((predicted - observed)^2) /
# sum((observed - mean(observed))^2).
# Original author's note: this is exactly the same as the Q2 reported by PHASE.
#
# Arguments:
#   activity            numeric vector of observed activities.
#   predicted_activity  numeric vector of predicted activities, in the same
#                       order and of the same length as `activity`.
# Returns:
#   A single numeric value. The reference mean is the mean of the `activity`
#   vector that is passed in.
# Errors:
#   Stops when the two vectors differ in length; without this check R would
#   silently recycle the shorter one and return a meaningless value.
q2test <- function(activity, predicted_activity) {
  if (length(predicted_activity) != length(activity)) {
    stop(
      "activity and predicted_activity must have the same length",
      call. = FALSE
    )
  }
  prediction_error_sq <- (predicted_activity - activity)^2
  sigma_y_sq <- (activity - mean(activity))^2
  1 - sum(prediction_error_sq) / sum(sigma_y_sq)
}
# Genetic algorithm (GA) variable selection for high-dimensional data.
library(gaselect)

# GA settings: subsets of 3 to 4 variables.
ctrl <- genAlgControl(
  populationSize = 5000,
  numGenerations = 750,
  minVariables = 3,
  maxVariables = 4,
  verbosity = 1
)

# PLS evaluator settings used by the (commented-out) genAlg() calls.
evaluatorRDCV <- evaluatorPLS(
  numReplications = 2,
  innerSegments = 5,
  outerSegments = 3,
  numThreads = 3
)
# GintrCAI ----
# With the faster GA setting the result was: "WD.unity", "E1v", "maxssCH2", "TDB5u"
#resultRDCV.GintrCAI <- genAlg(matavimai$GintrCAI[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 123)
#subsets(resultRDCV.GintrCAI, 1:5)

# The descriptor set is declared once so that the fit, the test-set prediction
# and the cross-validation matrix cannot drift apart.
qsar_1_vars <- c("WD.unity", "E1v", "maxssCH2", "TDB5u")

# Fit the linear model on the training rows.
qsar_1_train_data <- data.frame(
  y = matavimai$GintrCAI[train],
  deskriptoriai[train, qsar_1_vars, drop = FALSE]
)
qsar_1_train <- lm(y ~ ., data = qsar_1_train_data)
print(summary(qsar_1_train))

# Predict the held-out rows with predict() instead of rebuilding the linear
# combination from coefficient positions; unname() keeps a plain vector.
qsar_1_test_pred_values <- unname(predict(
  qsar_1_train,
  newdata = as.data.frame(deskriptoriai[test, qsar_1_vars, drop = FALSE])
))

# Regress the predicted values on the observed test values and print the fit.
qsar_1_test <- lm(qsar_1_test_pred_values ~ matavimai$GintrCAI[test])
print(summary(qsar_1_test))

# Matrix for cvq2(): descriptor columns named x1..xn followed by the response y.
x <- cbind(
  deskriptoriai[train, qsar_1_vars, drop = FALSE],
  matavimai$GintrCAI[train]
)
colnames(x) <- c(paste0("x", seq_along(qsar_1_vars)), "y")
qsar_1_q2 <- cvq2(x)
print(qsar_1_q2)

# Q2 on the held-out test rows.
print(q2test(matavimai$GintrCAI[test], qsar_1_test_pred_values))
# HintrCAI ----
#resultRDCV.HintrCAI <- genAlg(matavimai$HintrCAI[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 123)
#subsets(resultRDCV.HintrCAI, 1:5)

# The descriptor set is declared once so that the fit, the test-set prediction
# and the cross-validation matrix cannot drift apart.
qsar_2_vars <- c("Wlambda2.unity", "S9", "MATSm4", "MoRSEV15")

# Fit the linear model on the training rows.
qsar_2_train_data <- data.frame(
  y = matavimai$HintrCAI[train],
  deskriptoriai[train, qsar_2_vars, drop = FALSE]
)
qsar_2_train <- lm(y ~ ., data = qsar_2_train_data)
print(summary(qsar_2_train))

# Predict the held-out rows with predict() instead of rebuilding the linear
# combination from coefficient positions; unname() keeps a plain vector.
qsar_2_test_pred_values <- unname(predict(
  qsar_2_train,
  newdata = as.data.frame(deskriptoriai[test, qsar_2_vars, drop = FALSE])
))

# Regress the predicted values on the observed test values and print the fit.
qsar_2_test <- lm(qsar_2_test_pred_values ~ matavimai$HintrCAI[test])
print(summary(qsar_2_test))

# Matrix for cvq2(): descriptor columns named x1..xn followed by the response y.
x <- cbind(
  deskriptoriai[train, qsar_2_vars, drop = FALSE],
  matavimai$HintrCAI[train]
)
colnames(x) <- c(paste0("x", seq_along(qsar_2_vars)), "y")
qsar_2_q2 <- cvq2(x)
print(qsar_2_q2)

# Q2 on the held-out test rows.
print(q2test(matavimai$HintrCAI[test], qsar_2_test_pred_values))
# TsintrCAI ----
# Even longer computations: GA settings with a larger population.
ctrl <- genAlgControl(
  populationSize = 50000,
  numGenerations = 750,
  minVariables = 3,
  maxVariables = 4,
  verbosity = 1
)
#resultRDCV.TsintrCAI <- genAlg(matavimai$TsintrCAI[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 123)
#subsets(resultRDCV.TsintrCAI, 1:5)

# The descriptor set is declared once so that the fit, the test-set prediction
# and the cross-validation matrix cannot drift apart.
qsar_3_vars <- c("MOMI.Z", "S9", "MoRSEV15", "hmin")

# Fit the linear model on the training rows.
qsar_3_train_data <- data.frame(
  y = matavimai$TsintrCAI[train],
  deskriptoriai[train, qsar_3_vars, drop = FALSE]
)
qsar_3_train <- lm(y ~ ., data = qsar_3_train_data)
print(summary(qsar_3_train))

# Predict the held-out rows with predict() instead of rebuilding the linear
# combination from coefficient positions; unname() keeps a plain vector.
qsar_3_test_pred_values <- unname(predict(
  qsar_3_train,
  newdata = as.data.frame(deskriptoriai[test, qsar_3_vars, drop = FALSE])
))

# Regress the predicted values on the observed test values and print the fit.
qsar_3_test <- lm(qsar_3_test_pred_values ~ matavimai$TsintrCAI[test])
print(summary(qsar_3_test))

# Matrix for cvq2(): descriptor columns named x1..xn followed by the response y.
x <- cbind(
  deskriptoriai[train, qsar_3_vars, drop = FALSE],
  matavimai$TsintrCAI[train]
)
colnames(x) <- c(paste0("x", seq_along(qsar_3_vars)), "y")
qsar_3_q2 <- cvq2(x)
print(qsar_3_q2)

# Q2 on the held-out test rows.
print(q2test(matavimai$TsintrCAI[test], qsar_3_test_pred_values))
# GintrCAII ----
# Yet other settings: more generations and subsets of 2 to 3 variables.
ctrl <- genAlgControl(
  populationSize = 50000,
  numGenerations = 1000,
  minVariables = 2,
  maxVariables = 3,
  verbosity = 1
)
# Probably, to avoid overfitting, fewer descriptors should be taken; if it
# already works with this many, that is apparently fine.
#resultRDCV.GintrCAII <- genAlg(matavimai$GintrCAII[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 123)
#subsets(resultRDCV.GintrCAII, 1:5)

# The descriptor set is declared once so that the fit, the test-set prediction
# and the cross-validation matrix cannot drift apart.
qsar_4_vars <- c("RPCS", "nRotBt", "RDF55s")

# Fit the linear model on the training rows.
qsar_4_train_data <- data.frame(
  y = matavimai$GintrCAII[train],
  deskriptoriai[train, qsar_4_vars, drop = FALSE]
)
qsar_4_train <- lm(y ~ ., data = qsar_4_train_data)
print(summary(qsar_4_train))

# Predict the held-out rows with predict() instead of rebuilding the linear
# combination from coefficient positions; unname() keeps a plain vector.
qsar_4_test_pred_values <- unname(predict(
  qsar_4_train,
  newdata = as.data.frame(deskriptoriai[test, qsar_4_vars, drop = FALSE])
))

# Regress the predicted values on the observed test values and print the fit.
qsar_4_test <- lm(qsar_4_test_pred_values ~ matavimai$GintrCAII[test])
print(summary(qsar_4_test))

# Matrix for cvq2(): descriptor columns named x1..xn followed by the response y.
x <- cbind(
  deskriptoriai[train, qsar_4_vars, drop = FALSE],
  matavimai$GintrCAII[train]
)
colnames(x) <- c(paste0("x", seq_along(qsar_4_vars)), "y")
qsar_4_q2 <- cvq2(x)
print(qsar_4_q2)

# Q2 on the held-out test rows.
print(q2test(matavimai$GintrCAII[test], qsar_4_test_pred_values))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement