Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# QSAR with intrinsic parameters
#
# VR
# Input data: the descriptor table is converted to a matrix so that columns
# can be addressed as deskriptoriai[rows, "name"]; the measurements stay a
# data frame and are addressed as matavimai$<column>.
deskriptoriai <- as.matrix(read.csv(file = "sutvarkyti_descriptoriai.csv"))
matavimai <- read.csv(file = "Intinciniai_2_pradinis_can_dedupe.csv")
# Test-set row indices. A random draw is left here commented out; the fixed
# indices below are used instead.
# test <- sort(sample(1:86, 16, replace = FALSE))
test <- c(5, 14, 16, 21, 26, 39, 46, 59, 60, 62, 64, 65, 75, 79, 82, 85)
# Start from row indices 1..86; the test rows are removed from this vector
# further down in the script.
# NOTE(review): 86 is presumably the number of rows in both CSV files - confirm.
train <- 1:86
# Source noted by the original author: r-bloggers.com

#' Symmetric difference of two vectors
#'
#' @param x A vector.
#' @param y A vector.
#' @return The values that occur in exactly one of `x` and `y`, sorted in
#'   increasing order. Each value appears once, because `setdiff()` drops
#'   duplicates.
outersect <- function(x, y) {
  only_in_x <- setdiff(x, y)
  only_in_y <- setdiff(y, x)
  sort(c(only_in_x, only_in_y))
}
# Training rows = all rows that are not test rows. Every test index lies in
# 1:86, so the symmetric difference is simply `train` without `test`.
train <- outersect(test, train)
library(cvq2)
# Q2TEST function; per the original author's note it is exactly the same as
# the Q2 reported by PHASE.

#' Predictive Q2 on an external test set
#'
#' Computes `1 - sum((predicted - observed)^2) / sum((observed - mean(observed))^2)`.
#'
#' @param activity Numeric vector of observed activities.
#' @param predicted_activity Numeric vector of predicted activities, one per
#'   element of `activity`.
#' @return A single numeric value.
q2test <- function(activity, predicted_activity) {
  # Guard against silent recycling: with unequal lengths R would still return
  # a number, but it would not be a meaningful Q2.
  if (length(activity) != length(predicted_activity)) {
    stop(
      "activity and predicted_activity must have the same length",
      call. = FALSE
    )
  }
  residual_ss <- sum((predicted_activity - activity)^2)
  total_ss <- sum((activity - mean(activity))^2)
  1 - residual_ss / total_ss
}
# Genetic Algorithm (GA) for Variable Selection from High-Dimensional Data:
library(gaselect)
# Search settings shared by all (currently commented-out) genAlg() runs below:
# subsets of 3 to 4 variables.
ctrl <- genAlgControl(
  populationSize = 64000,
  numGenerations = 2000,
  minVariables = 3,
  maxVariables = 4,
  verbosity = 1
)
# PLS-based evaluator handed to genAlg() as `evaluator`.
evaluatorRDCV <- evaluatorPLS(
  numReplications = 2,
  innerSegments = 5,
  outerSegments = 3,
  numThreads = 3
)
# Procedure: first 2-3 descriptors, then 3-4, and so on, until Q2test > 0.4.
# CA1:
# Descriptor search with the GA (kept for reference, not run):
# resultRDCV.CA1 <- genAlg(matavimai$CA1[train], deskriptoriai[train, ],
#   control = ctrl, evaluator = evaluatorRDCV, seed = 123)
# subsets(resultRDCV.CA1, 1:5)

# Linear model fitted on the training rows.
qsar_1_train <- lm(
  matavimai$CA1[train] ~
    deskriptoriai[train, "SPAM"] +
    deskriptoriai[train, "E1m"] +
    deskriptoriai[train, "E2s"]
)
print(summary(qsar_1_train))

# Test-set predictions built by hand from the fitted coefficients:
# coefficient 1 is the intercept, 2..4 follow the order of the formula terms.
qsar_1_test_pred_values <- coef(qsar_1_train)[1] +
  coef(qsar_1_train)[2] * deskriptoriai[test, "SPAM"] +
  coef(qsar_1_train)[3] * deskriptoriai[test, "E1m"] +
  coef(qsar_1_train)[4] * deskriptoriai[test, "E2s"]

# Regress predicted on observed test values and print the fit summary.
qsar_1_test <- lm(qsar_1_test_pred_values ~ matavimai$CA1[test])
print(summary(qsar_1_test))

# Training data laid out for cvq2(): descriptor columns first, response last.
x <- cbind(
  deskriptoriai[train, "SPAM"],
  deskriptoriai[train, "E1m"],
  deskriptoriai[train, "E2s"],
  matavimai$CA1[train]
)
colnames(x) <- c("x1", "x2", "x3", "y")
qsar_1_q2 <- cvq2(x) # called with the package defaults
print(qsar_1_q2)

# External-test Q2 from the hand-computed predictions.
print(q2test(matavimai$CA1[test], qsar_1_test_pred_values))
# O.K.
# Now:
# CA2:
# Descriptor search with the GA (kept for reference, not run):
# resultRDCV.CA2 <- genAlg(matavimai$CA2[train], deskriptoriai[train, ],
#   control = ctrl, evaluator = evaluatorRDCV, seed = 777)
# subsets(resultRDCV.CA2, 1:5)

# Linear model fitted on the training rows.
qsar_2_train <- lm(
  matavimai$CA2[train] ~
    deskriptoriai[train, "PCR"] +
    deskriptoriai[train, "MATS1p"] +
    deskriptoriai[train, "DISPe"] +
    deskriptoriai[train, "R6e"]
)
print(summary(qsar_2_train))

# Test-set predictions built by hand from the fitted coefficients:
# coefficient 1 is the intercept, 2..5 follow the order of the formula terms.
qsar_2_test_pred_values <- coef(qsar_2_train)[1] +
  coef(qsar_2_train)[2] * deskriptoriai[test, "PCR"] +
  coef(qsar_2_train)[3] * deskriptoriai[test, "MATS1p"] +
  coef(qsar_2_train)[4] * deskriptoriai[test, "DISPe"] +
  coef(qsar_2_train)[5] * deskriptoriai[test, "R6e"]

# Regress predicted on observed test values and print the fit summary.
qsar_2_test <- lm(qsar_2_test_pred_values ~ matavimai$CA2[test])
print(summary(qsar_2_test))

# Training data laid out for cvq2(): descriptor columns first, response last.
x <- cbind(
  deskriptoriai[train, "PCR"],
  deskriptoriai[train, "MATS1p"],
  deskriptoriai[train, "DISPe"],
  deskriptoriai[train, "R6e"],
  matavimai$CA2[train]
)
colnames(x) <- c("x1", "x2", "x3", "x4", "y")
qsar_2_q2 <- cvq2(x) # called with the package defaults
print(qsar_2_q2)

# External-test Q2 from the hand-computed predictions.
print(q2test(matavimai$CA2[test], qsar_2_test_pred_values))
# 3 descriptors: not great even on the training set, R2 = 0.66-0.64
# 4 descriptors: O.K.
# CA7:
# Descriptor search with the GA (kept for reference, not run):
# resultRDCV.CA7 <- genAlg(matavimai$CA7[train], deskriptoriai[train, ],
#   control = ctrl, evaluator = evaluatorRDCV, seed = 777)
# subsets(resultRDCV.CA7, 1:5)

# Linear model fitted on the training rows.
# NOTE(review): the column name "R8v." presumably comes from read.csv()
# rewriting a non-syntactic header - confirm against the CSV file.
qsar_3_train <- lm(
  matavimai$CA7[train] ~
    deskriptoriai[train, "PW4"] +
    deskriptoriai[train, "R3v"] +
    deskriptoriai[train, "R8v."] +
    deskriptoriai[train, "ALOGP2"]
)
print(summary(qsar_3_train))

# Test-set predictions built by hand from the fitted coefficients:
# coefficient 1 is the intercept, 2..5 follow the order of the formula terms.
qsar_3_test_pred_values <- coef(qsar_3_train)[1] +
  coef(qsar_3_train)[2] * deskriptoriai[test, "PW4"] +
  coef(qsar_3_train)[3] * deskriptoriai[test, "R3v"] +
  coef(qsar_3_train)[4] * deskriptoriai[test, "R8v."] +
  coef(qsar_3_train)[5] * deskriptoriai[test, "ALOGP2"]

# Regress predicted on observed test values and print the fit summary.
qsar_3_test <- lm(qsar_3_test_pred_values ~ matavimai$CA7[test])
print(summary(qsar_3_test))

# Training data laid out for cvq2(): descriptor columns first, response last.
x <- cbind(
  deskriptoriai[train, "PW4"],
  deskriptoriai[train, "R3v"],
  deskriptoriai[train, "R8v."],
  deskriptoriai[train, "ALOGP2"],
  matavimai$CA7[train]
)
colnames(x) <- c("x1", "x2", "x3", "x4", "y")
qsar_3_q2 <- cvq2(x) # called with the package defaults
print(qsar_3_q2)

# External-test Q2 from the hand-computed predictions.
print(q2test(matavimai$CA7[test], qsar_3_test_pred_values))
# 3 descriptors: not great even on the training set, R2 = 0.66
# 4 descriptors: nope
# CA12:
# Descriptor search with the GA (kept for reference, not run):
# resultRDCV.CA12 <- genAlg(matavimai$CA12[train], deskriptoriai[train, ],
#   control = ctrl, evaluator = evaluatorRDCV, seed = 777)
# subsets(resultRDCV.CA12, 1:5)

# Linear model fitted on the training rows.
qsar_4_train <- lm(
  matavimai$CA12[train] ~
    deskriptoriai[train, "RDF080m"] +
    deskriptoriai[train, "RDF135v"] +
    deskriptoriai[train, "HATS6u"] +
    deskriptoriai[train, "HATS6"]
)
print(summary(qsar_4_train))

# Test-set predictions built by hand from the fitted coefficients:
# coefficient 1 is the intercept, 2..5 follow the order of the formula terms.
qsar_4_test_pred_values <- coef(qsar_4_train)[1] +
  coef(qsar_4_train)[2] * deskriptoriai[test, "RDF080m"] +
  coef(qsar_4_train)[3] * deskriptoriai[test, "RDF135v"] +
  coef(qsar_4_train)[4] * deskriptoriai[test, "HATS6u"] +
  coef(qsar_4_train)[5] * deskriptoriai[test, "HATS6"]

# Regress predicted on observed test values and print the fit summary.
qsar_4_test <- lm(qsar_4_test_pred_values ~ matavimai$CA12[test])
print(summary(qsar_4_test))

# Training data laid out for cvq2(): descriptor columns first, response last.
x <- cbind(
  deskriptoriai[train, "RDF080m"],
  deskriptoriai[train, "RDF135v"],
  deskriptoriai[train, "HATS6u"],
  deskriptoriai[train, "HATS6"],
  matavimai$CA12[train]
)
colnames(x) <- c("x1", "x2", "x3", "x4", "y")
qsar_4_q2 <- cvq2(x) # called with the package defaults
print(qsar_4_q2)

# External-test Q2 from the hand-computed predictions.
print(q2test(matavimai$CA12[test], qsar_4_test_pred_values))
# 3 descriptors: perhaps similar, but on the training set R2 = 0.51
# CA13:
# Descriptor search with the GA (kept for reference, not run):
# resultRDCV.CA13 <- genAlg(matavimai$CA13[train], deskriptoriai[train, ],
#   control = ctrl, evaluator = evaluatorRDCV, seed = 123)
# subsets(resultRDCV.CA13, 1:5)

# Linear model fitted on the training rows.
qsar_5_train <- lm(
  matavimai$CA13[train] ~
    deskriptoriai[train, "DISPe"] +
    deskriptoriai[train, "Mor04m"] +
    deskriptoriai[train, "R6e"]
)
print(summary(qsar_5_train))

# Test-set predictions built by hand from the fitted coefficients:
# coefficient 1 is the intercept, 2..4 follow the order of the formula terms.
qsar_5_test_pred_values <- coef(qsar_5_train)[1] +
  coef(qsar_5_train)[2] * deskriptoriai[test, "DISPe"] +
  coef(qsar_5_train)[3] * deskriptoriai[test, "Mor04m"] +
  coef(qsar_5_train)[4] * deskriptoriai[test, "R6e"]

# Regress predicted on observed test values and print the fit summary.
qsar_5_test <- lm(qsar_5_test_pred_values ~ matavimai$CA13[test])
print(summary(qsar_5_test))

# Training data laid out for cvq2(): descriptor columns first, response last.
x <- cbind(
  deskriptoriai[train, "DISPe"],
  deskriptoriai[train, "Mor04m"],
  deskriptoriai[train, "R6e"],
  matavimai$CA13[train]
)
colnames(x) <- c("x1", "x2", "x3", "y")
qsar_5_q2 <- cvq2(x) # called with the package defaults
print(qsar_5_q2)

# External-test Q2 from the hand-computed predictions.
print(q2test(matavimai$CA13[test], qsar_5_test_pred_values))
# O.K.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement