Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(pROC)
- ## Type 'citation("pROC")' for a citation.
- ##
- ## Attaching package: 'pROC'
- ## The following objects are masked from 'package:stats':
- ##
- ## cov, smooth, var
- library(readxl)
- library(openxlsx)
- library(gmodels)
- ##
- ## Attaching package: 'gmodels'
- ## The following object is masked from 'package:pROC':
- ##
- ## ci
- library(tidyr)
- library(ggplot2)
- today <- Sys.Date()
- modeldatabase <- paste0("C:/Users/SimonSantos/Documents/Integration/SHINY/CALCULODEPROBABILIDAD/FlexiCredit - Datos Modelo ", today, ".csv")
- data_flexi <- read.csv(modeldatabase)
- C#28
- data_flexi$Impago <- as.factor(data_flexi$Impago)
- data_flexi$Periodicidad <- as.factor(data_flexi$Periodicidad)
- data_flexi$Sexo <- as.factor(data_flexi$Sexo)
- data_flexi$Empresa_Pagadora <- as.factor(data_flexi$Empresa_Pagadora)
- data_flexi <- data_flexi[-which(data_flexi$Edad > 60),]
- summary(data_flexi)
- ## Numero_de_Contrato Periodicidad Edad Fecha_de_nacimiento
- ## Length:868 Catorcenal:131 Min. :18.37 Length:868
- ## Class :character Mensual : 1 1st Qu.:28.50 Class :character
- ## Mode :character Quincenal : 47 Median :35.53 Mode :character
- ## Semanal :689 Mean :36.18
- ## 3rd Qu.:43.23
- ## Max. :59.96
- ##
- ## Sexo Pago_Periodico Antiguedad Fecha_de_Ingreso
- ## Femenino :379 Min. : 54.89 Min. : 0.3915 Length:868
- ## Masculino:489 1st Qu.: 212.52 1st Qu.: 2.7981 Class :character
- ## Median : 274.47 Median : 5.6920 Mode :character
- ## Mean : 399.44 Mean : 7.8038
- ## 3rd Qu.: 467.99 3rd Qu.:11.0151
- ## Max. :3885.05 Max. :31.0856
- ##
- ## Fecha_de_Desembolso Total_de_Gastos Total_de_Ingresos Sueldo_Neto
- ## Length:868 Min. : 0 Min. : 4689 Min. : 870
- ## Class :character 1st Qu.: 3786 1st Qu.: 9430 1st Qu.: 4188
- ## Mode :character Median : 6123 Median :12080 Median : 6657
- ## Mean : 7034 Mean :14712 Mean : 7679
- ## 3rd Qu.: 8569 3rd Qu.:16793 3rd Qu.: 9441
- ## Max. :39442 Max. :72800 Max. :64580
- ##
- ## Financiamiento Numero_de_Periodos State Empresa_Pagadora
- ## Min. : 1763 Min. : 2.00 Length:868 Denso :498
- ## 1st Qu.: 5100 1st Qu.: 26.00 Class :character MINCO :196
- ## Median :10200 Median : 52.00 Mode :character Muebles Krill : 57
- ## Mean :11835 Mean : 54.67 La Tradicional: 53
- ## 3rd Qu.:15300 3rd Qu.: 78.00 Roan : 27
- ## Max. :88838 Max. :156.00 Accionistas : 19
- ## (Other) : 18
- ## Impago PeriodicidadDos DTI
- ## 0:687 Min. :0.08333 Min. :0.0000
- ## 1:181 1st Qu.:0.75833 1st Qu.:0.3212
- ## Median :1.01111 Median :0.4820
- ## Mean :1.19214 Mean :0.4729
- ## 3rd Qu.:1.51667 3rd Qu.:0.6335
- ## Max. :5.05556 Max. :0.9366
- ##
- order <- c('Semanal','Catorcenal','Quincenal','Mensual')
- data_flexi$Periodicidad <- factor(data_flexi$Periodicidad, levels = order)
- levels(data_flexi$Periodicidad)
- ## [1] "Semanal" "Catorcenal" "Quincenal" "Mensual"
- set.seed(450)
- index_train <- cbind(runif(1 : nrow(data_flexi), 0 , 1),c(1 : nrow(data_flexi)))
- index_train <- order(index_train[, 1])
- index_train <- index_train[1: (2/3 * nrow(data_flexi))]
- training_set <- data_flexi[index_train, ]
- test_set <- data_flexi[-index_train, ]
- logi_full <- glm(Impago ~ PeriodicidadDos + Empresa_Pagadora + Edad + log(Antiguedad) + Sueldo_Neto, family = "binomial",
- data = training_set)
- pred_logi_full <- predict(logi_full, newdata = test_set,
- type = "response")
- summary(logi_full)
- ##
- ## Call:
- ## glm(formula = Impago ~ PeriodicidadDos + Empresa_Pagadora + Edad +
- ## log(Antiguedad) + Sueldo_Neto, family = "binomial", data = training_set)
- ##
- ## Coefficients:
- ## Estimate Std. Error z value Pr(>|z|)
- ## (Intercept) -1.409e+01 5.739e+02 -0.025 0.980417
- ## PeriodicidadDos 1.379e+00 1.945e-01 7.089 1.35e-12 ***
- ## Empresa_PagadoraDenso 1.359e+01 5.739e+02 0.024 0.981106
- ## Empresa_PagadoraLa Tradicional 1.466e+01 5.739e+02 0.026 0.979624
- ## Empresa_PagadoraMINCO 1.379e+01 5.739e+02 0.024 0.980829
- ## Empresa_PagadoraMuebles Krill 1.523e+01 5.739e+02 0.027 0.978823
- ## Empresa_PagadoraOtros 1.531e+01 5.739e+02 0.027 0.978720
- ## Empresa_PagadoraRoan 1.390e+01 5.739e+02 0.024 0.980674
- ## Empresa_PagadoraSeguridad MG 1.469e+01 5.739e+02 0.026 0.979574
- ## Edad -3.916e-02 1.409e-02 -2.778 0.005461 **
- ## log(Antiguedad) -6.421e-01 1.714e-01 -3.747 0.000179 ***
- ## Sueldo_Neto -7.813e-05 3.739e-05 -2.089 0.036681 *
- ## ---
- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- ##
- ## (Dispersion parameter for binomial family taken to be 1)
- ##
- ## Null deviance: 595.77 on 577 degrees of freedom
- ## Residual deviance: 479.51 on 566 degrees of freedom
- ## AIC: 503.51
- ##
- ## Number of Fisher Scoring iterations: 15
- range(pred_logi_full)
- ## [1] 2.706328e-10 8.020608e-01
- logi_full$aic
- ## [1] 503.5134
- pred_logi <- data.frame(cbind(pred_logi_full))
- pred_logi <- gather(pred_logi, key = "model", value = "pred")
- ggplot(pred_logi[pred_logi$model == "pred_logi_full", ],
- aes(x = pred, fill = model)) +
- geom_density(alpha = 0.4) +
- labs(y = "Density", x = "Default prediction") +
- theme(legend.position = "bottom", legend.title = element_blank())
- cutoff <- quantile(pred_logi_full, .75)
- pred_full_20 <- ifelse(pred_logi_full > cutoff, 1, 0)
- real_pred_20 <- cbind.data.frame(test_set$Impago, pred_full_20,
- "Did the model succeed?" = test_set$Impago==pred_full_20)
- accepted_loans <- real_pred_20[pred_full_20 == 0, 1]
- bad_rate <- sum(accepted_loans == 1)/length(accepted_loans)
- bad_rate
- ## [1] 0.1013825
- bank <- function(prob_of_def){
- cutoff <- rep(NA, 21)
- bad_rate <- rep(NA, 21)
- accept_rate <- seq(1, 0, by = -0.05)
- for (i in 1:21){
- cutoff[i] <- quantile(prob_of_def, accept_rate[i])
- pred_i <- ifelse(prob_of_def > cutoff[i], 1, 0)
- pred_as_good <- test_set$Impago[pred_i == 0]
- bad_rate[i] <- sum(pred_as_good == 1)/length(pred_as_good)}
- table <- cbind(accept_rate, cutoff = round(cutoff, 4),
- bad_rate = round(bad_rate, 4))
- return(list(table = table, bad_rate = bad_rate,
- accept_rate = accept_rate, cutoff = cutoff))
- }
- bank_logi_full <- bank((pred_logi_full))
- data.frame(accept_rate = bank_logi_full$accept_rate,
- "Good_model_bad_rate)" = bank_logi_full$bad_rate)
- ## accept_rate Good_model_bad_rate.
- ## 1 1.00 0.20344828
- ## 2 0.95 0.18545455
- ## 3 0.90 0.16091954
- ## 4 0.85 0.14634146
- ## 5 0.80 0.13362069
- ## 6 0.75 0.10138249
- ## 7 0.70 0.09359606
- ## 8 0.65 0.09574468
- ## 9 0.60 0.09195402
- ## 10 0.55 0.06918239
- ## 11 0.50 0.06896552
- ## 12 0.45 0.06870229
- ## 13 0.40 0.06896552
- ## 14 0.35 0.03921569
- ## 15 0.30 0.03448276
- ## 16 0.25 0.04109589
- ## 17 0.20 0.03448276
- ## 18 0.15 0.02272727
- ## 19 0.10 0.03448276
- ## 20 0.05 0.06666667
- ## 21 0.00 0.00000000
- ROC_logi_full <- roc(test_set$Impago, pred_logi_full)
- ## Setting levels: control = 0, case = 1
- ## Setting direction: controls < cases
- # Draw the ROCs on one plot
- plot(ROC_logi_full, col = "red")
- auc(ROC_logi_full)
- ## Area under the curve: 0.7911
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement