Advertisement
897bhgy

Untitled

Jul 14th, 2023
533
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.06 KB | Source Code | 0 0
  # Correlation analysis and multiple linear regression on the built-in
  # `mtcars` dataset: correlation matrix + two correlation plots, an
  # `mpg ~ hp + wt` model, a multicollinearity (VIF) check, an
  # actual-vs-predicted plot and a residual plot.

  # Packages ----
  library(ggplot2)
  library(corrplot)
  library(car)  # for vif()

  # Data ----
  # mtcars is a built-in dataset in R. Let's see what it looks like.
  head(mtcars)

  # Correlation ----
  # Pairwise Pearson correlations between all columns of mtcars.
  corr_matrix <- cor(mtcars)
  print(corr_matrix)

  # Correlation plot using corrplot().
  corrplot(corr_matrix, method = "circle")

  # The same information as a ggplot2 heatmap. melt() turns the square
  # matrix into one row per (row variable, column variable) pair.
  corr_data <- reshape2::melt(corr_matrix)
  names(corr_data) <- c("Variable 1", "Variable 2", "Correlation")

  # The column names contain a space, so they must be wrapped in backticks
  # inside aes(). Quoting them ('Variable 1') would map every row to the
  # same constant string and collapse the heatmap into a single tile.
  ggplot(corr_data, aes(`Variable 1`, `Variable 2`, fill = Correlation)) +
    geom_tile() +
    scale_fill_gradient2(
      low = "blue", high = "red", mid = "white",
      midpoint = 0, limits = c(-1, 1), space = "Lab",
      name = "Pearson\nCorrelation"
    ) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)
    ) +
    coord_fixed()

  # Regression model ----
  # Predict "mpg" (Miles/(US) gallon) from "hp" (Gross horsepower) and
  # "wt" (Weight (1000 lbs)).
  model <- lm(mpg ~ hp + wt, data = mtcars)

  # The summary lists the coefficient of each term (Intercept, hp, wt) with
  # its significance level. The p-value of hp (or wt) tells us whether that
  # variable is a significant predictor of mpg after accounting for the
  # other variable in the model.
  summary(model)

  # Multicollinearity ----
  # High VIF values (typically > 5 or > 10) indicate multicollinearity.
  # You may then want to remove one of the predictors or use
  # regularization techniques.
  vif_values <- vif(model)
  print(vif_values)

  # For instance, assume the VIF for "hp" was high: refit without "hp".
  model2 <- lm(mpg ~ wt, data = mtcars)

  # car::vif() stops with "model contains fewer than 2 terms" when the
  # model has a single predictor (there is nothing to be collinear with),
  # so only call it when at least two terms are present.
  if (length(labels(terms(model2))) >= 2) {
    print(vif(model2))
  } else {
    message("VIF is not defined for a model with fewer than 2 terms.")
  }

  # Predictions ----
  # Predictions from the first (two-predictor) model, stored next to the
  # observed values.
  mtcars$predicted_mpg <- predict(model, newdata = mtcars)

  # Actual vs predicted values. The dashed red line is the identity line
  # (y = x), i.e. where a perfect prediction would fall; it is not a fitted
  # line. The closer the points are to it, the more accurate the
  # predictions.
  ggplot(mtcars, aes(x = mpg, y = predicted_mpg)) +
    geom_point() +
    geom_abline(
      intercept = 0, slope = 1, color = "red", linetype = "dashed"
    ) +
    labs(
      title = "Actual vs Predicted MPG",
      x = "Actual MPG",
      y = "Predicted MPG"
    ) +
    theme_minimal()

  # Residual diagnostics ----
  # Stored under a name that does not shadow stats::residuals().
  model_residuals <- residuals(model)

  # Residuals against the observation index. Ideally they scatter randomly
  # around zero (red line), which would suggest that the model's
  # assumptions are met.
  plot(
    model_residuals,
    main = "Residuals of the Model", ylab = "Residuals", xlab = "Index"
  )
  abline(h = 0, col = "red")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement