Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Load necessary library
- library(ggplot2)
- library(corrplot)
- library(car) # for vif() function
# Inspect the first rows of the built-in mtcars dataset.
head(mtcars)

# Compute and display the pairwise correlation matrix for every column.
corr_matrix <- cor(mtcars)
print(corr_matrix)

# Visualize the correlations with corrplot (circle glyphs scaled by strength).
corrplot(corr_matrix, method = "circle")
# Alternative view: a correlation heatmap built with ggplot2.
# Reshape the matrix into long format (one row per variable pair).
corr_data <- reshape2::melt(corr_matrix)
names(corr_data) <- c("Variable 1", "Variable 2", "Correlation")

# BUG FIX: aes() must reference columns, not string literals. Quoting the
# names ('Variable 1') maps a constant string to each axis, collapsing the
# plot to a single tile. Backticks refer to the actual space-containing
# column names.
ggplot(corr_data, aes(x = `Variable 1`, y = `Variable 2`, fill = Correlation)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0, limit = c(-1, 1), space = "Lab",
    name = "Pearson\nCorrelation"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1,
                                   size = 12, hjust = 1)) +
  coord_fixed()
# Fit a multiple linear regression: predict mpg (miles per US gallon)
# from hp (gross horsepower) and wt (weight, in 1000 lbs).
model <- lm(mpg ~ hp + wt, data = mtcars)

# The summary reports a coefficient for each term (Intercept, hp, wt)
# with its significance level; each p-value tells us whether that
# predictor is significant after accounting for the other terms.
summary(model)

# Check multicollinearity via variance inflation factors.
print(vif(model))
# If VIF values are high (> 5, or > 10 by a looser rule), the predictors
# are collinear; consider dropping one of them or using regularization.
# Suppose the VIF for "hp" was high: refit the model without it.
model2 <- lm(mpg ~ wt, data = mtcars)

# BUG FIX: car::vif() stops with an error ("model contains fewer than 2
# terms") on a single-predictor model, so only compute VIF when there are
# at least two predictors besides the intercept.
if (length(coef(model2)) > 2) {
  print(vif(model2))
} else {
  message("VIF is not defined for a single-predictor model.")
}
# Generate in-sample predictions from the two-predictor model and store
# them alongside the observed values.
mtcars$predicted_mpg <- predict(model, newdata = mtcars)

# Scatter actual vs. predicted mpg. The dashed red line is the identity
# (y = x): the closer the points sit to it, the more accurate the
# predictions are.
ggplot(mtcars, aes(x = mpg, y = predicted_mpg)) +
  geom_point() +
  geom_abline(
    intercept = 0, slope = 1,
    color = "red", linetype = "dashed"
  ) +
  labs(
    title = "Actual vs Predicted MPG",
    x = "Actual MPG",
    y = "Predicted MPG"
  ) +
  theme_minimal()
# Check the linear-model assumptions by examining the residuals.
# FIX: name the local so it does not shadow stats::residuals().
model_residuals <- residuals(model)

# Residuals vs. observation index: ideally they scatter randomly around
# zero, suggesting the model's assumptions are met.
plot(model_residuals, main = "Residuals of the Model",
     ylab = "Residuals", xlab = "Index")
abline(h = 0, col = "red")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement