Advertisement
897bhgy

Untitled

Jul 14th, 2023
553
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.85 KB | Source Code | 0 0
  1. library(corrplot)
  2. library(ggplot2)
  3. library(reshape2)
  4.  
  5.  
  # Fix the RNG seed so the simulated data set is reproducible
  set.seed(123)

  # Number of simulated houses. Defined once so the size and noise vectors
  # below are guaranteed to have the same length.
  n_houses <- 100

  # Simulate house sizes in square feet
  size <- rnorm(n_houses, mean = 1500, sd = 500)

  # Simulate house prices: '50000 + 100 * size' is the linear relationship
  # between size and price, and the 'rnorm()' term adds random noise
  price <- 50000 + 100 * size + rnorm(n_houses, mean = 0, sd = 50000)

  # Combine the sizes and corresponding prices into one data frame
  df <- data.frame(size, price)

  # Calculate and print the correlation between size and price
  correlation <- cor(df$size, df$price)
  cat("Correlation between size and price: ", correlation, "\n")
  19.  
  # Scatter plot of the raw data: house size on the x-axis, price on the y-axis
  plot(
    x = df[["size"]],
    y = df[["price"]],
    main = "Scatter plot of size vs price",
    xlab = "Size",
    ylab = "Price",
    pch = 19
  )
  22.  
  # Correlation matrix across all columns of df, printed to the console
  corr_matrix <- cor(df)
  print(corr_matrix)

  # Draw the correlation matrix as a colored heatmap (upper triangle only)
  corrplot(
    corr_matrix,
    method = "color",
    type = "upper",
    order = "hclust",
    addCoef.col = "black", # Print the correlation coefficients on the tiles
    tl.col = "black",      # Color of the variable labels
    tl.srt = 45            # Rotation angle of the variable labels
  )
  32.  
  # We use a linear regression model to analyze the relationship between
  # house size (independent variable) and price (dependent variable).
  # Null Hypothesis (H0): there is no linear relationship between house size
  #   and price (the slope is zero).
  # Alternative Hypothesis (H1): there is a linear relationship between house
  #   size and price (the slope is not zero).

  # Fit the linear regression: 'price' is the dependent variable and 'size'
  # is the independent variable
  model <- lm(price ~ size, data = df)

  # Redraw the scatter plot and overlay the fitted regression line
  plot(df$size, df$price, main = "Scatter plot of size vs price", xlab = "Size", ylab = "Price", pch = 19)
  abline(model, col = "red")

  # Print the model summary (coefficients, R-squared, p-values).
  # print() is explicit because a bare top-level expression is not
  # auto-printed when the script is run through source() with its defaults.
  print(summary(model))
  47.  
  48. # Interpretation Hints:
  49. # 1. Look at the 'Estimate' for 'size' in the 'Coefficients' table. This is the slope of the regression line, and it indicates the change in house price for each one-unit increase in house size. If the p-value associated with this estimate is less than 0.05, then the relationship is significant, and we can reject the null hypothesis.
  50. # 2. The '(Intercept)' in the 'Coefficients' table is the y-intercept of the regression line, which is the predicted price when the size is zero.
  51. # 3. The 'Residuals' section gives you information about the distribution of the residuals, which should ideally be normally distributed. Look for any large deviations in these values.
  52. # 4. The 'R-squared' value indicates the proportion of variance in the dependent variable that can be explained by the independent variable(s). The closer this value is to 1, the better the fit of the model.
  53.  
  # Predict prices from the fitted model for the same houses it was fitted on.
  # Computed once and reused for both the console output and the plot.
  predicted_prices <- predict(model, newdata = df)
  print("Predicted prices based on the model:")
  print(predicted_prices)

  # Pair each actual price with its prediction for plotting
  plot_df <- data.frame(Actual = df$price, Predicted = predicted_prices)

  # Actual vs predicted price. The dashed red line is y = x, i.e. where the
  # points would fall if every prediction were exact.
  # print() is explicit so the plot is also rendered when the script is run
  # through source(), which does not auto-print top-level values by default.
  print(
    ggplot(data = plot_df, aes(x = Actual, y = Predicted)) +
      geom_point() +
      geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
      labs(title = "Actual vs Predicted Prices",
           x = "Actual Price",
           y = "Predicted Price") +
      theme_minimal()
  )
  73.  
  # Extract the residuals of the fitted model and print them.
  # NOTE(review): this variable shares its name with stats::residuals();
  # calls such as residuals(model) still resolve to the function.
  residuals <- resid(model)
  print("Residuals of the model:")
  print(residuals)

  # Histogram of the residuals as a visual check of their distribution
  hist(
    x = residuals,
    main = "Histogram of Residuals",
    xlab = "Residuals",
    col = "lightblue"
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement