# Load the dataset into the data frame `mmdata1`.
# Change the path below to your file location.
mmdata1 <- read.csv(
  file = "/Users/dpwal/downloads/mmdata1.csv"
)

Splitting the Data

# Fix the random seed so the train/test split below is reproducible.
set.seed(123)

# Ask caret for a single partition (times = 1) that places a proportion of
# 0.8 of the rows in the training set, partitioning on Net.Rating.
# list = FALSE is passed so the result can be used directly as a row index.
trainingIndex <- createDataPartition(
  y = mmdata1$Net.Rating,
  p = 0.8,
  list = FALSE,
  times = 1
)

# Rows selected by the index form the training set; all remaining rows
# (negative indexing) form the testing set.
trainingData <- mmdata1[trainingIndex, ]
testingData <- mmdata1[-trainingIndex, ]

# Fit a linear model of Net.Rating on all twelve candidate predictors,
# using the training rows only.
model <- lm(
  Net.Rating ~
    Off.2PT.FG.Rank + Off.3PT.FG.Rank + Off.FT.Rank +
    Def.2PT.FG.Rank + Def.3PT.FG.Rank + Def.FT.Rank +
    Off.TO...Rank + Def.TO...Rank +
    RankTempo + Active.Coaching.Length.Index +
    RankOppBlockPct + RankBlockPct,
  data = trainingData
)

# Print the coefficient table and overall fit statistics for the full model.
summary(model)
## 
## Call:
## lm(formula = Net.Rating ~ Off.2PT.FG.Rank + Off.3PT.FG.Rank + 
##     Off.FT.Rank + Def.2PT.FG.Rank + Def.3PT.FG.Rank + Def.FT.Rank + 
##     Off.TO...Rank + Def.TO...Rank + RankTempo + Active.Coaching.Length.Index + 
##     RankOppBlockPct + RankBlockPct, data = trainingData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.282  -6.014  -1.113   5.945  27.774 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   2.398960  14.758578   0.163   0.8710    
## Off.2PT.FG.Rank               0.050578   0.024435   2.070   0.0394 *  
## Off.3PT.FG.Rank               0.038685   0.028258   1.369   0.1721    
## Off.FT.Rank                   0.007103   0.015106   0.470   0.6386    
## Def.2PT.FG.Rank              -0.007586   0.017305  -0.438   0.6615    
## Def.3PT.FG.Rank               0.002431   0.018116   0.134   0.8933    
## Def.FT.Rank                   0.006204   0.013673   0.454   0.6504    
## Off.TO...Rank                -0.056667   0.005609 -10.104  < 2e-16 ***
## Def.TO...Rank                -0.013946   0.006111  -2.282   0.0232 *  
## RankTempo                    -0.002448   0.005442  -0.450   0.6531    
## Active.Coaching.Length.Index  0.193254   0.104687   1.846   0.0660 .  
## RankOppBlockPct              -0.009022   0.005852  -1.542   0.1243    
## RankBlockPct                 -0.034842   0.005720  -6.091 3.72e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.393 on 278 degrees of freedom
## Multiple R-squared:  0.409,  Adjusted R-squared:  0.3834 
## F-statistic: 16.03 on 12 and 278 DF,  p-value: < 2.2e-16

Model Evaluation

# Stepwise selection starting from the full model. With direction = "both",
# each step may either drop a term or add back a previously dropped one;
# the trace printed by step() reports the AIC of every candidate change.
model_stepwise <- step(object = model, direction = "both")
## Start:  AIC=1316.34
## Net.Rating ~ Off.2PT.FG.Rank + Off.3PT.FG.Rank + Off.FT.Rank + 
##     Def.2PT.FG.Rank + Def.3PT.FG.Rank + Def.FT.Rank + Off.TO...Rank + 
##     Def.TO...Rank + RankTempo + Active.Coaching.Length.Index + 
##     RankOppBlockPct + RankBlockPct
## 
##                                Df Sum of Sq   RSS    AIC
## - Def.3PT.FG.Rank               1       1.6 24527 1314.3
## - Def.2PT.FG.Rank               1      17.0 24542 1314.5
## - RankTempo                     1      17.9 24543 1314.5
## - Def.FT.Rank                   1      18.2 24544 1314.5
## - Off.FT.Rank                   1      19.5 24545 1314.6
## - Off.3PT.FG.Rank               1     165.3 24691 1316.3
## <none>                                      24525 1316.3
## - RankOppBlockPct               1     209.7 24735 1316.8
## - Active.Coaching.Length.Index  1     300.6 24826 1317.9
## - Off.2PT.FG.Rank               1     378.0 24903 1318.8
## - Def.TO...Rank                 1     459.4 24985 1319.7
## - RankBlockPct                  1    3273.5 27799 1350.8
## - Off.TO...Rank                 1    9006.0 33531 1405.3
## 
## Step:  AIC=1314.35
## Net.Rating ~ Off.2PT.FG.Rank + Off.3PT.FG.Rank + Off.FT.Rank + 
##     Def.2PT.FG.Rank + Def.FT.Rank + Off.TO...Rank + Def.TO...Rank + 
##     RankTempo + Active.Coaching.Length.Index + RankOppBlockPct + 
##     RankBlockPct
## 
##                                Df Sum of Sq   RSS    AIC
## - RankTempo                     1      18.3 24545 1312.6
## - Off.FT.Rank                   1      19.6 24547 1312.6
## - Def.FT.Rank                   1      42.8 24570 1312.9
## - Off.3PT.FG.Rank               1     166.0 24693 1314.3
## <none>                                      24527 1314.3
## - RankOppBlockPct               1     209.2 24736 1314.8
## - Def.2PT.FG.Rank               1     264.2 24791 1315.5
## - Active.Coaching.Length.Index  1     304.9 24832 1316.0
## + Def.3PT.FG.Rank               1       1.6 24525 1316.3
## - Off.2PT.FG.Rank               1     379.5 24907 1316.8
## - Def.TO...Rank                 1     458.2 24985 1317.7
## - RankBlockPct                  1    3280.5 27808 1348.9
## - Off.TO...Rank                 1    9260.3 33787 1405.6
## 
## Step:  AIC=1312.57
## Net.Rating ~ Off.2PT.FG.Rank + Off.3PT.FG.Rank + Off.FT.Rank + 
##     Def.2PT.FG.Rank + Def.FT.Rank + Off.TO...Rank + Def.TO...Rank + 
##     Active.Coaching.Length.Index + RankOppBlockPct + RankBlockPct
## 
##                                Df Sum of Sq   RSS    AIC
## - Off.FT.Rank                   1      15.3 24561 1310.8
## - Def.FT.Rank                   1      42.7 24588 1311.1
## - Off.3PT.FG.Rank               1     159.1 24704 1312.5
## <none>                                      24545 1312.6
## - RankOppBlockPct               1     206.2 24752 1313.0
## - Def.2PT.FG.Rank               1     269.6 24815 1313.8
## - Active.Coaching.Length.Index  1     307.6 24853 1314.2
## + RankTempo                     1      18.3 24527 1314.3
## + Def.3PT.FG.Rank               1       2.0 24543 1314.5
## - Off.2PT.FG.Rank               1     367.0 24912 1314.9
## - Def.TO...Rank                 1     463.3 25009 1316.0
## - RankBlockPct                  1    3296.2 27842 1347.2
## - Off.TO...Rank                 1    9245.8 33791 1403.6
## 
## Step:  AIC=1310.75
## Net.Rating ~ Off.2PT.FG.Rank + Off.3PT.FG.Rank + Def.2PT.FG.Rank + 
##     Def.FT.Rank + Off.TO...Rank + Def.TO...Rank + Active.Coaching.Length.Index + 
##     RankOppBlockPct + RankBlockPct
## 
##                                Df Sum of Sq   RSS    AIC
## - Def.FT.Rank                   1      44.0 24605 1309.3
## <none>                                      24561 1310.8
## - RankOppBlockPct               1     214.1 24775 1311.3
## - Def.2PT.FG.Rank               1     272.7 24833 1312.0
## - Active.Coaching.Length.Index  1     309.3 24870 1312.4
## + Off.FT.Rank                   1      15.3 24545 1312.6
## + RankTempo                     1      13.9 24547 1312.6
## + Def.3PT.FG.Rank               1       2.1 24559 1312.7
## - Def.TO...Rank                 1     454.2 25015 1314.1
## - Off.3PT.FG.Rank               1     560.9 25122 1315.3
## - Off.2PT.FG.Rank               1    1280.6 25841 1323.5
## - RankBlockPct                  1    3341.7 27902 1345.9
## - Off.TO...Rank                 1    9344.2 33905 1402.6
## 
## Step:  AIC=1309.27
## Net.Rating ~ Off.2PT.FG.Rank + Off.3PT.FG.Rank + Def.2PT.FG.Rank + 
##     Off.TO...Rank + Def.TO...Rank + Active.Coaching.Length.Index + 
##     RankOppBlockPct + RankBlockPct
## 
##                                Df Sum of Sq   RSS    AIC
## <none>                                      24605 1309.3
## - RankOppBlockPct               1     219.8 24824 1309.9
## + Def.FT.Rank                   1      44.0 24561 1310.8
## + Def.3PT.FG.Rank               1      26.1 24579 1311.0
## - Def.2PT.FG.Rank               1     316.5 24921 1311.0
## + Off.FT.Rank                   1      16.6 24588 1311.1
## + RankTempo                     1      13.7 24591 1311.1
## - Active.Coaching.Length.Index  1     346.6 24951 1311.3
## - Def.TO...Rank                 1     414.4 25019 1312.1
## - Off.3PT.FG.Rank               1     518.6 25123 1313.3
## - Off.2PT.FG.Rank               1    1236.8 25841 1321.5
## - RankBlockPct                  1    3307.2 27912 1344.0
## - Off.TO...Rank                 1    9644.4 34249 1403.5
# Print the coefficient table and fit statistics of the stepwise-selected model.
summary(model_stepwise)
## 
## Call:
## lm(formula = Net.Rating ~ Off.2PT.FG.Rank + Off.3PT.FG.Rank + 
##     Def.2PT.FG.Rank + Off.TO...Rank + Def.TO...Rank + Active.Coaching.Length.Index + 
##     RankOppBlockPct + RankBlockPct, data = trainingData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.9351  -6.0004  -0.8214   5.8979  27.9550 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   9.517290   4.176807   2.279 0.023438 *  
## Off.2PT.FG.Rank               0.038995   0.010357   3.765 0.000203 ***
## Off.3PT.FG.Rank               0.025279   0.010369   2.438 0.015391 *  
## Def.2PT.FG.Rank              -0.010560   0.005545  -1.905 0.057859 .  
## Off.TO...Rank                -0.057105   0.005432 -10.514  < 2e-16 ***
## Def.TO...Rank                -0.012107   0.005556  -2.179 0.030139 *  
## Active.Coaching.Length.Index  0.205199   0.102959   1.993 0.047222 *  
## RankOppBlockPct              -0.009211   0.005803  -1.587 0.113586    
## RankBlockPct                 -0.034422   0.005591  -6.157 2.55e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.341 on 282 degrees of freedom
## Multiple R-squared:  0.407,  Adjusted R-squared:  0.3902 
## F-statistic:  24.2 on 8 and 282 DF,  p-value: < 2.2e-16
# Score the held-out testing rows with the stepwise-selected model.
predictions <- predict(
  model_stepwise,
  newdata = testingData
)

# Root-mean-squared error between actual and predicted Net.Rating on the
# testing rows, printed as a labelled string.
rmse <- sqrt(
  mean((testingData$Net.Rating - predictions)^2)
)
print(paste("RMSE:", rmse))
## [1] "RMSE: 9.62033884774095"
# Extract and print R^2 from the stepwise model's summary.
# NOTE: this value is read from the fitted model's summary, so it describes
# the fit on trainingData; it is not computed from the testingData
# predictions that the RMSE above is based on.
r_squared <- summary(model_stepwise)[["r.squared"]]
print(paste("R-squared:", r_squared))
## [1] "R-squared: 0.407047857794528"

The stepwise selection process refined the model to focus on the variables most predictive of Net.Rating, yielding a model with Off.2PT.FG.Rank, Off.3PT.FG.Rank, Def.2PT.FG.Rank, Off.TO.Rank, Def.TO.Rank, Active.Coaching.Length.Index, RankOppBlockPct, and RankBlockPct. This refined model emphasizes the significance of both offensive and defensive metrics in determining a team’s net rating, highlighting the balance between scoring efficiency, turnover management, and blocking.

Visualize Results

Actual vs. Predicted

# Actual vs. predicted Net.Rating on the held-out testing rows.
# The values are gathered into a data frame so aes() can refer to bare
# column names instead of `testingData$...` expressions, and the red
# reference line is the identity line y = x (slope 1, intercept 0):
# points lying on it are predicted exactly.
ggplot(
  data = data.frame(
    actual = testingData$Net.Rating,
    predicted = unname(predictions)
  ),
  mapping = aes(x = actual, y = predicted)
) +
  geom_point(colour = "blue") +
  geom_abline(slope = 1, intercept = 0, colour = "red") +
  labs(title = "Actual vs. Predicted Net.Rating", x = "Actual Net.Rating", y = "Predicted Net.Rating") +
  theme_minimal()

Coefficients Interpretation:

Offensive Metrics: Both Off.2PT.FG.Rank and Off.3PT.FG.Rank are significant, with positive coefficients indicating that teams better at shooting tend to have higher net ratings. The significant negative coefficient for Off.TO.Rank underscores the negative impact of turnovers on team performance.

Defensive Metrics: Def.TO.Rank has a significant negative relationship with Net.Rating (p ≈ 0.030), similar to offensive turnovers, indicating that forcing turnovers is beneficial. However, Def.2PT.FG.Rank, though retained in the final model, has a negative coefficient that is only marginally significant (p ≈ 0.058), suggesting a more nuanced role in performance.

RMSE & R-squared: Our model, optimized through stepwise regression, yields an RMSE of 9.62 on the held-out testing data and an R² of 0.407 on the training data. The RMSE indicates that our predictions typically differ from the actual net ratings by about 9.62 points, a measure of prediction error, whereas the R² value indicates that 40.7% of the variability in net ratings is explained by our selected variables. This reflects a moderate predictive ability, highlighting both the model’s strengths in capturing key factors affecting team performance and its limitations, since roughly 59% of the variability remains unexplained. Essentially, our findings underscore the impact of specific offensive and defensive metrics on team net ratings.

Graphs of Our Most Significant Variables

# Off.2PT.FG.Rank vs. Net.Rating: scatter plot of the training rows with a
# fitted linear trend line (blue).
ggplot(
  data = trainingData,
  mapping = aes(x = Off.2PT.FG.Rank, y = Net.Rating)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "blue") +
  labs(
    title = "Off.2PT.FG.Rank vs. Net.Rating",
    x = "Off.2PT.FG.Rank",
    y = "Net.Rating"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Off.TO.Rank vs. Net.Rating: scatter plot of the training rows with a
# fitted linear trend line (red). The column is named Off.TO...Rank in the
# data frame; the shorter "Off.TO.Rank" is used only as the display label.
ggplot(
  data = trainingData,
  mapping = aes(x = Off.TO...Rank, y = Net.Rating)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red") +
  labs(
    title = "Off.TO.Rank vs. Net.Rating",
    x = "Off.TO.Rank",
    y = "Net.Rating"
  )
## `geom_smooth()` using formula = 'y ~ x'

# RankBlockPct vs. Net.Rating: scatter plot of the training rows with a
# fitted linear trend line (green).
ggplot(
  data = trainingData,
  mapping = aes(x = RankBlockPct, y = Net.Rating)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "green") +
  labs(
    title = "RankBlockPct vs. Net.Rating",
    x = "RankBlockPct",
    y = "Net.Rating"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Active.Coaching.Length.Index vs. Net.Rating: scatter plot of the training
# rows with a fitted linear trend line (purple), drawn with the minimal theme.
ggplot(
  data = trainingData,
  mapping = aes(x = Active.Coaching.Length.Index, y = Net.Rating)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "purple") +
  labs(
    title = "Active.Coaching.Length.Index vs. Net.Rating",
    x = "Active Coaching Length Index",
    y = "Net.Rating"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'