#setwd(path)
# Point the working directory at the folder of the active RStudio document so
# that the relative data path used later resolves. The call is guarded because
# rstudioapi only works inside a running RStudio session; elsewhere (Rscript,
# source() from a plain console, package not installed) the current working
# directory is left untouched instead of aborting the script.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  active_doc_path <- rstudioapi::getActiveDocumentContext()$path
  # An unsaved document reports an empty path; dirname("") is not a usable dir.
  if (nzchar(active_doc_path)) {
    setwd(dirname(active_doc_path))
  }
}
# clear the environment (also drops the temporary variable created above)
rm(list = ls())


# Install (when missing) and attach the packages used by this script.
listofpackages <- c(
  "lrmest", "dplyr", "assertthat", "bindrcpp", "glue", "pkgconfig", "utf8",
  "cli", "ellipse", "reshape2", "ggplot2", "dygraphs", "aod"
)

# Query the installed-package table once instead of once per package;
# installed.packages() scans the library directories on every call.
missing_packages <- setdiff(listofpackages, installed.packages()[, "Package"])
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach every package; character.only is needed because the name is a string.
for (j in listofpackages) {
  library(j, character.only = TRUE)
}

# Read the team-level table; the path is relative to the working directory.
NBAdata <- read.csv(
  file = "../data_L_2020/Teams_overall2025.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Quick look at the first rows and at the storage type of the loaded object.
head(NBAdata)
typeof(NBAdata)
# Remember that season 2004/2005 is coded as 2005 and Oklahoma City Thunder
# were Seattle Supersonics until 2007.



# DATA TRANSFORMATION
# Missed field goals / free throws = attempts minus makes. The O-prefixed
# columns get the same treatment (presumably opponent totals -- confirm
# against the data dictionary).
NBAdata$FGMISS <- with(NBAdata, FGA - FG)
NBAdata$FTMISS <- with(NBAdata, FTA - FT)
NBAdata$OFGMISS <- with(NBAdata, OFGA - OFG)
NBAdata$OFTMISS <- with(NBAdata, OFTA - OFT)
# Total misses: field goals plus free throws.
NBAdata$MISS <- with(NBAdata, FGMISS + FTMISS)
NBAdata$OMISS <- with(NBAdata, OFGMISS + OFTMISS)
# Share of games won out of wins plus losses.
NBAdata$W. <- with(NBAdata, W / (W + L))



# NBA EFFICIENCY MEASURES
# PIR
# Positive contributions (points, rebounds, steals, blocks) minus misses and
# turnovers, computed from the plain columns and from the O-prefixed columns.
NBAdata$PIR_eff <- with(NBAdata, PTS + TRB + STL + BLK - MISS - TOV)
NBAdata$PIR_eff_o <- with(NBAdata, OPTS + OTRB + OSTL + OBLK - OMISS - OTOV)
# Differential between the two ratings.
NBAdata$PIR_d <- with(NBAdata, PIR_eff - PIR_eff_o)

# WINSCORE
# Possessions employed and points scored per game / per employed possession.
# NOTE(review): the free-throw weight is 0.44 here but 0.45 in TEAM_R and
# acq_poss below -- confirm that the difference is intended.
NBAdata$empl_poss <- with(NBAdata, FGA + 0.44 * FTA + TOV - ORB)
NBAdata$ptsxgame <- with(NBAdata, PTS / G)
NBAdata$ptsxposs <- with(NBAdata, PTS / empl_poss)
# Intermediate terms feeding the acquired-possessions count.
NBAdata$FGAD <- with(NBAdata, FGA - OFG - OTOV - TRB + TOV)
NBAdata$TEAM_R <- with(NBAdata, FGAD - 0.45 * OFT + 0.45 * FTA)
NBAdata$acq_poss <- with(NBAdata, OTOV + DRB + TEAM_R + OFG + 0.45 * OFT)
# Points allowed per game / per acquired possession.
NBAdata$ptsaxgame <- with(NBAdata, OPTS / G)
NBAdata$ptsall_poss <- with(NBAdata, OPTS / acq_poss)
# Differential: points per employed possession minus points allowed per
# acquired possession.
NBAdata$WS_d <- with(NBAdata, (ptsxposs - ptsall_poss))

# FOUR FACTORS
# Each factor combines the *_OF and *_DEF versions of one column. Factors 2
# and 3 are scaled by 0.01; factor 3 is built both as a sum minus 1 (F3) and
# as a plain difference (F3_W).
NBAdata$F1 <- with(NBAdata, X1_OF - X1_DEF)
NBAdata$F2 <- with(NBAdata, 0.01 * (X2_OF - X2_DEF))
NBAdata$F3 <- with(NBAdata, 0.01 * (X3_OF + X3_DEF) - 1)
NBAdata$F3_W <- with(NBAdata, 0.01 * (X3_OF - X3_DEF))
NBAdata$F4 <- with(NBAdata, X4_OF - X4_DEF)

# MODEL ESTIMATION measuring the impact of defensive and offensive efficiency
# Estimation sample: seasons strictly between 1993 and 2006, without 1999.
regeff <- subset(
  x = NBAdata,
  subset = Season > 1993 & Season < 2006 & Season != 1999
)
# Season as text, so that it can enter a regression as a categorical term.
regeff[["Season_dum"]] <- as.character(regeff[["Season"]])
# Scatter of wins against season.
plot(
  x = regeff$Season,
  y = regeff$W,
  col = "red",
  ylim = c(0, 85),
  xlab = "Seasons",
  ylab = "Wins of each team",
  main = "WINS"
)

# Correlations between wins and the two efficiency differentials, computed on
# rows without missing values.
datacor00 <- regeff[c("W", "PIR_d", "WS_d")]
datacor00 <- na.omit(datacor00)
cor.datacor <- cor(datacor00, use = "complete.obs")
cor.datacor

# nice graphics presentation of correlations needs package ellipse
# Reorder rows and columns by the first row of the correlation matrix, then
# colour each ellipse from an 11-step palette indexed by the correlation.
ord <- order(cor.datacor[1, ])
ordered.cor.datacor <- cor.datacor[ord, ord]
plotcorr(
  ordered.cor.datacor,
  col = cm.colors(11)[5 * ordered.cor.datacor + 6]
)

# even nicer correlation heatmap needs packages reshape2 ggplot2
# Round the correlations to two decimals and reshape the matrix to long form
# (one row per pair of variables) for plotting.
cormat <- round(cor(datacor00), digits = 2)
head(cormat)
melted_cormat <- melt(cormat)
head(melted_cormat)
# Plain tile plot of the full (symmetric) matrix.
ggplot(
  data = melted_cormat,
  mapping = aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile()
# Keep the lower triangle (diagonal included) of a correlation matrix.
# Entries strictly above the diagonal are replaced by NA; the input is a
# matrix and a matrix of the same shape is returned.
get_lower_tri <- function(cormat) {
  replace(cormat, upper.tri(cormat), NA)
}
# Keep the upper triangle (diagonal included) of a correlation matrix.
# Entries strictly below the diagonal are replaced by NA; the input is a
# matrix and a matrix of the same shape is returned.
get_upper_tri <- function(cormat) {
  below_diagonal <- lower.tri(cormat)
  cormat[below_diagonal] <- NA
  cormat
}
# Blank out the entries below the diagonal and print the result.
upper_tri <- get_upper_tri(cormat)
upper_tri
# Melt the correlation matrix, dropping the NA cells
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Heatmap: blue-white-red diverging scale centred on 0 and spanning [-1, 1],
# square tiles, x-axis labels rotated by 45 degrees.
ggplot(
  data = melted_cormat,
  mapping = aes(x = Var2, y = Var1, fill = value)
) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0, limit = c(-1, 1), space = "Lab",
    name = "Pearson\nCorrelation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)
  ) +
  coord_fixed()

# Linear regression of wins (W) on the PIR differential (PIR_d).
# The variables are looked up in regeff through `data =`, so the coefficient
# is labelled `PIR_d`. `na.exclude` makes fitted() return one value per row
# of regeff (NA for rows dropped because of missing data), so the assignment
# below cannot fail on a length mismatch; without missing values the
# estimates are the same as with the default NA handling.
reg_PIR <- lm(W ~ PIR_d, data = regeff, na.action = na.exclude)
summary(reg_PIR)
regeff$fit_PIR <- fitted(reg_PIR)

# Plot actual wins (blue points) and the fitted regression values (red line)
# against PIR_d. The x aesthetic refers to the column by bare name: columns
# are resolved inside the `data` argument, and `regeff$PIR_d` inside aes()
# bypasses that lookup and triggers a ggplot2 warning.
ggplot(regeff, aes(x = PIR_d)) +
  geom_point(aes(y = W), color = "blue", alpha = 0.6, size = 2, shape = 16) +  # Actual values
  geom_line(aes(y = fit_PIR), color = "red", size = 1) +                     # Fitted line
  labs(title = "Actual vs Fitted Values from Regression",
       x = "PIR_d",
       y = "W",
       caption = "Blue dots: actual values; Red line: fitted values") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))


# Actual vs fitted wins against PIR_d, with the rows whose Team is
# "Boston Celtics" drawn again as larger green points labelled "BC".
ggplot(data = regeff, mapping = aes(x = PIR_d)) +
  # All observations
  geom_point(mapping = aes(y = W), color = "blue", alpha = 0.6, size = 2) +
  # Boston Celtics: marker
  geom_point(
    data = regeff[which(regeff$Team == "Boston Celtics"), ],
    mapping = aes(y = W),
    color = "darkgreen",
    size = 3
  ) +
  # Boston Celtics: text label placed above the marker
  geom_text(
    data = regeff[which(regeff$Team == "Boston Celtics"), ],
    mapping = aes(y = W, label = "BC"),
    color = "darkgreen",
    vjust = -1,
    size = 3
  ) +
  # Regression fit
  geom_line(mapping = aes(y = fit_PIR), color = "red", size = 1) +
  # Titles, axis labels and caption
  labs(
    title = "Actual vs Fitted Values with Boston Celtics Highlighted",
    x = "PIR_d",
    y = "W",
    caption = "Blue dots: actual values | Red line: fitted values | Green points: Boston Celtics (BC)"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Wins regressed on the four factor differentials. `na.exclude` keeps
# fitted() aligned with the rows of regeff (NA where a row was dropped for
# missing data), so storing the fitted values as a column cannot fail on a
# length mismatch.
reg_4F <- lm(W ~ F1 + F2 + F3 + F4, data = regeff, na.action = na.exclude)
summary(reg_4F)
regeff$fit_4F <- fitted(reg_4F)

# Wins regressed on the WS_d differential, same NA handling as above.
reg_WS <- lm(W ~ WS_d, data = regeff, na.action = na.exclude)
summary(reg_WS)
regeff$fit_WS <- fitted(reg_WS)

# Pairwise regressions of wins on the fitted values of two models at a time,
# without intercept (`- 1`).
reg_enc1 <- lm(W ~ fit_PIR + fit_WS - 1, data = regeff)
summary(reg_enc1)

reg_enc2 <- lm(W ~ fit_PIR + fit_4F - 1, data = regeff)
summary(reg_enc2)

reg_enc3 <- lm(W ~ fit_WS + fit_4F - 1, data = regeff)
summary(reg_enc3)

# WS_d model with season effects: Season_dum is a character column, so lm()
# treats it as a categorical regressor.
reg_check <- lm(W ~ WS_d + Season_dum, data = regeff)
summary(reg_check)

# Check on WINSCORE MODEL
# Keep the rows with Season strictly between 2003 and 2005.
regeff1 <- subset(regeff, Season > 2003 & Season < 2005)
# Scoring part: weighted made shots minus weighted missed field goals and
# missed free throws.
regeff1$W_scor <- with(
  regeff1,
  X3P * 0.066 + X2P * 0.033 + FT * 0.018 -
    (FGA - FG) * 0.034 - (FTA - FT) * 0.015
)
# Possession part: rebounds and steals minus turnovers, one common weight.
regeff1$W_pos_st <- with(regeff1, 0.034 * (ORB - TOV + DRB + STL))
# Blocks (positive weight) and personal fouls (negative weight).
regeff1$W_pfblk <- with(regeff1, BLK * 0.021 - PF * 0.018)
# Sum of the three parts.
regeff1$W_all <- with(regeff1, W_scor + W_pos_st + W_pfblk)

# Scatter of actual wins (W) against the win score W_all. The labels describe
# what is drawn: the x axis is W_all (not W_allc) and the plot has only the
# blue points, no fitted line.
ggplot(regeff1, aes(x = W_all)) +
  geom_point(aes(y = W), color = "blue", alpha = 0.6, size = 2, shape = 16) +  # Actual values
  labs(title = "Actual Wins vs Win Score (W_all)",
       x = "W_all",
       y = "W",
       caption = "Blue dots: actual values") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Same win score built from the O-prefixed columns, with identical weights.
regeff1$OW_scor <- with(
  regeff1,
  O3P * 0.066 + O2P * 0.033 + OFT * 0.018 -
    (OFGA - OFG) * 0.034 - (OFTA - OFT) * 0.015
)
regeff1$OW_pos_st <- with(regeff1, 0.034 * (OORB - OTOV + ODRB + OSTL))
regeff1$OW_pfblk <- with(regeff1, OBLK * 0.021 - OPF * 0.018)
regeff1$OW_all <- with(regeff1, OW_scor + OW_pos_st + OW_pfblk)
# Net win score, shifted by the constant 41.
regeff1$W_allc <- with(regeff1, W_all - OW_all + 41)

# Scatter of actual wins (W) against the net win score W_allc. Title and
# caption describe what is drawn: only the blue points, no fitted line.
ggplot(regeff1, aes(x = W_allc)) +
  geom_point(aes(y = W), color = "blue", alpha = 0.6, size = 2, shape = 16) +  # Actual values
  labs(title = "Actual Wins vs Net Win Score (W_allc)",
       x = "W_allc",
       y = "W",
       caption = "Blue dots: actual values") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Scatter of actual wins (W) against W_allc, with the rows whose Team is
# "Los Angeles Lakers" drawn again as larger green points labelled "LAL".
# The caption names the team that is actually highlighted and does not
# mention a fitted line, because none is drawn here.
ggplot(regeff1, aes(x = W_allc)) +
  # Actual values
  geom_point(aes(y = W), color = "blue", alpha = 0.6, size = 2) +

  # Highlight Los Angeles Lakers with green points and labels
  geom_point(data = subset(regeff1, Team == "Los Angeles Lakers"),
             aes(y = W), color = "darkgreen", size = 3) +
  geom_text(data = subset(regeff1, Team == "Los Angeles Lakers"),
            aes(y = W, label = "LAL"), color = "darkgreen", vjust = -1, size = 3) +

  # Labels and theme
  labs(title = "Actual vs Fitted Values with LAL Highlighted",
       x = "W_allc",
       y = "W",
       caption = "Blue dots: actual values | Green points: Los Angeles Lakers (LAL)") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
