#setwd(path)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#clear the environment
rm(list=ls())

# install and load the relevant packages 
# packages used
listofpackages <- c("lrmest", "dplyr","assertthat","bindrcpp","glue","pkgconfig","utf8","cli","ellipse","reshape2","ggplot2","dygraphs","aod")

for (j in listofpackages){
  if(sum(installed.packages()[, 1] == j) == 0) {
    install.packages(j)
  }
  library(j, character.only = T)
}


#Let's now work with a database considering stats about players in the season 2003-2004
Players = read.csv("../data_L_2020/Players03-04.csv", header = T, stringsAsFactors = F, sep = ";")
Players$MFG = Players$FGA - Players$FG
Players$MFT = Players$FTA - Players$FT

#PIR MEASURE OF EFFICIENCY 

Players$eff_NBA=Players$PTS+Players$TRB+Players$STL+Players$BLK-Players$MFG-Players$MFT-Players$TOV

#with the coefficients calculated with Table6_4 and the others estimated in our reference chapter, we want to estimate how many wins can be attributed to each player
#Step 1
Players$W_scor = Players$X3P*0.066+Players$X2P*0.033+Players$FT*0.018 - Players$MFG*0.034 - Players$MFT*0.015
Players$W_pos_st = 0.034*(Players$ORB - Players$TOV + Players$DRB + Players$STL)
Players$W_pfblk = Players$BLK*0.021 - Players$PF*0.018
Players$W_all = Players$W_scor+Players$W_pos_st+Players$W_pfblk

#here we insert other 2 variables to the database, as they will become useful later on
Players$MP. = Players$MP/(48*82)  #4032
Players$AVEPLW = Players$MP.*(0.5*82/5) 
Players$W_all_100=Players$W_all/(Players$MP.)


#replicate Table 6_6
TABLE_6.6.1=subset(Players,(Tm=="LAL" ),select=c(Player,MP,MP.,Tm,W_scor,W_pos_st,W_pfblk,W_all))
TABLE_6.6.2=subset(Players,(Tm=="MIA" ),select=c(Player,MP,MP.,Tm,W_scor,W_pos_st,W_pfblk,W_all))
table_6.6=rbind(TABLE_6.6.1, TABLE_6.6.2)
rank.table_6.6 = table_6.6[order(table_6.6$W_all, table_6.6$Tm, decreasing = TRUE),]


#Step 2, accounting for the position. Calculate means for every position for an average player always on court and consider the relative difference btw one player and the average one
#library(dplyr)
Players_pos = group_by(Players, Pos)
Wmeanxpos <- summarise(Players_pos, WTmeanxpos
                       = mean(W_all_100))
#then subtract the relevant adjustment to each players by rescaling for his time on court  
Players$adj=1
Players$adj[(Players$Pos == "C") ]<-Players$MP.[(Players$Pos == "C") ]*Wmeanxpos$WTmeanxpos[1]-Players$MP.[(Players$Pos == "C") ]*(0.5*82/5)
Players$adj[(Players$Pos == "C-PF") ]<-Players$MP.[(Players$Pos == "C-PF") ]*Wmeanxpos$WTmeanxpos[2]-Players$MP.[(Players$Pos == "C-PF") ]*(0.5*82/5)
Players$adj[(Players$Pos == "PF") ]<-Players$MP.[(Players$Pos == "PF") ]*Wmeanxpos$WTmeanxpos[3]-Players$MP.[(Players$Pos == "PF") ]*(0.5*82/5)
Players$adj[(Players$Pos == "PF-C") ]<-Players$MP.[(Players$Pos == "PF-C") ]*Wmeanxpos$WTmeanxpos[4]-Players$MP.[(Players$Pos == "PF-C") ]*(0.5*82/5)
Players$adj[(Players$Pos == "PF-SF") ]<-Players$MP.[(Players$Pos == "PF-SF") ]*Wmeanxpos$WTmeanxpos[5]-Players$MP.[(Players$Pos == "PF-SF") ]*(0.5*82/5)
Players$adj[(Players$Pos == "PG") ]<-Players$MP.[(Players$Pos == "PG") ]*Wmeanxpos$WTmeanxpos[6]-Players$MP.[(Players$Pos == "PG") ]*(0.5*82/5)
Players$adj[(Players$Pos == "PG-SG") ]<-Players$MP.[(Players$Pos == "PG-SG") ]*Wmeanxpos$WTmeanxpos[7]-Players$MP.[(Players$Pos == "PG-SG") ]*(0.5*82/5)
Players$adj[(Players$Pos == "SF") ]<-Players$MP.[(Players$Pos == "SF") ]*Wmeanxpos$WTmeanxpos[8]-Players$MP.[(Players$Pos == "SF") ]*(0.5*82/5)
Players$adj[(Players$Pos == "SF-PF") ]<-Players$MP.[(Players$Pos == "SF-PF") ]*Wmeanxpos$WTmeanxpos[9]-Players$MP.[(Players$Pos == "SF-PF") ]*(0.5*82/5)  
Players$adj[(Players$Pos == "SF-SG") ]<-Players$MP.[(Players$Pos == "SF-SG") ]*Wmeanxpos$WTmeanxpos[10]-Players$MP.[(Players$Pos == "SF-SG") ]*(0.5*82/5)  
Players$adj[(Players$Pos == "SG") ]<-Players$MP.[(Players$Pos == "SG") ]*Wmeanxpos$WTmeanxpos[11]-Players$MP.[(Players$Pos == "SG") ]*(0.5*82/5)  
Players$adj[(Players$Pos == "SG-SF") ]<-Players$MP.[(Players$Pos == "SG-SF") ]*Wmeanxpos$WTmeanxpos[12]-Players$MP.[(Players$Pos == "SG-SF") ]*(0.5*82/5)  

Players$W_all_ua=Players$W_all-Players$adj
Players$W_48_ua=(Players$W_all_ua/Players$MP)*48*Players$G
#replicate table 6.7 

TABLE_6.7.1=subset(Players,(Player=="Kobe Bryant"),select=c(Player,MP,Tm,W_scor,W_pos_st,W_pfblk,W_all,W_all_ua,AST,W_48_ua))
TABLE_6.7.2=subset(Players,(Player=="Shaquille O'Neal*"),select=c(Player,MP,Tm,W_scor,W_pos_st,W_pfblk,W_all,W_all_ua,AST,W_48_ua))

table_6.7=rbind(TABLE_6.7.1, TABLE_6.7.2)

TABLE_7=subset(Players,(Tm=="LAL" ),select=c(Player,W_all,W_all_ua))
# Sum of the last column (W_all_ua)
sum_W_all_ua <- sum(TABLE_7$W_all_ua, na.rm = TRUE)

# Print the result
print(sum_W_all_ua)
#adjusting for assist
#useful exercise 
#adjust for assists 
Players$W_all_AST=Players$W_all_ua+0.022*Players$AST

#check that assisted win is a best predictor of unassisted win 
reg_wins=lm(Players$W_all_AST ~  Players$W_all_ua )
summary(reg_wins)

# rank players according W_48 and salary 
WAGEPROD=subset(Players,MP >= 1000 & G >= 30,select=c(Player,Tm,Pos,W_48_ua,eff_NBA,SALARY))
WAGEPROD <- na.omit(WAGEPROD)
rank.table_WP = WAGEPROD[order(WAGEPROD$SALARY, decreasing = TRUE),]

WAGEPROD$l_SALARY=log(WAGEPROD$SALARY)
scatter.smooth(x=WAGEPROD$W_48_ua, y=WAGEPROD$eff_NBA, main="W48 and NBA ")  # scatterplot

reg_prod=lm(WAGEPROD$SALARY ~  WAGEPROD$W_48_ua )
summary(reg_prod)

plot(WAGEPROD$SALARY, pch=20, ylim=c(1000000, 18000000), ylab="Salary",col = "blue")
lines(reg_prod$fitted.values,col = "red", lwd = 2,type="l")


reg_prod_1=lm(WAGEPROD$SALARY ~  WAGEPROD$eff_NBA )
summary(reg_prod_1)

reg_comp=lm(WAGEPROD$SALARY ~  reg_prod$fitted +reg_prod_1$fitted -1)
summary(reg_comp)

plot(WAGEPROD$SALARY, pch=20, ylim=c(1000000, 18000000), ylab="Salary",col = "blue")
lines(reg_prod_1$fitted.values,col = "red", lwd = 2,type="l")

WAGEPROD$res_EFF=reg_prod$residuals
WAGEPROD$res_NBA=reg_prod_1$residuals
WAGEPROD$Pos_dum=as.character(WAGEPROD$Pos)

par(mfrow = c(1, 2))
plot (WAGEPROD$res_NBA[(WAGEPROD$Pos == "C") ])
plot (WAGEPROD$res_NBA[(WAGEPROD$Pos == "SG") ])
par(mfrow = c(1, 1))

summary(WAGEPROD$res_NBA[(WAGEPROD$Pos == "C") ])
summary(WAGEPROD$res_NBA[(WAGEPROD$Pos == "SG") ])

Reg_DUM = lm(WAGEPROD$SALARY ~  WAGEPROD$eff_NBA +WAGEPROD$Pos_dum )
summary(Reg_DUM)

Reg_DUM1 = lm(WAGEPROD$SALARY[(WAGEPROD$Pos == "C") ] ~  WAGEPROD$eff_NBA[(WAGEPROD$Pos == "C") ] )
summary(Reg_DUM1)
Reg_DUM2 = lm(WAGEPROD$SALARY[(WAGEPROD$Pos == "SG") ] ~  WAGEPROD$eff_NBA[(WAGEPROD$Pos == "SG") ] )
summary(Reg_DUM2)
Reg_DUM3 = lm(WAGEPROD$SALARY[(WAGEPROD$Pos == "C") ] ~  WAGEPROD$W_48_ua[(WAGEPROD$Pos == "C") ] )
summary(Reg_DUM3)
Reg_DUM4 = lm(WAGEPROD$SALARY[(WAGEPROD$Pos == "SG") ] ~  WAGEPROD$W_48_ua[(WAGEPROD$Pos == "SG") ] )
summary(Reg_DUM4)


