#------------------------
# QUESTION 1 
#------------------------
# Clear the environment so the script starts from a clean workspace
rm(list = ls())
## ------------------------------------------------------------------------
# Make relative paths (e.g. the CSV read below) resolve against the folder of
# this script. rstudioapi only works inside an RStudio session, so the call is
# guarded: when run through Rscript or source() outside RStudio the working
# directory is left unchanged instead of aborting the whole script.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  # The path is "" for an unsaved document; setwd(dirname("")) would fail
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}
## -------


## Load packages
# "aod" supplies wald.test(), which the Question 7 hypothesis test calls.
# plyr is listed before dplyr so that dplyr's verbs are the ones left visible.
listofpackages <- c("ellipse", "reshape2", "ggplot2", "dygraphs", "plyr",
                    "dplyr", "aod")
for (pkg in listofpackages) {
  # Install only when the package is missing ...
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # ... and always attach with library(), which stops with an error if the
  # package is still unavailable (require() would only return FALSE).
  library(pkg, character.only = TRUE)
}
## Import the team-level data set with readr
library(readr)
NBAdata <- read_csv(file = "Teams_overall2023.csv")

# Preview the first rows of the imported table
head(x = NBAdata)
# Report the underlying storage type of the imported object
typeof(x = NBAdata)
#------------------------
# QUESTION 2 
#------------------------
# The same row counts are produced twice, with two alternative tools:
# plyr::ddply() and base aggregate().

# ddply version: number of rows per Season (column TEAMS) ...
TEAMSxSEAS <- ddply(.data = NBAdata, .variables = "Season", .fun = summarise,
                    TEAMS = length(Team))

# ... and number of rows per Team (column Season)
SEASsxTEAM <- ddply(.data = NBAdata, .variables = "Team", .fun = summarise,
                    Season = length(Season))


# aggregate() version: number of rows per Team ...
count.seas_team <- aggregate(
  x = list(Season = NBAdata$Season),
  by = list(Team = NBAdata$Team),
  FUN = length
)

# ... and number of rows per Season
count.team_seas <- aggregate(
  x = list(Team = NBAdata$Team),
  by = list(Season = NBAdata$Season),
  FUN = length
)

#------------------------
# QUESTION 3 
#------------------------
# Extract the games played per season for a single team (Atlanta Hawks) and
# plot the series, first with base R and then with dplyr + ggplot2.

# Base R version. The two columns are picked by name so the result does not
# depend on which columns happen to sit between Season and G in the CSV, and
# so it matches the dplyr selection below.
AH <- subset(NBAdata, Team == "Atlanta Hawks", select = c(Season, G))
plot(x = AH$Season, y = AH$G, type = "l", col = "blue", ylim = c(50, 100),
     xlab = "Season",
     ylab = "Games Played",
     main = "Atlanta Hawks")


# dplyr + ggplot2 version of the same extraction and plot
AH <- NBAdata %>%
  filter(Team == "Atlanta Hawks") %>%
  select(Season, G)
ggplot(AH, aes(x = Season, y = G)) +
  geom_line(color = "blue") +
  ylim(50, 100) +
  xlab("Season") +
  ylab("Games Played") +
  ggtitle("Atlanta Hawks")
#------------------------
# QUESTION 4 
#------------------------
# Ranking of teams by win percentage (WP = W / G) for Season 2022, built with
# two alternative methods that must give the same ordering.

# Base R version
TABLE_1 <- subset(NBAdata, Season == 2022, select = c(Team, W, G))
TABLE_1$WP <- TABLE_1$W / TABLE_1$G
# Highest WP first; ties broken alphabetically by Team. Negating WP sorts it
# in descending order while leaving the Team key ascending (decreasing = TRUE
# would reverse both keys), so this agrees with arrange(desc(WP), Team) below.
ranking_2022 <- TABLE_1[order(-TABLE_1$WP, TABLE_1$Team), ]
ranking_2022

# dplyr version
TABLE_1 <- NBAdata %>%
  filter(Season == 2022) %>%
  select(Team, W, G) %>%
  mutate(WP = W / G) %>%
  arrange(desc(WP), Team)
# n = 30 prints up to 30 rows instead of the truncated default tibble display
print(TABLE_1, n = 30)
#------------------------
# QUESTION 5 
#------------------------

# Win percentage (wins over games) for every row of the data set
NBAdata$WP <- with(NBAdata, W / G)

# Base graphics scatter plot of win percentage against Age
plot(x = NBAdata$Age, y = NBAdata$WP, main = "Age and Performance",
     col = "blue")

# The same scatter plot drawn with ggplot2
ggplot(data = NBAdata, mapping = aes(x = Age, y = WP)) +
  geom_point(size = 3, colour = "red") +
  ggtitle("Age and Performance")
# NOTE: no time-series plot is created here; Question 5 is answered by the scatter plots above
#------------------------
# QUESTION 6 
#------------------------
# Quadratic regression of win percentage on Age: WP = b0 + b1*Age + b2*Age^2.
# WP is the win-percentage column added to NBAdata earlier in the script.
NBAdata$AgeSq <- NBAdata$Age^2
# data = keeps the coefficient names short (Age, AgeSq) and allows predict()
# to be called on new Age values below.
reg_1 <- lm(WP ~ Age + AgeSq, data = NBAdata)
summary(reg_1)

# Scatter of the raw data ...
plot(y = NBAdata$WP, x = NBAdata$Age, col = "blue",
     main = "Age and Performance")
# ... overlaid with the fitted curve (the predictable part of the model).
# lines() joins points in the order given, so the curve is evaluated on the
# sorted distinct ages; passing the fitted values in row order would draw a
# zig-zag, and would fail outright if rows with missing values were dropped
# from the fit. sort() also discards NA ages.
age_grid <- sort(unique(NBAdata$Age))
fitted_curve <- predict(
  reg_1,
  newdata = data.frame(Age = age_grid, AgeSq = age_grid^2)
)
lines(x = age_grid, y = fitted_curve, col = "red", type = "l", lwd = 2)

#------------------------
# QUESTION 7 
#------------------------

# EMPLOYED POSSESSIONS
# empl_poss combines FGA, 0.45 * FTA, TOV and ORB; PTS is then expressed per
# game and per employed possession.
NBAdata$empl_poss <- with(NBAdata, FGA + 0.45 * FTA + TOV - ORB)
NBAdata$ptsxgame <- with(NBAdata, PTS / G)
NBAdata$ptsxposs <- with(NBAdata, PTS / empl_poss)

# ACQUIRED POSSESSIONS
# FGAD and TEAM_R are intermediate terms; acq_poss is built from them together
# with OTOV, DRB, OFG and 0.45 * OFT. OPTS is then expressed per game and per
# acquired possession.
NBAdata$FGAD <- with(NBAdata, FGA - OFG - OTOV - TRB + TOV)
NBAdata$TEAM_R <- with(NBAdata, FGAD - 0.45 * OFT + 0.45 * FTA)
NBAdata$acq_poss <- with(NBAdata, OTOV + DRB + TEAM_R + OFG + 0.45 * OFT)
NBAdata$ptsaxgame <- with(NBAdata, OPTS / G)
NBAdata$ptsall_poss <- with(NBAdata, OPTS / acq_poss)

# eff: PTS per employed possession minus OPTS per acquired possession
NBAdata$eff <- with(NBAdata, ptsxposs - ptsall_poss)


# Regression analysis to check the variable construction.
# TEAM_R is defined earlier as FGAD - 0.45 * OFT + 0.45 * FTA, so regressing
# FGAD on OFT and FTA should return slopes of 0.45 and -0.45 respectively.
# TEAM_R is kept in the subset because it is plotted against the proxy below.
reg0010 <- subset(NBAdata, Season > 1999 & Season < 2011,
                  select = c(FGAD, OFT, FTA, TEAM_R, Season))
# na.exclude makes residuals() return one value per row of reg0010 (NA where a
# row could not be used), so it can be stored back as a column.
reg_2 <- lm(FGAD ~ OFT + FTA, data = reg0010, na.action = na.exclude)
summary(reg_2)

# Hypothesis testing: joint Wald test of
#   H0: coefficient on OFT = 0.45  and  coefficient on FTA = -0.45.
# Each row of the restriction matrix picks one coefficient out of
# (Intercept, OFT, FTA). wald.test() is called through the aod namespace
# because the package is not otherwise attached by this script; the residual
# degrees of freedom are read from the fit instead of being hard-coded.
restrictions <- rbind(c(0, 1, 0),
                      c(0, 0, 1))
aod::wald.test(vcov(reg_2), coef(reg_2),
               L = restrictions,
               H0 = c(0.45, -0.45),
               df = df.residual(reg_2),
               verbose = TRUE)

# Using the regression results to create a proxy: intercept plus residuals
reg0010$TEAM_R1 <- coef(reg_2)[["(Intercept)"]] + residuals(reg_2)
# Compare the proxy (x axis) with the constructed TEAM_R (y axis)
plot(x = reg0010$TEAM_R1, y = reg0010$TEAM_R,
     main = "Estimated Team Rebounds",
     xlab = "Estimated TR (intercept + residuals)",
     ylab = "TR",
     col = "red")

#------------------------
# QUESTION 8
#------------------------
#Insert here the answer to question 8 as a comment 


