# Clear the global environment so the script starts from a clean slate.
# NOTE(review): this removes every object in the calling session; running
# the script in a fresh R session would make this line unnecessary.
rm(list = ls())
## ------------------------------------------------------------------------
# Make relative paths (the input CSV and the .xlsx outputs) resolve next to
# this script. The RStudio API only works inside a running RStudio session,
# so the directory change is skipped when RStudio is unavailable (e.g. under
# Rscript) or when the active document has no path yet (unsaved file).
local({
  has_rstudio <- requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()
  if (has_rstudio) {
    script_path <- rstudioapi::getActiveDocumentContext()$path
    if (nzchar(script_path)) {
      setwd(dirname(script_path))
    }
  }
})
# Packages this script depends on
packages <- c("tidyverse", "rvest", "dplyr", "openxlsx")

# For every dependency, in order: install it when its namespace cannot be
# loaded, then attach it to the search path
invisible(lapply(packages, function(pkg) {
  is_installed <- requireNamespace(pkg, quietly = TRUE)
  if (!is_installed) {
    install.packages(pkg, dependencies = TRUE)
  }
  library(pkg, character.only = TRUE)
}))

# Page to scrape
url <- "https://www.basketball-reference.com/leagues/NBA_2025.html#per_game-team"

# Download and parse the page
webpage <- read_html(url)

# Collect every <table> node found in the parsed document
tables <- html_nodes(webpage, "table")

# Convert each table node to a data frame, taking the first row as the header
table_list <- lapply(tables, html_table, header = TRUE, fill = TRUE)

# Keep the three tables used below; they are picked by their position in the
# list of tables found on the page
data_team <- table_list[[7]]
data_opp <- table_list[[8]]
data_misc <- table_list[[11]]


# Align data_team with format of previous data
head(data_team)

# Drop the "League Average" row and the rank column, sort by team name, then
# add a constant Season column and move Team and Season to the front.
data_team <- data_team %>%
  filter(Team != "League Average") %>%
  select(-Rk) %>%
  arrange(Team) %>%
  mutate(Season = 2025) %>%
  select(Team, Season, everything())

# Team labels are taken from the Season == 2023 rows of the existing data file.
NBAdata <- read.csv(
  "Teams_overall2025.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  sep = ","
)
NBAdata_2023 <- subset(NBAdata, Season == 2023)

# The overwrite below is positional, so both tables must have the same number
# of rows; fail with a clear message instead of recycling a single label or
# raising an opaque size error.
if (nrow(NBAdata_2023) != nrow(data_team)) {
  stop(
    "Cannot relabel teams: 'Teams_overall2025.csv' has ", nrow(NBAdata_2023),
    " rows for Season 2023 but the scraped table has ", nrow(data_team),
    " teams.",
    call. = FALSE
  )
}
# NOTE(review): assumes the 2023 rows are stored in the same team order as
# the alphabetically sorted scraped table -- confirm against the CSV.
data_team$Team <- NBAdata_2023$Team

write.xlsx(data_team, "data_team_46.xlsx", rowNames = FALSE)

head(data_opp)

# Single pipeline: remove the "League Average" row, drop the Rk, G and MP
# columns, sort by team name, and prefix every column except Team with "O".
data_opp <- data_opp %>%
  filter(Team != "League Average") %>%
  select(-Rk, -G, -MP) %>%
  arrange(Team) %>%
  rename_with(function(col_name) paste0("O", col_name), .cols = -Team)

write.xlsx(data_opp, "data_opp_46.xlsx", rowNames = FALSE)

head(data_misc)

# Replace the scraped header with explicit column names. VOID1-VOID3 name
# columns that are dropped immediately afterwards.
names(data_misc) <- c(
  "Rk", "Team", "Age", "W", "L", "PW", "PL", "MOV", "SOS", "SRS",
  "ORtg", "DRtg", "NRtg", "Pace", "FTr", "3PAr", "TS%",
  "VOID1", "1_OF", "2_OF", "3_OF", "4_OF",
  "VOID2", "1_DEF", "2_DEF", "3_DEF", "4_DEF",
  "VOID3", "Arena", "Attend", "AttendG"
)

# Drop the unused columns, sort by team name, then remove the rows whose Team
# value is the literal "Team" or "League Average".
data_misc <- data_misc %>%
  select(-Rk, -VOID1, -VOID2, -VOID3, -AttendG) %>%
  arrange(Team) %>%
  filter(Team != "Team", Team != "League Average")

write.xlsx(data_misc, "data_misc_46.xlsx", rowNames = FALSE)




