# links
# https://www.programmableweb.com/news/how-to-access-any-restful-api-using-r-language/how-to/2017/07/21
# https://www.r-bloggers.com/accessing-apis-from-r-and-a-little-r-programming/

# packages used
rm(list = ls())
listofpackages <- c("httr", "jsonlite", "lubridate")
for (j in listofpackages) {
  if (!(j %in% installed.packages()[, 1])) {
    install.packages(j)
  }
  library(j, character.only = TRUE)
}
options(stringsAsFactors = FALSE)

url <- "http://api.epdb.eu"
path <- "eurlex/directory_code"
raw.result <- GET(url = url, path = path)
names(raw.result)

# Problem: which is the most popular weekday for energy-related documents to enter into force?
raw.result$status_code
head(raw.result$content)

this.raw.content <- rawToChar(raw.result$content)
nchar(this.raw.content)
substr(this.raw.content, 1, 100)

this.content <- fromJSON(this.raw.content)
class(this.content)  # it's a list
length(this.content)
this.content[[1]]

this.content.df <- do.call(what = "rbind", args = lapply(this.content, as.data.frame))
head(this.content.df)

headClass <- substr(x = this.content.df[, "directory_code"], start = 1, stop = 2)
isEnergy <- headClass == "12"
table(isEnergy)  # 19 of the topic classifiers start with "12" (energy)

relevant.df <- this.content.df[isEnergy, ]
relevant.dc <- relevant.df[, "directory_code"]
relevant.dc

# NOW RETRIEVING ENERGY DOCUMENT METADATA
makeQuery <- function(classifier) {
  this.query <- list(classifier)
  names(this.query) <- "dc"
  return(this.query)
}

queries <- lapply(as.list(relevant.dc), makeQuery)
# We now have a list (queries) containing all the individual queries produced by
# our function, so we are ready to actually execute the API calls.

this.raw.result <- GET(url = url, path = path, query = queries[[1]])
this.result <- fromJSON(rawToChar(this.raw.result$content))
names(this.result[[1]])

# NOW FOR ALL QUERIES
all.results <- vector(mode = "list", length = length(relevant.dc))
for (i in seq_along(all.results)) {
  this.query <- queries[[i]]
  this.raw.answer <- GET(url = url, path = path, query = this.query)
  this.answer <- fromJSON(rawToChar(this.raw.answer$content))
  all.results[[i]] <- this.answer
  message(".", appendLF = FALSE)
  Sys.sleep(time = 1)  # be polite to the API: pause one second between calls
}

# all.results is no longer an empty list: it holds the answers the API returned
# for our 19 queries.
# Of all the parts of each answer, we are interested in form, date_document and
# of_effect. Let's write another function that returns just these parts as a data frame.
parseAnswer <- function(answer) {
  this.form <- answer$form
  this.date <- answer$date
  this.effect <- answer$of_effect
  result <- data.frame(form = this.form, date = this.date, effect = this.effect)
  return(result)
}

parseAnswer(all.results[[1]][[2]])

parsedAnswers <- lapply(all.results, function(x) do.call("rbind", lapply(x, parseAnswer)))

# Let's combine these 19 data frames into a single one.
finalResult <- do.call("rbind", parsedAnswers)
class(finalResult)  # data.frame
# All the final results are now contained neatly in a single data frame.
# Note that the data frame's row names are actually the document IDs;
# we could use them to retrieve each document's metadata.

# NOW DATES
date.character <- "1981-05-02"
date.POSIXct <- ymd(date.character)  # quick demo of lubridate's ymd() parser

finalResult$date <- ymd(finalResult$date)
finalResult$effect <- ymd(finalResult$effect)

finalResult$effectDay <- wday(finalResult$effect, label = TRUE)
table(finalResult$effectDay)
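
# OPTIONAL: a more defensive variant of the download loop above (a sketch only;
# the helper name getDirectoryCode is ours, not part of the original tutorial).
# It stops on HTTP errors via httr::stop_for_status() and decodes the body with
# httr::content(), which also takes care of the character encoding.
getDirectoryCode <- function(one.query) {
  raw.answer <- GET(url = url, path = path, query = one.query)
  stop_for_status(raw.answer)  # fail loudly on 4xx/5xx responses
  fromJSON(content(raw.answer, as = "text", encoding = "UTF-8"))
}
# Usage (uncomment to run; pauses one second between calls like the loop above):
# all.results <- lapply(queries, function(q) { Sys.sleep(1); getDirectoryCode(q) })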
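
# OPTIONAL: the table above already answers the question; as a small extra step
# (a sketch, not part of the original tutorial) we can name the most frequent
# weekday explicitly and draw a quick base-R barplot of the counts.
effectDayCounts <- table(finalResult$effectDay)  # effectDayCounts is our own helper object
names(which.max(effectDayCounts))                # weekday on which most documents enter into force
barplot(effectDayCounts,
        main = "Energy documents entering into force, by weekday",
        xlab = "weekday", ylab = "number of documents")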