
Senhores, bom dia! Não sei se ainda há interesse na questão, mas retomei a ideia de data mining do whoscored.com e gostaria de compartilhar uma solução. No código apliquei só para o Fluminense, mas já gerei os índices pra montar o loop para outros times (objeto teams ou teamsID).

### <code r>
# Scrape the team list and the per-player statistics of one team
# (Fluminense, team id 1232) from whoscored.com.
# NOTE(review): relies on the page containing a <select id="teams"> element
# and a script line calling DataStore.prime('stage-player-stat', ...);
# confirm the site still serves that layout.

# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(RCurl)
library(XML)
library(RJSONIO)

# The script reads and writes no local files, so no working directory is
# needed; uncomment and adapt one of these if you extend it to save output.
# setwd(choose.dir())
# setwd("C:/LAB/RBAS/dataMining")

# browseURL("http://www.whoscored.com/Teams/1232")
myURL <- "http://www.whoscored.com/Teams/1232"
htmRaw <- getURL(myURL)

# Split the downloaded page into lines so single lines can be grepped below.
txtCon <- textConnection(htmRaw)
htmLin <- readLines(txtCon)
close(txtCon)

### Teams (IDs for future looping)
pageTree <- htmlTreeParse(htmLin, error = function(...) {},
                          useInternalNodes = TRUE)

# Query the <option> nodes once and reuse them for both names and ids.
teamOptions <- xpathApply(pageTree, "//*/select[@id='teams']//option")
teamsNames <- as.character(lapply(teamOptions, xmlValue))

# The id is whatever follows "Teams/" in each option's value attribute;
# options without a value attribute (or without a numeric id) become NA.
teamsID <- vapply(
  teamOptions,
  function(node) xmlGetAttr(node, "value", default = NA_character_),
  character(1)
)
teamsID <- as.integer(gsub("^.*Teams\\/(.*)", "\\1", teamsID))

teams <- data.frame(teamsID, teamsNames, stringsAsFactors = FALSE)
teams

### Info about a specific team (Fluminense)
# Locate the line that carries the player statistics array.
sLin <- grep("DataStore.prime\\(\\'stage-player-stat\\'", htmLin)
if (length(sLin) == 0L) {
  stop("No 'stage-player-stat' DataStore line found in ", myURL,
       call. = FALSE)
}
sDat <- htmLin[sLin]

# Keep only the bracketed array "[...]" that ends the DataStore call.
dJSON <- gsub("^.*DataStore.*\\[(.*)\\]);", "\\[\\1\\]", sDat)

# String hook for fromJSON(): strings containing "Date(<digits>)" are read
# as milliseconds since the epoch and converted to Date; any other string
# is returned unchanged.
convertJSONDate <- function(x) {
  if (grepl("/?(new )?Date\\(", x)) {
    val <- gsub(".*Date\\(([0-9]+)\\).*", "\\1", x)
    as.Date(structure(as.numeric(val) / 1000, class = c("POSIXct", "POSIXt")))
  } else {
    x
  }
}

myList <- fromJSON(dJSON, nullValue = NA, stringFun = convertJSONDate)
if (length(myList) == 0L) {
  stop("The 'stage-player-stat' array was parsed but is empty.",
       call. = FALSE)
}
length(myList)

# Field names, taken from the first parsed element.
myListVars <- names(myList[[1]])
myListVars

# One row per parsed element, one column per field.
# NOTE(review): assumes every element carries the same fields in the same
# order, so that sapply() simplifies to a matrix -- confirm on live data.
fullDF <- data.frame(t(sapply(myList, as.vector)), stringsAsFactors = FALSE)

shortListVars <- c(
  "TeamRegionCode", "Name", "PositionShort", "Age", "Height", "Weight",
  "GameStarted", "Goals", "Assists", "Yellow", "Red", "TotalShots",
  "TotalPasses", "AccuratePasses", "AerialWon", "ManOfTheMatch", "Rating"
)
head(fullDF[shortListVars])
### </code>

Éder Comunello <comunello.eder@gmail.com>
Dourados, MS - [22 16.5'S, 54 49'W]