Caros,

Tenho uma dataframe com cerca de 1500 variáveis e gostaria de saber a correlação entre essas variáveis, então gerei uma matriz de correlação.

Como muitas variáveis estão altamente correlacionadas, eu gostaria de selecionar apenas as variáveis que tenham, por exemplo, no máximo uma correlação de 0,3. Para isso eu utilizei a função findCorrelation do pacote caret. Pela descrição, a função foi feita para isso, selecionar as variáveis mais correlacionadas, considerando um limite de corte.

Como eu gostaria das variáveis menos correlacionadas, eu inverti minha matriz de correlação, pelo seguinte comando: as.matrix(sqrt((matrix.cor-1)^2)), desta forma, a função selecionaria as variáveis menos correlacionadas.

No entanto, o resultado da função sempre me retorna bandas altamente correlacionadas. Por isso gostaria de fazer de uma forma alternativa. Segue um pequeno exemplo para ajudar nas considerações.

Se alguém souber uma forma de programar para obter esse resultado, aceito um direcionamento.

Desde já agradeço pela atenção.

require(caret)
filtro=c(0.7) #Este parâmetro foi criado para definir o limiar de corte, como a
#matriz está inversa, um valor de 0,7 quer dizer que a correlação máxima entre as
#variáveis deve ser de 0,3.
matrix.cor <- as.data.frame(cor(bandas))# Gerar a matriz de correlação
banda.selecao=bandas[,findCorrelation(as.matrix(sqrt((matrix.cor-1)^2)),cutoff=as.numeric(filtro), verbose=FALSE)] # selecionar do dataframe
#original apenas as variáveis com fraca correlação.
--
bandas=structure(list(X1417 = c(0.161042, 0.186075, 0.202966, 0.211156,0.172813, 0.167929, 0.175015, 0.200225, 0.178143, 0.182516, 0.162152,0.169258, 0.14955, 0.143907, 0.161655, 0.164643, 0.157703, 0.160282,0.177145, 0.169043), X1366 = c(0.352479, 0.376561, 0.372766,0.39279, 0.36767, 0.368855, 0.361146, 0.390109, 0.373625, 0.383793,0.371615, 0.375047, 0.363033, 0.341382, 0.372219, 0.375438, 0.359356,0.372921, 0.375115, 0.36681), X469 = c(0.07749, 0.098493, 0.083867,0.101454, 0.087098, 0.088587, 
0.079879, 0.083187, 0.087261, 0.091197,0.085591, 0.091496, 0.081759, 0.093462, 0.094817, 0.089399, 0.087421,0.08966, 0.097024, 0.084687), X1549 = c(0.233143, 0.262691, 0.266292,0.280087, 0.247662, 0.242888, 0.237161, 0.269429, 0.25326, 0.260258,0.239942, 0.244771, 0.224855, 0.211901, 0.237555, 0.242471, 0.232943,0.234706, 0.248407, 0.243808), X1424 = c(0.149535, 0.176297,0.193145, 0.197999, 0.16312, 0.156738, 0.165102, 0.190595, 0.165941,0.171179, 0.149601, 0.158143, 0.139422, 0.131351, 0.150353, 0.153668,0.145654, 0.149609, 0.162426, 0.158941), X1957 = c(0.045334,0.078347, 0.055058, 0.071466, 0.04449, 0.052894, 0.066396, 0.087972,0.053841, 0.073682, 0.068851, 0.04738, 0.044667, 0.060964, 0.061176,0.065722, 0.052965, 0.05974, 0.07101, 0.076109), X1848 = c(0.247498,0.280584, 0.274104, 0.278935, 0.252584, 0.256848, 0.235115, 0.274966,0.268281, 0.267282, 0.255826, 0.23335, 0.237109, 0.230515, 0.262255,0.258836, 0.2387, 0.265159, 0.271541, 0.265838), X1537 = c(0.218761,0.248475, 0.254953, 0.264756, 0.233748, 0.230395, 0.224644, 0.256149,0.236666, 0.24462, 0.223847, 0.229429, 0.210074, 0.198872, 0.220081,0.227677, 0.217309, 0.21968, 0.231893, 0.226282), X1621 = c(0.285036,0.31607, 0.313035, 0.331176, 0.302131, 0.300669, 0.290153, 0.323815,0.307194, 0.318355, 0.298653, 0.303098, 0.285467, 0.269884, 0.297272,0.301428, 0.291555, 0.300164, 0.302502, 0.299489), X1602 = c(0.273662,0.302662, 0.304277, 0.319991, 0.29179, 0.291713, 0.281382, 0.313998,0.296813, 0.305874, 0.286391, 0.294462, 0.27532, 0.260575, 0.287217,0.289802, 0.279721, 0.285657, 0.291856, 0.289307), X1865 = c(0.199668,0.229541, 0.230852, 0.245059, 0.226876, 0.215756, 0.207998, 0.254432,0.20988, 0.226653, 0.206045, 0.194866, 0.195796, 0.183774, 0.214694,0.231647, 0.207081, 0.208682, 0.223025, 0.199142), X1476 = c(0.144097,0.170145, 0.187837, 0.192702, 0.158221, 0.154393, 0.160075, 0.184072,0.162413, 0.166724, 0.147404, 0.15375, 0.133576, 0.128008, 0.146151,0.150793, 0.143929, 0.141969, 0.158288, 0.154279), X2034 
= c(0.081108,0.110882, 0.100852, 0.109914, 0.109893, 0.06942, 0.128299, 0.139094,0.10447, 0.125726, 0.118003, 0.075258, 0.090784, 0.055216, 0.102326,0.101315, 0.08443, 0.123796, 0.104067, 0.102786), X1460 = c(0.136055,0.161915, 0.177241, 0.184844, 0.149423, 0.143712, 0.150499, 0.173385,0.150807, 0.156626, 0.138878, 0.145282, 0.125618, 0.120778, 0.138968,0.139961, 0.133014, 0.132243, 0.150618, 0.144916), X1236 = c(0.41637,0.440245, 0.422308, 0.453948, 0.428499, 0.436163, 0.427196, 0.454866,0.433779, 0.451874, 0.447265, 0.44747, 0.44684, 0.422101, 0.451307,0.452207, 0.436043, 0.454355, 0.44641, 0.437131), X1913 = c(0.049649,0.06384, 0.080675, 0.0761, 0.062756, 0.056635, 0.074517, 0.084222,0.054835, 0.05488, 0.06389, 0.058421, 0.054827, 0.050249, 0.063544,0.066275, 0.057604, 0.072578, 0.070545, 0.061034), X1885 = c(0.105368,0.136086, 0.161411, 0.146339, 0.115904, 0.121671, 0.149059, 0.145293,0.125417, 0.145147, 0.124081, 0.112018, 0.101411, 0.096761, 0.122112,0.114244, 0.105535, 0.122283, 0.140519, 0.121402), X492 = c(0.080821,0.102833, 0.088743, 0.10809, 0.093344, 0.096766, 0.083556, 0.089436,0.091601, 0.100154, 0.089925, 0.096294, 0.087353, 0.097834, 0.102614,0.09692, 0.096883, 0.094752, 0.102917, 0.089361), X1916 = c(0.037402,0.063136, 0.08053, 0.071535, 0.059838, 0.0555, 0.060523, 0.079581,0.044433, 0.055137, 0.055831, 0.054455, 0.054067, 0.055332, 0.062323,0.0651, 0.05896, 0.075877, 0.069466, 0.064714), X1249 = c(0.416616,0.440854, 0.423568, 0.455739, 0.428821, 0.436904, 0.42868, 0.455141,0.432762, 0.45234, 0.447037, 0.448069, 0.445743, 0.420802, 0.455476,0.453444, 0.43817, 0.454327, 0.444599, 0.437981)), .Names = c("X1417","X1366", "X469", "X1549", "X1424", "X1957", "X1848", "X1537","X1621", "X1602", "X1865", "X1476", "X2034", "X1460", "X1236","X1913", "X1885", "X492", "X1916", "X1249"), row.names = c(NA,20L), class = "data.frame")Rodrigo A. MunizEng. Agrônomo. 
Ms Produção Vegetal (UENF)
Doutorando em Engenharia de Sistemas Agrícolas (ESALQ/USP)
E-mail - muniz.ra@usp.br
Cel (19) 98300-4333 (Pessoal)
_______________________________________________
R-br mailing list
R-br@listas.c3sl.ufpr.br
https://listas.inf.ufpr.br/cgi-bin/mailman/listinfo/r-br
Leia o guia de postagem (http://www.leg.ufpr.br/r-br-guia) e forneça código mínimo reproduzível.