[R-br] Erro em função out_rem
Robert Iquiapaza
rbali em ufmg.br
Quarta Maio 21 10:19:05 BRT 2014
Alisson,
Você está substituindo por NA, não pela média. Mas o alerta deve-se a um erro na sua função: quando usa which(x == outliers), x e outliers têm comprimentos diferentes e a comparação produz resultados inesperados. Use o operador %in%:
# Baseline: run the original out_rem over every column except the first.
tab2 <- apply(X = tab1[, -1], MARGIN = 2, FUN = out_rem)
# Replace the boxplot outliers of a numeric vector with NA.
#
# x: numeric vector (e.g. one column of the data).
# Returns x with every value that boxplot() reports as an outlier set to NA;
# x is returned unchanged when boxplot() reports no outliers.
out_rem1 <- function(x) {
  flagged <- boxplot(x, plot = FALSE)$out
  if (length(flagged) == 0) {
    return(x)
  }
  # %in% tests membership, so x and flagged may have different lengths.
  is_outlier <- x %in% flagged
  x[is_outlier] <- NA # alternative: mean(x, na.rm = TRUE)
  x
}
# Repeat the cleaning with the corrected out_rem1.
tab3 <- apply(X = tab1[, -1], MARGIN = 2, FUN = out_rem1)
# Element-wise comparison of the original columns against the cleaned ones.
tab1[, -1] == tab3
# Number of NA values per column in each result.
apply(is.na(tab2), 2, sum) # some outliers are not identified correctly
apply(is.na(tab3), 2, sum) # ok
Sds
From: Alisson Lucrecio
Sent: Wednesday, May 21, 2014 8:46 AM
To: r-br
Subject: [R-br] Erro em função out_rem
Caros Colegas,
Bom dia.
Eu estou tentando criar uma função para encontrar os outliers em um data frame e substituí-los pela média, mas está aparecendo a mensagem de erro descrita abaixo. Alguém saberia como solucionar esse problema?
Obrigado.
> str(tab1)
'data.frame': 40 obs. of 13 variables:
$ Tempo : num 0 0 0 0 0 0 0 0 0 0 ...
$ Fo : int 58 84 69 67 90 85 77 86 85 76 ...
$ Fm : int 240 427 290 331 424 373 351 375 393 302 ...
$ Fv.Fm : num 0.758 0.803 0.762 0.798 0.788 0.772 0.781 0.771 0.784 0.748 ...
$ ETR : num 20 22.4 22.3 23.9 20.1 20.7 18 23.9 27.5 24.2 ...
$ Clorofila: num 58.3 67.8 49.8 74.8 59.6 63.6 52.2 56.6 54.4 58.5 ...
$ MS : num 0.57 0.69 0.71 0.81 0.48 1.2 0.55 0.68 0.55 0.3 ...
$ Umid : num 58.1 62.5 63 58 63.6 ...
$ Zn : num 33.5 100.5 93.9 127.4 535 ...
$ Cu : num 27.7 27.7 27.7 27.7 27.6 ...
$ Fe : num 1714 857 1660 1145 1141 ...
$ Mn : num 612 1434 1068 1541 716 ...
$ Ca : num 0.158 0.493 0.118 0.355 0.374 ...
> dput(tab1)
structure(list(Tempo = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
2, 2, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 10, 10,
10, 10, 10, 12, 12, 12, 12, 12), Fo = c(58L, 84L, 69L, 67L, 90L,
85L, 77L, 86L, 85L, 76L, 91L, 84L, 81L, 180L, 82L, 126L, 132L,
128L, 136L, 122L, 201L, 247L, 184L, 227L, 221L, 268L, 304L, 284L,
311L, 318L, 249L, 258L, 286L, 243L, 275L, 240L, 241L, 250L, 241L,
228L), Fm = c(240L, 427L, 290L, 331L, 424L, 373L, 351L, 375L,
393L, 302L, 231L, 217L, 207L, 349L, 137L, 233L, 232L, 202L, 239L,
197L, 446L, 533L, 418L, 337L, 492L, 463L, 396L, 429L, 430L, 455L,
481L, 469L, 599L, 524L, 460L, 244L, 543L, 418L, 398L, 474L),
Fv.Fm = c(0.758, 0.803, 0.762, 0.798, 0.788, 0.772, 0.781,
0.771, 0.784, 0.748, 0.602, 0.613, 0.609, 0.484, 0.401, 0.459,
0.431, 0.366, 0.431, 0.381, 0.549, 0.537, 0.56, 0.326, 0.551,
0.421, 0.232, 0.338, 0.277, 0.301, 0.482, 0.45, 0.523, 0.536,
0.402, 0.016, 0.556, 0.402, 0.394, 0.519), ETR = c(20, 22.4,
22.3, 23.9, 20.1, 20.7, 18, 23.9, 27.5, 24.2, 14.7, 16.3,
17.2, 6.1, 10.5, 6.5, 7.4, 4.8, 9.7, 7.1, 8, 12.1, 9, 5.5,
5.8, 7.3, 2.9, 4.9, 4.1, 4.6, 5.4, 6.9, 3.9, 6, 4.9, 0.3,
6.3, 4.1, 3.5, 8.2), Clorofila = c(58.3, 67.8, 49.8, 74.8,
59.6, 63.6, 52.2, 56.6, 54.4, 58.5, 58.1, 64.8, 46.4, 49.2,
43.7, 60.1, 48.1, 66, 50.4, 53, 56.2, 61, 50.8, 45.3, 56.6,
46, 45.9, 43, 46.1, 37.3, 57.6, 58.8, 46.7, 48.6, 41.9, 71,
42.6, 45.4, 44.2, 52.5), MS = c(0.57, 0.69, 0.71, 0.81, 0.48,
1.2, 0.55, 0.68, 0.55, 0.3, 0.52, 0.88, 0.46, 0.25, 0.29,
0.54, 0.48, 0.62, 0.26, 0.38, 0.46, 0.39, 0.4, 0.39, 0.37,
0.37, 0.74, 0.33, 0.5, 0.47, 0.54, 0.7, 0.38, 0.36, 0.17,
0.96, 0.54, 0.61, 0.41, 0.48), Umid = c(58.09, 62.5, 63.02,
58.03, 63.64, 52.94, 63.58, 63.83, 60.71, 64.29, 60.9, 51.65,
56.6, 51.92, 50.85, 53.04, 54.29, 40.38, 51.85, 53.66, 57.01,
55.17, 61.54, 63.21, 59.78, 53.75, 56.98, 61.63, 55.36, 60.83,
56.45, 58.08, 62.75, 58.14, 59.52, 22.58, 62.5, 55.15, 55.43,
60.33), Zn = c(33.4872691, 100.4618073, 93.905, 127.4425,
534.995015, 120.1941264, 140.4361914, 180.9215784, 46.88217673,
113.9705147, 95.33502538, 141.1397796, 113.7999002, 53.84846964,
53.79448622, 86.93668993, 87.11038961, 120.4339152, 63.88095238,
173.9600998, 153.9645709, 80.36944583, 93.905, 46.81206381,
100.5622189, 136.1928934, 107.5350701, 53.55289421, 80.77270447,
47.07017544, 93.81118881, 107.2663668, 80.40959041, 107.2663668,
134.15, 142.9980013, 107.1057884, 80.49, 46.9525, 221.3475
), Cu = c(27.65851223, 27.65851223, 27.7, 27.7, 27.61714855,
27.57590841, 27.61714855, 27.67232767, 27.65851223, 27.68615692,
28.12182741, 27.75551102, 55.28942116, 83.39187155, 27.76942356,
27.61714855, 27.67232767, 27.63092269, 52.76190476, 27.63092269,
27.64471058, 27.65851223, 27.7, 27.61714855, 27.68615692,
28.12182741, 55.51102204, 27.64471058, 55.59458103, 27.76942356,
27.67232767, 27.68615692, 27.67232767, 27.68615692, 27.7,
36.90872751, 110.5788423, 55.4, 27.7, 27.7), Fe = c(1714.328507,
857.1642536, 1659.67, 1144.6, 1141.176471, 569.7361872, 912.9411765,
1257.802198, 1657.184224, 3317.681159, 2614.568528, 2408.476954,
5026.187625, 5168.790768, 688.481203, 1940, 1257.802198,
1712.618454, 981.0857143, 1084.658354, 799.6207585, 3142.935597,
2918.73, 1141.176471, 1029.625187, 2033.553299, 8716.392786,
2741.556886, 5743.100853, 4245.634085, 1372.147852, 1487.236382,
1029.110889, 2002.048976, 3548.26, 762.5582945, 12108.54291,
2174.74, 2174.74, 1945.82), Mn = c(611.9321018, 1433.669496,
1068.11, 1540.88, 715.7627119, 1621.134893, 1536.271186,
717.1928072, 1625.991013, 840.05997, 1457.685279, 789.5290581,
1258.203593, 755.5745108, 737.2631579, 1309.322034, 787.1628372,
1275.042394, 333.5238095, 1327.441397, 1048.502994, 1608.507239,
752.93, 611.0169492, 1137.581209, 871.0558376, 1140.430862,
664.0518962, 1071.861515, 473.9548872, 1311.938062, 1347.596202,
437.3126873, 542.5387306, 595.34, 2053.137908, 2097.005988,
437.75, 1068.11, 1383.29), Ca = c(0.15765352, 0.492667249,
0.1184175, 0.3552525, 0.373867149, 0.432252364, 0.35418993,
0.374614136, 0.433547179, 0.216990255, 0.280515228, 0.435067635,
0.334846557, 0.297083542, 0.197857143, 0.806765952, 0.492913337,
0.452801746, 0.300742857, 0.472488778, 0.630299401, 0.394133799,
0.21709875, 0.295158275, 0.611517991, 0.30055203, 0.37574023,
0.315149701, 0.455528098, 0.336357143, 0.473196803, 0.49315967,
0.177448801, 0.374801349, 0.13815375, 0.631139241, 0.590905689,
0.15789, 0.5131425, 0.53287875)), .Names = c("Tempo", "Fo",
"Fm", "Fv.Fm", "ETR", "Clorofila", "MS", "Umid", "Zn", "Cu",
"Fe", "Mn", "Ca"), row.names = c(NA, -40L), class = "data.frame")
> out_rem <-function(x) {
# Original attempt: set the values that boxplot() reports as outliers to NA
# and return the modified vector.
outliers <- boxplot(x, plot = FALSE)$out
if (length(outliers) != 0){
# NOTE(review): `==` compares x and outliers element by element, recycling
# the shorter vector; it is not a membership test. This is what triggers the
# "longer object length is not a multiple of shorter object length" warning
# and lets some outliers go unreplaced. `x %in% outliers` is the membership
# test.
x[which(x == outliers)] = NA
}
return(x)
}
# Overwrite every column except the first with its out_rem-processed version.
> tab1[,-1] <- apply(tab1[,-1],2,out_rem)
Warning message:
In x == outliers :
longer object length is not a multiple of shorter object length
--
Alisson Lucrecio da Costa
--------------------------------------------------------------------------------
_______________________________________________
R-br mailing list
R-br em listas.c3sl.ufpr.br
https://listas.inf.ufpr.br/cgi-bin/mailman/listinfo/r-br
Leia o guia de postagem (http://www.leg.ufpr.br/r-br-guia) e forneça código mínimo reproduzível.
-------------- Próxima Parte ----------
Um anexo em HTML foi limpo...
URL: <http://listas.inf.ufpr.br/pipermail/r-br/attachments/20140521/ff609e4b/attachment-0001.html>
Mais detalhes sobre a lista de discussão R-br