[R-br] Erro em função out_rem

Robert Iquiapaza rbali em ufmg.br
Quarta Maio 21 10:19:05 BRT 2014


Allison,
Você está substituindo por NA, não pela média. Mas o alerta deve-se a um erro na sua função: quando usa which(x == outliers), x e outliers têm comprimentos diferentes e a comparação produz resultados inesperados. Use o operador %in%:

tab2 <- apply(tab1[,-1],2,out_rem)

# Replace the boxplot outliers of a vector with NA.
#
# x: vector of values to clean.
# Returns x with every element that boxplot() reports as an outlier set
# to NA; x is returned unchanged when no outlier is reported.
out_rem1 <- function(x) {
  flagged <- boxplot(x, plot = FALSE)$out
  if (length(flagged) == 0) {
    return(x)
  }
  # Membership test: marks every element whose value is among the flagged
  # ones, regardless of how many outliers there are.
  # To substitute a mean instead of NA, assign mean(x, na.rm = TRUE) here.
  x[x %in% flagged] <- NA
  x
}


# Apply out_rem1 to every column of tab1 except the first one.
tab3 <- apply(tab1[,-1],2,out_rem1)

# Element-wise comparison of the original columns with the cleaned ones.
tab1[,-1]==tab3

# Count the NA values in each column of both results.
apply(tab2,2,function(x)sum(is.na(x))) # some outliers are not identified correctly
apply(tab3,2,function(x)sum(is.na(x))) # ok

Sds

From: Alisson Lucrecio 
Sent: Wednesday, May 21, 2014 8:46 AM
To: r-br 
Subject: [R-br] Erro em função out_rem

Caros Colegas,

Bom dia.

Eu estou tentando criar uma função para encontrar os outliers em um data frame e substituí-los pela média, mas está aparecendo a mensagem de erro descrita abaixo. Alguém saberia como solucionar esse problema?
Obrigado.
> str(tab1)
'data.frame':	40 obs. of  13 variables:
 $ Tempo    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ Fo       : int  58 84 69 67 90 85 77 86 85 76 ...
 $ Fm       : int  240 427 290 331 424 373 351 375 393 302 ...
 $ Fv.Fm    : num  0.758 0.803 0.762 0.798 0.788 0.772 0.781 0.771 0.784 0.748 ...
 $ ETR      : num  20 22.4 22.3 23.9 20.1 20.7 18 23.9 27.5 24.2 ...
 $ Clorofila: num  58.3 67.8 49.8 74.8 59.6 63.6 52.2 56.6 54.4 58.5 ...
 $ MS       : num  0.57 0.69 0.71 0.81 0.48 1.2 0.55 0.68 0.55 0.3 ...
 $ Umid     : num  58.1 62.5 63 58 63.6 ...
 $ Zn       : num  33.5 100.5 93.9 127.4 535 ...
 $ Cu       : num  27.7 27.7 27.7 27.7 27.6 ...
 $ Fe       : num  1714 857 1660 1145 1141 ...
 $ Mn       : num  612 1434 1068 1541 716 ...
 $ Ca       : num  0.158 0.493 0.118 0.355 0.374 ...

> dput(tab1)
structure(list(Tempo = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 
2, 2, 2, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 10, 10, 
10, 10, 10, 12, 12, 12, 12, 12), Fo = c(58L, 84L, 69L, 67L, 90L, 
85L, 77L, 86L, 85L, 76L, 91L, 84L, 81L, 180L, 82L, 126L, 132L, 
128L, 136L, 122L, 201L, 247L, 184L, 227L, 221L, 268L, 304L, 284L, 
311L, 318L, 249L, 258L, 286L, 243L, 275L, 240L, 241L, 250L, 241L, 
228L), Fm = c(240L, 427L, 290L, 331L, 424L, 373L, 351L, 375L, 
393L, 302L, 231L, 217L, 207L, 349L, 137L, 233L, 232L, 202L, 239L, 
197L, 446L, 533L, 418L, 337L, 492L, 463L, 396L, 429L, 430L, 455L, 
481L, 469L, 599L, 524L, 460L, 244L, 543L, 418L, 398L, 474L), 
    Fv.Fm = c(0.758, 0.803, 0.762, 0.798, 0.788, 0.772, 0.781, 
    0.771, 0.784, 0.748, 0.602, 0.613, 0.609, 0.484, 0.401, 0.459, 
    0.431, 0.366, 0.431, 0.381, 0.549, 0.537, 0.56, 0.326, 0.551, 
    0.421, 0.232, 0.338, 0.277, 0.301, 0.482, 0.45, 0.523, 0.536, 
    0.402, 0.016, 0.556, 0.402, 0.394, 0.519), ETR = c(20, 22.4, 
    22.3, 23.9, 20.1, 20.7, 18, 23.9, 27.5, 24.2, 14.7, 16.3, 
    17.2, 6.1, 10.5, 6.5, 7.4, 4.8, 9.7, 7.1, 8, 12.1, 9, 5.5, 
    5.8, 7.3, 2.9, 4.9, 4.1, 4.6, 5.4, 6.9, 3.9, 6, 4.9, 0.3, 
    6.3, 4.1, 3.5, 8.2), Clorofila = c(58.3, 67.8, 49.8, 74.8, 
    59.6, 63.6, 52.2, 56.6, 54.4, 58.5, 58.1, 64.8, 46.4, 49.2, 
    43.7, 60.1, 48.1, 66, 50.4, 53, 56.2, 61, 50.8, 45.3, 56.6, 
    46, 45.9, 43, 46.1, 37.3, 57.6, 58.8, 46.7, 48.6, 41.9, 71, 
    42.6, 45.4, 44.2, 52.5), MS = c(0.57, 0.69, 0.71, 0.81, 0.48, 
    1.2, 0.55, 0.68, 0.55, 0.3, 0.52, 0.88, 0.46, 0.25, 0.29, 
    0.54, 0.48, 0.62, 0.26, 0.38, 0.46, 0.39, 0.4, 0.39, 0.37, 
    0.37, 0.74, 0.33, 0.5, 0.47, 0.54, 0.7, 0.38, 0.36, 0.17, 
    0.96, 0.54, 0.61, 0.41, 0.48), Umid = c(58.09, 62.5, 63.02, 
    58.03, 63.64, 52.94, 63.58, 63.83, 60.71, 64.29, 60.9, 51.65, 
    56.6, 51.92, 50.85, 53.04, 54.29, 40.38, 51.85, 53.66, 57.01, 
    55.17, 61.54, 63.21, 59.78, 53.75, 56.98, 61.63, 55.36, 60.83, 
    56.45, 58.08, 62.75, 58.14, 59.52, 22.58, 62.5, 55.15, 55.43, 
    60.33), Zn = c(33.4872691, 100.4618073, 93.905, 127.4425, 
    534.995015, 120.1941264, 140.4361914, 180.9215784, 46.88217673, 
    113.9705147, 95.33502538, 141.1397796, 113.7999002, 53.84846964, 
    53.79448622, 86.93668993, 87.11038961, 120.4339152, 63.88095238, 
    173.9600998, 153.9645709, 80.36944583, 93.905, 46.81206381, 
    100.5622189, 136.1928934, 107.5350701, 53.55289421, 80.77270447, 
    47.07017544, 93.81118881, 107.2663668, 80.40959041, 107.2663668, 
    134.15, 142.9980013, 107.1057884, 80.49, 46.9525, 221.3475
    ), Cu = c(27.65851223, 27.65851223, 27.7, 27.7, 27.61714855, 
    27.57590841, 27.61714855, 27.67232767, 27.65851223, 27.68615692, 
    28.12182741, 27.75551102, 55.28942116, 83.39187155, 27.76942356, 
    27.61714855, 27.67232767, 27.63092269, 52.76190476, 27.63092269, 
    27.64471058, 27.65851223, 27.7, 27.61714855, 27.68615692, 
    28.12182741, 55.51102204, 27.64471058, 55.59458103, 27.76942356, 
    27.67232767, 27.68615692, 27.67232767, 27.68615692, 27.7, 
    36.90872751, 110.5788423, 55.4, 27.7, 27.7), Fe = c(1714.328507, 
    857.1642536, 1659.67, 1144.6, 1141.176471, 569.7361872, 912.9411765, 
    1257.802198, 1657.184224, 3317.681159, 2614.568528, 2408.476954, 
    5026.187625, 5168.790768, 688.481203, 1940, 1257.802198, 
    1712.618454, 981.0857143, 1084.658354, 799.6207585, 3142.935597, 
    2918.73, 1141.176471, 1029.625187, 2033.553299, 8716.392786, 
    2741.556886, 5743.100853, 4245.634085, 1372.147852, 1487.236382, 
    1029.110889, 2002.048976, 3548.26, 762.5582945, 12108.54291, 
    2174.74, 2174.74, 1945.82), Mn = c(611.9321018, 1433.669496, 
    1068.11, 1540.88, 715.7627119, 1621.134893, 1536.271186, 
    717.1928072, 1625.991013, 840.05997, 1457.685279, 789.5290581, 
    1258.203593, 755.5745108, 737.2631579, 1309.322034, 787.1628372, 
    1275.042394, 333.5238095, 1327.441397, 1048.502994, 1608.507239, 
    752.93, 611.0169492, 1137.581209, 871.0558376, 1140.430862, 
    664.0518962, 1071.861515, 473.9548872, 1311.938062, 1347.596202, 
    437.3126873, 542.5387306, 595.34, 2053.137908, 2097.005988, 
    437.75, 1068.11, 1383.29), Ca = c(0.15765352, 0.492667249, 
    0.1184175, 0.3552525, 0.373867149, 0.432252364, 0.35418993, 
    0.374614136, 0.433547179, 0.216990255, 0.280515228, 0.435067635, 
    0.334846557, 0.297083542, 0.197857143, 0.806765952, 0.492913337, 
    0.452801746, 0.300742857, 0.472488778, 0.630299401, 0.394133799, 
    0.21709875, 0.295158275, 0.611517991, 0.30055203, 0.37574023, 
    0.315149701, 0.455528098, 0.336357143, 0.473196803, 0.49315967, 
    0.177448801, 0.374801349, 0.13815375, 0.631139241, 0.590905689, 
    0.15789, 0.5131425, 0.53287875)), .Names = c("Tempo", "Fo", 
"Fm", "Fv.Fm", "ETR", "Clorofila", "MS", "Umid", "Zn", "Cu", 
"Fe", "Mn", "Ca"), row.names = c(NA, -40L), class = "data.frame")

> out_rem <-function(x) {
  # Values of x that boxplot() reports as outliers (no plot is drawn).
  outliers <- boxplot(x, plot = FALSE)$out
  if (length(outliers) != 0){
    # NOTE(review): `==` compares x and outliers element by element,
    # recycling the shorter vector; it is not a membership test. This is
    # the comparison named in the warning message reported below.
    x[which(x == outliers)] = NA
  }
  return(x)
}

> tab1[,-1] <- apply(tab1[,-1],2,out_rem)

Warning message:
In x == outliers :
  longer object length is not a multiple of shorter object length



-- 
Alisson Lucrecio da Costa

--------------------------------------------------------------------------------
_______________________________________________
R-br mailing list
R-br em listas.c3sl.ufpr.br
https://listas.inf.ufpr.br/cgi-bin/mailman/listinfo/r-br
Leia o guia de postagem (http://www.leg.ufpr.br/r-br-guia) e forneça código mínimo reproduzível.
-------------- Próxima Parte ----------
Um anexo em HTML foi limpo...
URL: <http://listas.inf.ufpr.br/pipermail/r-br/attachments/20140521/ff609e4b/attachment-0001.html>


Mais detalhes sobre a lista de discussão R-br