Giselle,

Faltou usar a primeira coluna do arquivo como nomes das linhas (row.names = 1) na chamada de read.table().

Já atualizei o código que segue abaixo:

### <BEGIN>

###

###

### Work inside the local scratch directory and echo it for confirmation.
### NOTE(review): the path is machine-specific; adjust it before running.
setwd("C:/LAB/Tmp")
getwd()

### Name of the compressed file: the 8th "/"-separated piece of the URL.
### NOTE(review): `dURL` is not defined in this excerpt; it must already
### exist in the session, and index 8 only fits a URL with that layout.
gzFile <- strsplit(dURL, "/")[[1]][8]
gzFile

### Download the archive; binary mode ("wb") keeps the compressed bytes intact.
download.file(dURL, gzFile, mode = "wb")

### downloaded 4.3 Mb

### Once downloaded, files compressed with gzip, bzip2 or xz can be read
### directly by read.table(). The first column is used as the row names.
### TRUE/FALSE are spelled out because T and F are ordinary, reassignable
### variables.
GBS <- read.table(
  gzFile,
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE,
  row.names = 1
)

dim(GBS)

### [1] 41371 257

### Recode one marker's genotype calls as numeric values.
###
### x: vector of calls for a single marker. "H" is treated as heterozygous,
###    "N" (or a real NA) as missing, and any other value as an allele.
###
### Returns a numeric vector of the same length as x:
###   -1  for the first allele (in order of first appearance in x),
###    1  for the second allele,
###    0  for "H" (and for any further, unexpected allele),
###   NA  for "N" or NA.
parse.GBS <- function(x) {
  calls <- unique(as.character(x))

  # Keep only real alleles. NA must be dropped explicitly, otherwise it would
  # occupy an allele slot and shift the -1/1 coding of the true alleles.
  alleles <- calls[!is.na(calls) & !(calls %in% c("H", "N"))]

  # Start from 0 so that "H" needs no explicit handling.
  y <- rep(0, length(x))

  # which() silently ignores the NA comparisons produced when an allele slot
  # is empty (marker with fewer than two alleles) or when x contains NA.
  y[which(x == alleles[1])] <- -1
  y[which(x == alleles[2])] <- 1

  # Missing calls: the "N" code and real NA values alike.
  y[is.na(x) | x == "N"] <- NA

  y
}

### Recode every marker (row of GBS) with parse.GBS, skipping the first three
### columns. apply() returns one column per marker, so rows of X correspond
### to the remaining columns of GBS.
genotype.calls <- GBS[, -(1:3)]
X <- apply(genotype.calls, MARGIN = 1, FUN = parse.GBS)

dim(X)

### [1] 254 41371

### Fraction of missing (NA) calls in each column of X.
frac.missing <- colSums(is.na(X)) / nrow(X)

### Number of columns with less than half of the calls missing.
sum(frac.missing < 0.5)

### [1] 16030

### Distribution of the missing fraction across columns.
hist(frac.missing)

### OK!!!

### <END>

Éder Comunello <comunello.eder@gmail.com>
Dourados, MS - [22° 16.5'S, 54° 49'W]