vlookup

# Differences

This shows you the differences between two versions of the page.

 vlookup [2015/09/10 12:46]vincenzo created vlookup [2015/09/21 15:21] (current)vincenzo 2015/09/21 15:21 vincenzo 2015/09/10 12:46 vincenzo created 2015/09/21 15:21 vincenzo 2015/09/10 12:46 vincenzo created Line 1: Line 1: - Ok for an extensive discussion of VLOOKUP ​methods in R, take [[https://gist.github.com/benmarwick/4983803|this]] as a reference: + ​ + # Methods ​for doing Excel'​s ​VLOOKUP ​with R + + # sample data + x <- data.frame(id = c(1, 2, 3, 4), name = c('​foo',​ '​bar',​ '​bob',​ '​joe'​)) + y <- data.frame(idblah = c(5, 2, 4, 3, 1), sex = c('​m',​ '​f',​ '​f',​ '​m',​ '​m'​)) + z <- data.frame(id = c(1, 2, 3, 4, 5), sex = c('​g',​ '​b',​ '​b',​ '​g',​ '​g'​)) + + # function for find a single value + vlookup <- function(val,​ df, col){ + df[df[1] == val, col][1] } + vlookup(3, x, 2) # bob + # + lookv <- function(x,​ltab,​rcol=2) ltab[max(which(ltab[,​1]<​=x)),​rcol] + lookv(3, y, 2) # f + + # find values and merge to data frame + # based on https://stat.ethz.ch/pipermail/r-help/​2008-March/​157899.html + data.frame(sex=y\$sex,​ name=x\$name[match(y\$idblah,​ x\$id)]) + + # using merge + merge(x, y, by.x = '​id',​ by.y='​idblah'​) ​ # If the matching column(s) have same name (say, '​foo'​),​ then by='​foo'​ is all that is needed. + merge(x, y, by.x = '​id',​ by.y='​idblah',​ all = TRUE)  # Keep all records. This is same as all.y = TRUE; all.x = TRUE gives you above results + + # using sqldf, same process + library(sqldf) + sqldf("​SELECT + xa.* + , ya.sex + FROM + x xa + INNER JOIN + y ya + ON xa.id = ya.idblah;"​) + + # simplified + sqldf("​SELECT x.name, y.sex + FROM x + INNER JOIN y + ON x.id = y.idblah"​) + + # using plyr, the the join-by column must have the same name in both dfs + library(plyr) + join(x, z, by = "​id"​) +