[R] gsub : replace regex pattern with values from another data.frame

arnaud gaboury arnaud.gaboury at gmail.com
Thu Feb 12 15:40:31 CET 2015


I have two data frames (both are also data.tables):

df1
structure(list(name = c("poisonivy", "poisonivy", "poisonivy",
"poisonivy", "poisonivy", "poisonivy", "poisonivy", "poisonivy",
"cruzecontrol", "agreenmamba", "agreenmamba", "vairis", "vairis",
"vairis", "vairis", "vairis", "vairis", "xaeth"), text = c("ok",
"need items ?", "i didn't submit pass codes for a long now",
"ok", "<@U03AEKYL4>: what app are you talking about ?", "some testing
with my irc client",
"ha ha sorry", "for me there is no such question", "Lol.",
"<@U03AEKWTL|agreenmamba> uploaded a file:
<https://enlightened.slack.com/files/agreenmamba/F03KGRF3W/screenshot_2015-02-09-14-31-15.png|regarding:
should I stay or should I go?>",
"<@U032FHV3S> <http://youtu.be/oGIFublvDes>", "ok, see you around",
"yeah, I had a procrastination rush so I started to decode a little",
"<http://ingress.com/intel|ingress.com/intel> when you submit passcodes",
"intel", "what is the cooldown time or how does it work...;",
"anybody knows how does \"Passcode circuitry too hot. Wait for cool
down to enter another passcode.\" works?",
"and people told that agent their geocities experience would never
amount to anything (the convo yesterday) "
), ts = c("1423594336.000138", "1423594311.000136", "1423594294.000135",
"1423594258.000133", "1423594244.000131", "1423497058.000127",
"1423497041.000126", "1423478555.000123", "1423494427.000125",
"1423492370.000124", "1423478364.000121", "1423594358.000139",
"1423594329.000137", "1423594264.000134", "1423594251.000132",
"1423592204.000130", "1423592174.000129", "1423150354.000112"
)), .Names = c("name", "text", "ts"), class = c("data.table",
"data.frame"), row.names = c(NA, -18L))

df2
structure(list(id = c("U03KH8Z52", "U02AF1DTJ", "U02AF0ZT8",
"U03AEKWTL", "U02BCJH0G", "U033YA1MS", "U029QMCRR", "U03H139M5",
"U02AET1D0", "U02A6U41Z", "U02B5T4CX", "U02B2QU4R", "U03F0LQ5X",
"U03JNFKLY", "U02ASMBMQ", "U029QLQC7", "U03AEMBQU", "U02B4D3Q1",
"U02AGDC14", "U029A467C", "U02A7NFG6", "U02AESPPL", "U02AQANK7",
"U03ADJDFK", "U03EYR0KB", "U02AW7Q5Q", "U02AE8RKD", "U02FT84BS",
"U02B25M3B", "U03EZDQT7", "U02AECKFF", "U03H2691M", "U02DWTJ5V",
"U02AFTAHH", "U029QQEPM", "U03C51Z42", "U02CAK2CV", "U03AK21DP",
"U03FFN8ED", "U02B23V03", "U029T2143", "U02C1LEEX", "U03AF2QH2",
"U03E0GN0S", "U03AG20R9", "U02AES8S2", "U02AG64S7", "U02B5A0R7",
"U02AS4SLR", "U03C2SG0R", "U03AV7CCW", "U032XPFDU", "U03AUKSSV",
"U02C2A61Y", "U02AESHJQ", "U02BLSKHU", "U02E34WM6", "U03AK6P26",
"U02E6ADRZ", "U03FCDQ50", "U03EW1CC5", "U02BL0DBD", "U02FHQZ6D",
"U02B47T63", "U03H2TTQP", "U03AVP71V", "U03JLV38V", "U02E39HAY",
"U02AE5281", "U032FHV3S", "U03AL2096", "U02ARUG6M", "U02AECRSP",
"U02B42XG4", "U03AFQZNS", "U02AE7H41", "U03G9UNTG", "U02GEQ0E6",
"U02AGLE5A", "U02BQTRC9", "U03H0J6GS", "U02B3D27F", "U02AEKTHV",
"U02C52YN3", "U02E33MUW", "U03AKUT85", "U03B53EHG", "U02FBN38P",
"U03AH3E5W", "U02B5PLE0", "U02AS4RCK", "U03ANE1GZ", "U02E8LZQB",
"U03EPGJ98", "U02E3N220", "U03AEKYL4", "U02AE7HT1", "U02C1RR3G",
"U03JH408J", "U03KL0FKN", "U02B44R92", "U03EURWGX"), name = c("10k_affair",
"1upwuzhere", "4xcss", "agreenmamba", "ait109", "arly69", "azkop13",
"barcik75", "bigolnob", "blackrose", "blink619", "bobaloo23",
"bodger", "bomb", "bootswithdefer", "brandizzle", "bregalad",
"camon", "celticrain", "ch3mical", "checksum", "cocothunder",
"cruxicon", "cruzecontrol", "crystalskunk", "cscheetah", "dabcelin",
"deelicious", "delthanar", "drkaosdk", "droidenl-joe", "dukeceph",
"fillerbunny", "flickohmsford", "flyingg0d", "garaxiel", "goby9",
"gymbal", "hideandseek", "hobojr", "ijackportals", "invalidcharactr",
"itso9", "j0shs", "jarvis", "jc0mm5", "jencyberchic", "jimbobradyson",
"joespr0cket", "jostrander", "jueliet", "karlashi", "khan99",
"kingkonn0r", "krispycridder", "kritickalmass", "lawgiver", "maxcorbett",
"memory556", "meta000x", "minkovsky", "mistylady", "mstephans",
"mstrinity", "nocarryr", "ollietronic", "philistine11", "pickledpickles",
"piercingsbykris", "poisonivy", "raugmor", "remarks999", "rheds77",
"rhinz", "rigiritter", "robbie0017", "rohdef", "ryoziya", "s4n1ty",
"sacredcow133", "samwill", "sgtlemonpepper", "sivan", "spline9",
"starwolf", "stueliueli", "sweetiris", "swift2plunder", "swissphoenix",
"synyck", "test", "therug", "tinja551", "trulyjuan", "twinster",
"vairis", "vinylz3ro", "watervirus", "xaeth", "yagamiyukari",
"zafo", "zexium")), .Names = c("id", "name"), class = c("data.table",
"data.frame"), row.names = c(NA, -102L))

I need to replace every match of this regex pattern in df1:
(?<=<@)[^|]{9}(?=>|) with the corresponding name from df2.

E.g.: if <@U03KH8Z52> is found in df1, then I want to replace the id
with the "name" that corresponds to this id in df2 -- in this case,
10k_affair.

I know how to replace an expression with a fixed string using gsub:
gsub('(?<=<@)[^|]{9}(?=>|)', 'toto', df1, perl = T)
but I have no idea how to replace it with a value looked up in another data frame.

Thank you for any hints.



More information about the R-help mailing list