Saturday, April 7, 2018

A bit of sed and regular expression magic to clean up your tables


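Suppose you want to keep only the gene ID (2nd column) and the fold change (6th column) from a comma-separated table, and remove the quotation marks. This can be done with sed and regular expressions. The pipeline below works in three steps: the first sed deletes the quoted row number together with the opening quote of the gene ID; the second replaces the closing quote and columns 3-5 with a single space; the third deletes everything from the next comma to the end of the line, leaving just the gene ID and the fold change.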



mpjanic@zoran:~/test$ cat SLC2A.txt

"6033","SLC2A1-AS1",24.8051071979286,27.7198330991446,21.8903812967126,0.789701049729193,-0.340621486785587,0.565579800713107,1
"15656","SLC2A1",21989.7607363,25370.4928449324,18609.0286276675,0.733491018144907,-0.447148795187931,0.0135793982312018,0.274890758345199
"15657","SLC2A10",116.660701272146,109.114399777756,124.207002766537,1.13831907630452,0.186905008681256,0.536100348167181,1
"15658","SLC2A11",153.584023702827,160.671774940703,146.496272464952,0.911773536571793,-0.133252558036828,0.610646274133104,1
"15659","SLC2A12",0,0,0,NA,NA,NA,NA
"15660","SLC2A13",91.7915789084436,88.0137320553238,95.5694257615633,1.08584675970211,0.118820516938777,0.718169926714343,1
"15661","SLC2A14",26.1757135597822,34.6019488604179,17.7494782591466,0.512961808328973,-0.963076678383474,0.0836580834259747,0.696210238779407
"15662","SLC2A2",0.946102725140454,0.450180696019295,1.44202475426161,3.20321321418857,1.67951983083656,0.80556985989175,1
"15663","SLC2A3",1801.59770325241,1801.22866521645,1801.96674128837,1.00040976256161,0.000591041330547107,0.91557362726645,1
"15664","SLC2A4",64.9152045965179,47.8981046043178,81.932304588718,1.71055421222935,0.774463827856209,0.0320174623595299,0.440346477750378
"15665","SLC2A4RG",2839.51257874418,2552.43962703032,3126.58553045805,1.22494005239048,0.292711146586849,0.106250785209328,0.755271734116694
"15666","SLC2A5",0.488574210205611,0.450180696019295,0.526967724391927,1.17056934926712,0.227210408076608,1,1
"15667","SLC2A6",849.67804667658,741.433588192089,957.922505161072,1.29198692966806,0.369591475141396,0.0690179423560097,0.640745754210812
"15668","SLC2A7",0,0,0,NA,NA,NA,NA
"15669","SLC2A8",306.652668694167,317.104693305773,296.20064408256,0.934078398508418,-0.0983844524549534,0.643201071003747,1
"15670","SLC2A9",312.030609161521,297.26095251567,326.800265807372,1.09937165659235,0.136679190181628,0.580291149287337,1


mpjanic@zoran:~/test$ sed -E "s/\"[0-9]*\",\"//g" SLC2A.txt | sed -E "s/\",[0-9.]*,[0-9.]*,[0-9.]*,/ /g" | sed -E "s/,.*//g"
SLC2A1-AS1 0.789701049729193
SLC2A1 0.733491018144907
SLC2A10 1.13831907630452
SLC2A11 0.911773536571793
SLC2A12 NA
SLC2A13 1.08584675970211
SLC2A14 0.512961808328973
SLC2A2 3.20321321418857
SLC2A3 1.00040976256161
SLC2A4 1.71055421222935
SLC2A4RG 1.22494005239048
SLC2A5 1.17056934926712
SLC2A6 1.29198692966806
SLC2A7 NA
SLC2A8 0.934078398508418
SLC2A9 1.09937165659235

No comments:

Post a Comment