Parse a CSV file to select the lines that match a regular expression passed as a parameter #bash #awk #genomics
![](https://static.wixstatic.com/media/cd2f4b_d06d412c7a274ace8685ae34bda330d4~mv2.png/v1/fill/w_636,h_342,al_c,q_85,enc_auto/cd2f4b_d06d412c7a274ace8685ae34bda330d4~mv2.png)
#!/bin/bash
#
# Select the lines of a CSV file that match a regular expression.
#
# Usage: ./essai.sh sample.csv SAMD11
#
# Arguments:
#   $1 - path to the input CSV file
#   $2 - regular expression (awk syntax) searched for in every data line
#
# Output:
#   results.<input base name without .csv>.<regex>.csv in the current
#   directory. It holds the first line of the input (the header) followed by
#   every later line matching the regex, with commas turned into tabs and
#   double quotes removed. Progress messages and line counts go to stdout,
#   errors go to stderr.
#
# Exit status:
#   0 on success, 1 when a parameter is missing, the input is unreadable or
#   the filtering step fails.
#
# Known limitation: every comma is replaced, including commas located inside
# quoted fields.

set -euo pipefail

# Print an error message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

input_file=${1:-}
pattern=${2:-}

[[ -n "${input_file}" ]] || die "TEXTTAB file not passed as parameter"
[[ -n "${pattern}" ]] || die "regular expression not passed as parameter"
[[ -f "${input_file}" && -r "${input_file}" ]] \
  || die "cannot read input file: ${input_file}"

# Print which gene/regex was passed as 2nd parameter.
echo "my imput regular expression: ${pattern}"

# Print the name of the analysed CSV file (1st parameter).
echo "my analyzed file is: ${input_file%.csv}.csv"

# Build the output name from the base name only, so an input given with a
# directory component still yields a valid file name in the current directory.
base_name=${input_file##*/}
nom_fichier=${base_name%.csv}
# A '/' in the regex cannot be part of a file name; substitute it.
safe_pattern=${pattern//\//_}
output_file="results.${nom_fichier}.${safe_pattern}.csv"

# Single awk pass, no intermediate files:
#   - the regex travels through the environment, so it is never interpolated
#     into the awk program text and is not subject to shell word splitting;
#   - commas become tabs and double quotes are dropped before matching;
#   - line 1 (header) is always kept exactly once, later lines only on match.
# The input is fed on stdin so an unusual file name is never parsed by awk as
# an option or a variable assignment.
if ! ESSAI_REGEX="${pattern}" awk '
  BEGIN { re = ENVIRON["ESSAI_REGEX"] }
  {
    gsub(/,/, "\t")
    gsub(/"/, "")
  }
  NR == 1 || $0 ~ re
' < "${input_file}" > "${output_file}"; then
  # Do not leave a partial result behind when awk fails (e.g. invalid regex).
  rm -f -- "${output_file}"
  die "filtering failed for regular expression: ${pattern}"
fi

echo "number of lines in the imput csv file:"
wc -l -- "${input_file}"
echo "number of lines in the output csv file comprising the regular expression:"
wc -l -- "${output_file}"