Commit 12343ebb authored by Ian Dennis Miller

Merge branch 'idm-master-patch-92404' into 'master'

Update csv-functions.sh, README.md files

See merge request !2
parents 0501b2e6 341cb403
......@@ -22,16 +22,10 @@ There is no need to create a full analysis environment if you just want to quick
### CSV
Create aliases for working with CSV files.
Load aliases for working with CSV files.
```
alias csv.collapse_lines='perl -pe "s/\\\\\n/ /" -'
alias csv.remove_quotes='sed "s/\"//g"'
alias csv.split='sed -e "s/,/\\n/g"'
function csv.select_index() { awk -F, "{print \$$1}" }
function csv.limit() { head -n$1 }
function csv.cat() { cat $1 | csv.collapse_lines | csv.remove_quotes }
function csv.names() { csv.cat $1 | csv.limit 1 | csv.split }
source csv-functions.sh
```
## Data Set Descriptions
......@@ -49,7 +43,7 @@ Using the comma field delimiter, print column 5 from the first 10 lines.
NB: it is much faster to apply the limit before the select.
```
csv.cat ~/Data/4chan/pol.csv | csv.limit 10 | csv.select_index 5
csv.cat ~/Data/4chan/pol.csv | csv.limit 10 | csv.select_column 5
```
### gold
......
# Streaming CSV Functions
# Ian Dennis Miller
# Perform streaming operations with CSV files.
# When files are too large to fit into memory, streaming might be the only alternative.
# collapse fields containing newlines
alias csv.collapse='perl -pe "s/\\\\\n/ /" -'
# remove all double quotes from a CSV stream
# Defined as a function rather than an alias because bash does not expand aliases in
# non-interactive shells. Extra arguments (e.g. file names) are forwarded to sed.
unalias csv.unquote 2>/dev/null || true  # drop an alias left by an older version of this file
csv.unquote() {
  sed 's/"//g' "$@"
}
# split a comma-delimited row onto separate lines
# Uses awk instead of sed: "\n" in a sed replacement is a GNU extension and other sed
# implementations may emit a literal "n". Defined as a function rather than an alias
# because bash does not expand aliases in non-interactive shells.
# Extra arguments are treated as input files.
unalias csv.split 2>/dev/null || true  # drop an alias left by an older version of this file
csv.split() {
  awk '{ gsub(/,/, "\n"); print }' "$@"
}
# Each body is terminated before its closing brace so the definitions parse in bash and
# POSIX sh as well as zsh.

# print column $1 (a non-negative integer, 1 = first column) of comma-delimited stdin;
# the index is handed to awk with -v rather than pasted into the program text
csv.index() {
  case "$1" in
    '' | *[!0-9]*) printf 'usage: csv.index COLUMN_INDEX\n' >&2; return 2 ;;
  esac
  awk -F, -v col="$1" '{ print $col }'
}
# print only the first $1 lines of stdin
csv.limit() { head -n "$1"; }
# stream the named file(s), or stdin when none is given, through csv.collapse
csv.cat() { cat -- "$@" | csv.collapse; }
# print the header fields of the named file(s), one per line
csv.names() { csv.cat "$@" | csv.limit 1 | csv.unquote | csv.split; }
# print the sum of the first whitespace-separated field of every stdin line (8 decimals)
csv.sum() { awk '{ s += $1 } END { printf "%.8f\n", s }' -; }
# print the mean of the first field of every stdin line (8 decimals);
# empty input prints a message on stderr and returns 1 instead of dividing by zero
csv.mean() {
  awk '{ s += $1; n++ }
       END {
         if (n == 0) { print "csv.mean: no input rows" | "cat 1>&2"; exit 1 }
         printf "%.8f\n", s / n
       }' -
}
# print the number of lines on stdin; awk strips any padding wc adds.
# wc reads stdin with no operand (a "-" operand is not defined as stdin by POSIX).
csv.count() { wc -l | awk '{ print $1 }'; }
# select a single column from a CSV by its index, starting at 1
# Arguments: $1 - column index (digits only)
# Input:     comma-delimited rows on stdin
# Outputs:   the chosen field of every row, one per line
# Returns:   2 (with a usage message on stderr) when the index is missing or not numeric
# The index is passed to awk with -v so it is never interpreted as awk program text.
csv.select_column() {
  case "$1" in
    '' | *[!0-9]*)
      printf 'usage: csv.select_column COLUMN_INDEX\n' >&2
      return 2
      ;;
  esac
  awk -F, -v col="$1" '{ print $col }'
}
# limit number of rows returned
# Arguments: $1 - maximum number of lines to pass through
# Reads stdin and writes the first $1 lines to stdout.
csv.limit() {
  head -n "$1"
}
# calculate sum of a column
# Reads stdin, adds up the first whitespace-separated field of every line and prints
# the total with 8 decimal places (0.00000000 for empty input).
csv.sum() {
  awk '{ s += $1 } END { printf "%.8f\n", s }' -
}
# calculate mean of a column
# Reads stdin, averages the first whitespace-separated field over all lines and prints
# the result with 8 decimal places.
# Returns: 1, with a message on stderr and nothing on stdout, when there are no input
#          lines (the original divided by zero in that case).
csv.mean() {
  awk '{ s += $1; n++ }
       END {
         if (n == 0) { print "csv.mean: no input rows" | "cat 1>&2"; exit 1 }
         printf "%.8f\n", s / n
       }' -
}
# count the number of lines in a CSV file
# Reads stdin and prints the line count as a bare number; awk strips any padding that
# wc adds. wc is run without an operand so it reads stdin (a "-" operand is not defined
# as stdin by POSIX).
csv.count() {
  wc -l | awk '{ print $1 }'
}
# cat a CSV file
# Arguments: $@ - CSV file(s) to read; with no argument stdin is read instead
# Outputs:   the file contents piped through csv.collapse (defined elsewhere in this file)
# "$@" keeps file names containing spaces intact while preserving the stdin fallback.
csv.cat() {
  cat -- "$@" | csv.collapse
}
# print column names, each on a separate line
# Arguments: $@ - CSV file(s) whose first (header) row is read; forwarded to csv.cat
# Relies on csv.cat, csv.limit, csv.unquote and csv.split from this file.
csv.names() {
  csv.cat "$@" | csv.limit 1 | csv.unquote | csv.split
}
# built-in updater
# Downloads the latest csv-functions.sh to ~/.csv-functions.sh and sources it.
# The download goes to a temporary file first: `wget -O` truncates its target even when
# the transfer fails, so writing in place could wipe the installed copy. Nothing is
# replaced or sourced unless the download succeeded.
# NOTE(review): this executes code fetched from the network; integrity rests on HTTPS only.
# Returns: wget's exit status if the download fails, 1 if the file cannot be staged or
#          installed, otherwise the status of sourcing the new file.
csv.update() {
  local target="$HOME/.csv-functions.sh"
  local tmp rc
  tmp=$(mktemp "${target}.XXXXXX") || return 1
  if wget -O "$tmp" 'https://projects.sisrlab.com/idm/dataset-guide/raw/master/csv-functions.sh?inline=false'; then
    if mv -- "$tmp" "$target"; then
      source "$target"
    else
      rm -f -- "$tmp"
      return 1
    fi
  else
    rc=$?
    rm -f -- "$tmp"
    return "$rc"
  fi
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment