Commit 531d502b authored by Ian Dennis Miller

significant refactor

parent 1a5ae1db
......@@ -8,22 +8,28 @@ CSV_SRC="$HOME/.csv-functions.sh"
CSV_URL='https://projects.sisrlab.com/idm/csv-unix/raw/master/csv-functions.sh'
# collapse fields containing newlines
alias csv.collapse='perl -pe "s/\\\\\n/ /" -'
csv.collapse() {
perl -pe "s/\\\\\n/ /" -
}
# remove all quotes from a CSV file
alias csv.unquote='sed "s/\"//g"'
# remove all quotes from a CSV
# Delete every double-quote character from the stream.
# Reads from stdin, writes to stdout.
csv.unquote() {
  tr -d '"'
}
# split a comma-delimited row onto separate lines
alias csv.split='sed -e "s/,/\\n/g"'
# Put each comma-delimited field on its own line by turning every comma
# into a newline. Reads from stdin, writes to stdout.
csv.split() {
  tr ',' '\n'
}
# produce awk expression suitable for selecting the requested columns
_awk_select_column() {
# Build the awk field list for the requested columns.
# Arguments: $@ - column selectors (e.g. 1 3 NF); a single argument may also
#                 hold several whitespace-separated selectors ("1 3").
# Outputs:   "$1, $3" style list on stdout (one line); nothing when no
#            selectors were given.
_csv_awk_select() {
  # "$@" is quoted so selectors are never glob-expanded by the shell; the
  # whitespace splitting that the unquoted form used to provide is done
  # inside awk instead. Selectors are emitted through %s so they are never
  # interpreted as printf format directives, and the separator is written
  # before every item but the first, so no trailing ", " has to be trimmed.
  awk 'BEGIN {
    sep = ""
    for (i = 1; i < ARGC; i++) {
      n = split(ARGV[i], idx, " ")
      for (j = 1; j <= n; j++) {
        printf "%s$%s", sep, idx[j]
        sep = ", "
      }
    }
    if (sep != "") print ""
  }' "$@"
}
# select one or more columns from a CSV by index, starting at 1
csv.select_column() {
awk -F, "BEGIN {OFS = \",\"} { print `_awk_select_column $@` }"
# Select one or more columns from comma-separated input on stdin.
# Arguments: $@ - column selectors, handed to _csv_awk_select
# Outputs:   the chosen fields of every row, comma-separated, on stdout.
#            With no selectors the print list is empty, so rows pass through.
# NOTE: the selectors are spliced into the awk program text, so they must
#       come from a trusted caller.
csv.select() {
  local fields
  # Quoted "$@" keeps the shell from glob-expanding the selectors.
  fields=$(_csv_awk_select "$@") || return
  awk -F, -v OFS=',' "{ print ${fields} }"
}
# limit number of rows returned
......@@ -46,8 +52,8 @@ csv.count() {
wc -l - | awk '{ print $1 }'
}
# cat a CSV file, even if it is gzipped
csv.cat() {
# print a CSV file, even if it is gzipped
csv.read() {
if [[ -z `file $1 |grep gzip` ]];
then cat $1 | csv.collapse;
else zcat $1 | csv.collapse;
......@@ -56,7 +62,7 @@ csv.cat() {
# For a CSV with a header row, print column names, each on a separate line.
# Arguments: $1 - CSV file, passed on to csv.read
# Outputs:   the fields of the first row, unquoted, one per line.
csv.names() {
  # Only csv.read is called: the older csv.cat name was replaced by csv.read.
  # ${1+"$1"} passes the path as one word (safe for spaces) but still passes
  # no argument at all when the caller gave none.
  csv.read ${1+"$1"} | csv.limit 1 | csv.unquote | csv.split
}
# built-in updater
......@@ -71,8 +77,89 @@ csv.install() {
fi
}
# Obtain a directory listing as CSV.
# Arguments: $1 - path handed to `ls -l` (optional; defaults to ".")
# Outputs:   one row per `ls -l` line with the whitespace-separated fields
#            re-joined by commas; the "total N" summary line is dropped.
# NOTE: fields come from splitting `ls -l` output on whitespace, so a file
#       name that contains spaces is spread over several columns.
csv.ls() {
  # Quoting keeps a path with spaces in one piece; the ":-." default keeps
  # the no-argument call listing the current directory.
  ls -l "${1:-.}" \
    | awk -v OFS=',' '$1 == "total" { next } NF { $1 = $1; print }'
}
# Check whether a shell function named csv.[whatever] is defined.
# Arguments: $1 - command name without the "csv." prefix
# Returns:   0 when the function csv.$1 exists, non-zero otherwise.
_csv_function_exists() {
  # `typeset -f NAME` asks the running shell about its own function table.
  # The earlier `which ... | grep -v "not found"` probe relied on a `which`
  # that reports shell functions; where `which` is an external program
  # (as under bash) it cannot see them, so every lookup failed.
  typeset -f "csv.$1" > /dev/null 2>&1
}
# How many seconds does it take to count the lines in a file?
# Arguments: $1 - file handed to csv.read
# Outputs:   the 7th whitespace-separated field of every line that contains
#            "total" in the combined output of the timed pipeline.
# NOTE(review): the `grep total` / `$7` parsing presumably targets one
# particular shell's `time` report layout (looks like zsh's) — confirm which
# shells are meant to be supported.
csv.benchmark() {
# Time the whole read-and-count pipeline; 2>&1 merges the group's stderr into
# the stream that grep and awk filter below.
{ time csv.read $1 | csv.count; } 2>&1 | grep total | awk '{ print $7 }'
}
# Generic CSV runner: `csv <command> [args...]` runs `csv.<command> [args...]`.
# Arguments: $1 - command name without the "csv." prefix
#            $2… - passed through unchanged to the command
# Returns:   the command's exit status; 1 (with a message on stderr) when
#            the command is missing or unknown.
csv() {
  if [[ $# -eq 0 ]] || ! _csv_function_exists "$1"; then
    printf "csv: unknown or missing command '%s' (try 'csv help')\n" "${1-}" >&2
    return 1
  fi
  # Quoted so a command name containing whitespace cannot be word-split.
  "csv.$1" "${@:2}"
}
# Print the command table: one line per command, giving its name, its
# argument hint and a short description.
_csv_commands() {
  printf '%s\n' \
    'read [file] Read a CSV file. May be gzipped.' \
    'select [cols] Select one or more columns by index, starting with 1.' \
    'names [file] For CSV files with a header row, obtain the field names' \
    'limit [num] Restrict the number of rows returned.' \
    'ls [path] Obtain a directory listing as CSV' \
    'benchmark [file] How many seconds does it take to count the lines in a file?' \
    'collapse (piped) Collapse CSV rows that span multiple lines.' \
    'unquote (piped) Remove quotes that surround fields.' \
    'split (piped) Split a row into individual fields, producing one field per line.' \
    'sum (piped) Calculate the sum of a column containing numbers.' \
    'mean (piped) Calculate the mean of a column containing numbers.' \
    'count (piped) Count the number of rows returned.' \
    'version Print the version.' \
    'help Print this help message.'
}
# Print only the command names: the first whitespace-separated word of each
# line produced by _csv_commands, one per line.
_csv_commands_short() {
  local name _rest
  _csv_commands | while read -r name _rest || [[ -n "$name" ]]; do
    printf '%s\n' "$name"
  done
}
# Print the version banner on stdout.
csv.version() {
  printf '%s\n' 'CSV UNIX 0.1'
}
# Print help: version banner, usage synopsis, the command table from
# _csv_commands, then a blank line and the project URL.
csv.help() {
  # The banner comes from csv.version and the command list from
  # _csv_commands; the hard-coded header and the grep over the installed
  # ~/.csv-functions.sh printed the same information a second time.
  csv.version
  cat <<'EOF'
Usage:
csv [command] [options]
Available commands:
EOF
  _csv_commands
  # printf emits a real blank line; echo "\n..." prints a literal
  # backslash-n in shells whose echo does not interpret escapes.
  printf '\n%s\n' 'https://projects.sisrlab.com/idm/csv-unix'
}
# Completion function for the `csv` command.
# Globals:  COMP_WORDS, COMP_CWORD (read); COMPREPLY (written)
# After `read`, `benchmark` or `names` it offers the *.csv and *.csv.gz files
# of the current directory; otherwise it offers the command names printed by
# _csv_commands_short.
_csv_completion() {
  local cur prev names
  COMPREPLY=()
  cur="${COMP_WORDS[COMP_CWORD]}"
  prev="${COMP_WORDS[COMP_CWORD-1]}"
  case "${prev}" in
    read|benchmark|names)
      # compgen -G expands the glob itself and stays silent when nothing
      # matches, unlike `ls`, which reports unmatched patterns on stderr in
      # the middle of a completion.
      names=$(compgen -G '*.csv'; compgen -G '*.csv.gz')
      COMPREPLY=( $(compgen -W "${names}" -- "${cur}") )
      return 0
      ;;
  esac
  COMPREPLY=( $(compgen -W "$(_csv_commands_short)" -- "${cur}") )
  return 0
}
# Register _csv_completion as the completion function for the `csv` command.
complete -F _csv_completion csv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment