Simple Statistical Functions in Shell
Here’s a small collection of basic statistical functions you can use in your shell scripts. This may come in handy when analyzing disk space, system performance, etc.
Average
# avg - print the arithmetic mean of the numbers read from stdin.
#
# Input:   one value per line; only the first whitespace-separated field of
#          each line is examined. Lines whose first field is not a plain
#          decimal number (optional leading "-", digits, optional fraction)
#          are ignored.
# Output:  the mean rounded to an integer ("%.0f"), followed by a newline.
# Returns: 0 on success; 1 (with a message on stderr) when no numeric value
#          was read, instead of aborting with a division-by-zero error.
avg() {
  awk '
    # At least one digit is required, so blank lines and a lone "." are
    # skipped rather than being averaged in as zeros.
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ { total += $1; n++ }
    END {
      if (n == 0) {
        print "avg: no numeric input" > "/dev/stderr"
        exit 1
      }
      printf("%.0f\n", total / n)
    }'
}
Minimum
# min - print the smallest number read from stdin.
#
# Input:   one value per line; only the first whitespace-separated field of
#          each line is examined. Lines whose first field is not a plain
#          decimal number (optional leading "-", digits, optional fraction)
#          are ignored.
# Output:  the smallest value exactly as it was spelled in the input,
#          followed by a newline; nothing when no numeric value was read.
# Returns: the exit status of awk (0 unless awk itself fails).
#
# The minimum is tracked in a single pass, so neither a pre-sort nor the
# gawk-only asort() function is needed.
min() {
  awk '
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      # "+ 0" forces a numeric comparison even if awk treats a field as text.
      if (n++ == 0 || $1 + 0 < lowest + 0) lowest = $1
    }
    END { if (n > 0) print lowest }'
}
Maximum
# max - print the largest number read from stdin.
#
# Input:   one value per line; only the first whitespace-separated field of
#          each line is examined. Lines whose first field is not a plain
#          decimal number (optional leading "-", digits, optional fraction)
#          are ignored.
# Output:  the largest value exactly as it was spelled in the input,
#          followed by a newline; nothing when no numeric value was read.
# Returns: the exit status of awk (0 unless awk itself fails).
#
# The maximum is tracked in a single pass, so neither a pre-sort nor the
# gawk-only asort() function is needed.
max() {
  awk '
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      # "+ 0" forces a numeric comparison even if awk treats a field as text.
      if (n++ == 0 || $1 + 0 > highest + 0) highest = $1
    }
    END { if (n > 0) print highest }'
}
Median
# median - print the median of the numbers read from stdin.
#
# Input:   one value per line; only the first whitespace-separated field of
#          each line is examined. Lines whose first field is not a plain
#          decimal number (optional leading "-", digits, optional fraction)
#          are ignored.
# Output:  for an odd count, the middle value as spelled in the input; for an
#          even count, the mean of the two middle values.
# Returns: 0 on success; 1 (with a message on stderr) when no numeric value
#          was read.
median() {
  # LC_ALL=C keeps "sort -n" from applying a locale-specific decimal
  # separator, so the order always agrees with how awk parses the numbers.
  LC_ALL=C sort -n | awk '
    # At least one digit is required, so blank lines and a lone "." are
    # skipped rather than entering the ranking as zeros.
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ { v[n++] = $1 }
    END {
      if (n == 0) {
        print "median: no numeric input" > "/dev/stderr"
        exit 1
      }
      if (n % 2 == 1) {
        print v[(n - 1) / 2]
      } else {
        print (v[n / 2 - 1] + v[n / 2]) / 2
      }
    }'
}
Sum
# sum - print the total of the numbers read from stdin.
#
# Input:   one value per line; only the first whitespace-separated field of
#          each line is examined. Lines whose first field is not a plain
#          decimal number (optional leading "-", digits, optional fraction)
#          are ignored.
# Output:  the total followed by a newline; 0 when no numeric value was read.
# Returns: the exit status of awk (0 unless awk itself fails).
#
# Addition does not depend on input order, so the values are accumulated
# directly without sorting or buffering them.
sum() {
  awk '
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ { total += $1 }
    # "+ 0" makes an untouched accumulator print as 0 instead of an empty line.
    END { print total + 0 }'
}
Count
# count - print how many numeric values were read from stdin.
#
# Input:   one value per line; only the first whitespace-separated field of
#          each line is examined. A line is counted when that field is a
#          plain decimal number (optional leading "-", digits, optional
#          fraction). Blank lines and other text are not counted.
# Output:  the count followed by a newline; 0 for empty input.
# Returns: the exit status of awk (0 unless awk itself fails).
count() {
  awk '
    # At least one digit is required; an empty field or a lone "." is not
    # a number.
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ { n++ }
    # "+ 0" makes an untouched counter print as 0 instead of an empty line.
    END { print n + 0 }'
}
Maximum difference
# maxdelta - print the largest difference between consecutive input values.
#
# Each difference is (current value - previous value), taken in input order,
# so a falling series produces negative differences.
#
# Input:   one value per line; only the first whitespace-separated field of
#          each line is examined. Lines whose first field is not a plain
#          decimal number (optional leading "-", digits, optional fraction)
#          are ignored.
# Output:  the largest difference followed by a newline; nothing when fewer
#          than two numeric values were read.
# Returns: the exit status of awk (0 unless awk itself fails).
#
# The whole computation is one awk pass: it sets no shell variables, starts
# no per-line bc or seq processes, and needs no other function from this
# collection.
maxdelta() {
  awk '
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      if (seen++) {
        step = $1 - prev
        if (pairs++ == 0 || step > best) best = step
      }
      prev = $1
    }
    END { if (pairs > 0) print best }'
}
Minimum difference
# mindelta - print the smallest difference between consecutive input values.
#
# Each difference is (current value - previous value), taken in input order,
# so a falling series produces negative differences and the result is the
# most negative one.
#
# Input:   one value per line; only the first whitespace-separated field of
#          each line is examined. Lines whose first field is not a plain
#          decimal number (optional leading "-", digits, optional fraction)
#          are ignored.
# Output:  the smallest difference followed by a newline; nothing when fewer
#          than two numeric values were read.
# Returns: the exit status of awk (0 unless awk itself fails).
#
# The whole computation is one awk pass: it sets no shell variables, starts
# no per-line bc or seq processes, and needs no other function from this
# collection.
mindelta() {
  awk '
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      if (seen++) {
        step = $1 - prev
        if (pairs++ == 0 || step < best) best = step
      }
      prev = $1
    }
    END { if (pairs > 0) print best }'
}
Average difference
# avgdelta - print the mean difference between consecutive input values.
#
# Each difference is (current value - previous value), taken in input order.
#
# Input:   one value per line; only the first whitespace-separated field of
#          each line is examined. Lines whose first field is not a plain
#          decimal number (optional leading "-", digits, optional fraction)
#          are ignored.
# Output:  the mean difference rounded to an integer ("%.0f"), followed by a
#          newline.
# Returns: 0 on success; 1 (with a message on stderr) when fewer than two
#          numeric values were read, since no difference exists then.
#
# The whole computation is one awk pass: it sets no shell variables, starts
# no per-line bc or seq processes, and needs no other function from this
# collection.
avgdelta() {
  awk '
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      if (seen++) {
        total += $1 - prev
        pairs++
      }
      prev = $1
    }
    END {
      if (pairs == 0) {
        print "avgdelta: need at least two numeric values" > "/dev/stderr"
        exit 1
      }
      printf("%.0f\n", total / pairs)
    }'
}
Standard Deviation
# stddev - print the population standard deviation of the numbers on stdin.
#
# "Population" means the variance is divided by N, not N-1.
#
# Input:   one value per line; only the first whitespace-separated field of
#          each line is examined. Lines whose first field is not a decimal
#          number (optional sign, digits, optional fraction, optional
#          exponent) are ignored and do not count towards N.
# Output:  the standard deviation with two decimals ("%.2f"), followed by a
#          newline.
# Returns: 0 on success; 1 (with a message on stderr) when no numeric value
#          was read.
stddev() {
  awk '
    $1 ~ /^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$/ {
      # Running mean and sum of squared deviations (Welford update). Unlike
      # sumsq/N - mean^2 this cannot go negative through rounding when the
      # values are large and close together.
      n++
      d = $1 - mean
      mean += d / n
      m2 += d * ($1 - mean)
    }
    END {
      if (n == 0) {
        print "stddev: no numeric input" > "/dev/stderr"
        exit 1
      }
      variance = m2 / n
      if (variance < 0) variance = 0
      printf("%.2f\n", sqrt(variance))
    }'
}
Linear Regression
# fitline - least-squares straight-line fit of (x, y) pairs read from stdin.
#
# Input:   one "x y" pair per line, separated by spaces, tabs and/or commas.
#          Lines that do not consist of exactly two numeric fields (for
#          example text headers) are ignored.
# Output:  three lines on stdout:
#            Slope : <slope, two decimals>
#            Intercept : <intercept, two decimals>
#            R-Squared : <coefficient of determination, two decimals>
# Returns: 0 on success; 1 (with a message on stderr) when fewer than two
#          pairs were read or when every x value is identical, because the
#          slope is undefined in both cases.
fitline() {
  awk '
    BEGIN {
      FS = "[ ,\t]+"
      number = "^[-+]?([0-9]+\\.?[0-9]*|\\.[0-9]+)([eE][-+]?[0-9]+)?$"
    }
    NF == 2 && $1 ~ number && $2 ~ number {
      # Samples are indexed by their own counter, not by NR, so skipped
      # lines cannot leave holes in the arrays read back in END.
      n++
      x[n] = $1
      y[n] = $2
      sx += $1
      sy += $2
      sxy += $1 * $2
      sxx += $1 * $1
    }
    END {
      if (n < 2) {
        print "fitline: need at least two data points" > "/dev/stderr"
        exit 1
      }
      spread = n * sxx - sx * sx
      if (spread == 0) {
        print "fitline: all x values are identical" > "/dev/stderr"
        exit 1
      }
      slope = (n * sxy - sx * sy) / spread
      inter = (sy - slope * sx) / n
      mean_y = sy / n
      for (i = 1; i <= n; i++) {
        dev = y[i] - mean_y
        err = y[i] - (slope * x[i] + inter)
        ss_total += dev * dev
        ss_residual += err * err
      }
      # With constant y the total sum of squares is 0; the fitted horizontal
      # line passes through every point, which is reported as a perfect fit.
      r2 = (ss_total == 0) ? 1 : 1 - ss_residual / ss_total
      printf("Slope : %.2f\n", slope)
      printf("Intercept : %.2f\n", inter)
      printf("R-Squared : %.2f\n", r2)
    }'
}
Greatest common divisor
# gcd - print the greatest common divisor of two or more integers.
#
# Arguments: $1 $2 [...] - decimal integers; a leading "-" is accepted and
#                          ignored, because the result is never negative.
# Output:    the greatest common divisor followed by a newline. A zero
#            operand is neutral, so "gcd 7 0" prints 7 and "gcd 0 0"
#            prints 0.
# Returns:   0 on success; 2 (with a message on stderr) when fewer than two
#            arguments are given or an argument is not an integer.
gcd() {
  if (( $# < 2 )); then
    printf 'usage: gcd INTEGER INTEGER...\n' >&2
    return 2
  fi

  local arg a b t result=0
  for arg in "$@"; do
    if [[ ! $arg =~ ^-?[0-9]+$ ]]; then
      printf 'gcd: not an integer: %s\n' "$arg" >&2
      return 2
    fi
    a=$result
    # Strip the sign and force base 10 so that values such as "08" are not
    # rejected as invalid octal numbers.
    b=$(( 10#${arg#-} ))
    # Euclid: the loop is skipped when b is 0, so nothing is ever divided
    # by zero.
    while (( b != 0 )); do
      t=$(( a % b ))
      a=$b
      b=$t
    done
    result=$a
  done

  printf '%s\n' "$result"
}
Least common multiple
# lcm - print the least common multiple of one or more integers.
#
# Arguments: $1 [...] - decimal integers, any number of them; a leading "-"
#                       is accepted and ignored, because the result is never
#                       negative.
# Output:    the least common multiple followed by a newline; 0 when any
#            operand is 0.
# Returns:   0 on success; 2 (with a message on stderr) when no argument is
#            given or an argument is not an integer.
#
# The result is built pairwise as lcm(r, v) = r / gcd(r, v) * v, which takes
# time proportional to the number of operands instead of searching every
# candidate multiple.
lcm() {
  if (( $# == 0 )); then
    printf 'usage: lcm INTEGER...\n' >&2
    return 2
  fi

  local arg value a b t result=1
  for arg in "$@"; do
    if [[ ! $arg =~ ^-?[0-9]+$ ]]; then
      printf 'lcm: not an integer: %s\n' "$arg" >&2
      return 2
    fi
    # Strip the sign and force base 10 so that values such as "08" are not
    # rejected as invalid octal numbers.
    value=$(( 10#${arg#-} ))
    if (( result == 0 || value == 0 )); then
      # Zero absorbs everything; keep looping only to validate the rest.
      result=0
      continue
    fi
    # Euclid for gcd(result, value); the answer ends up in a.
    a=$result
    b=$value
    while (( b != 0 )); do
      t=$(( a % b ))
      a=$b
      b=$t
    done
    # Divide before multiplying to keep the intermediate value small.
    result=$(( result / a * value ))
  done

  printf '%s\n' "$result"
}