#!/bin/bash
# stats - calculate statistics from a whitespace-separated list of integers
#
# Computes count, total, average, max, min, range (max-min), and outliers
# (1.5 * IQR method) from stdin input
#
# Usage:
#   command | stats [-n]
#   stats -h
#
# Exit status:
#   0  success
#   2  usage error (empty input, or a token that is not made only of digits)
#   3  dependency error (bc missing)
#
# The file can be executed or sourced; in both cases the statistics are
# computed once from stdin and the resulting status is propagated.

#######################################
# Read integers from stdin and print summary statistics.
# The body is a subshell `( ... )`, so the nested helpers and every variable
# defined here disappear when the function finishes.
# Arguments: $1 - optional: -n (print the dataset first) or -h/--help
# Outputs:   statistics on stdout, diagnostics on stderr
# Returns:   0 on success, 2 on empty/non-integer input, 3 if bc is missing
#######################################
_stats() (
  # Name used in messages: the file's basename, falling back to "stats" when
  # the code comes from a /dev or /proc path or the name is just a shell name.
  local SCRIPT_NAME="${BASH_SOURCE[0]-}"
  SCRIPT_NAME="${SCRIPT_NAME##*/}"
  case "${BASH_SOURCE[0]-}" in
    /dev/* | /proc/*) SCRIPT_NAME="" ;;
  esac
  case "$SCRIPT_NAME" in
    "" | bash | sh | zsh | dash) SCRIPT_NAME="stats" ;;
  esac

  # Print an error message, tagged with the script name, on stderr.
  _error() { echo "[ERR][$SCRIPT_NAME] $*" >&2; }

  # Print the help text; "command" is underlined only when stdout is a tty.
  _show_help() {
    local s="" r=""
    if [ -t 1 ]; then
      s=$'\033[4m'
      r=$'\033[24m'
    fi
    printf '%s\n' \
      "NAME" \
      " $SCRIPT_NAME - calculate statistics from a list of integers" \
      "SYNOPSIS" \
      " ${s}command${r} | $SCRIPT_NAME [-n]" \
      " $SCRIPT_NAME -h" \
      "DESCRIPTION" \
      " Reads whitespace-separated integers from stdin and prints count, total," \
      " average, range (min to max), and outliers (1.5 * IQR method). Input may" \
      " be newline-, space-, or tab-delimited (or any mix)." \
      "OPTIONS" \
      " -n Print the dataset before statistics" \
      " -h, --help Show this help message" \
      "EXIT STATUS" \
      " 0 Success" \
      " 2 Usage error (empty or non-integer input)" \
      " 3 Dependency error (bc missing)" \
      "DEPENDENCIES" \
      " bc, sort"
  }

  case "${1-}" in
    -h | --help)
      _show_help
      return 0
      ;;
  esac

  if ! command -v bc >/dev/null 2>&1; then
    _error "bc is required"
    return 3
  fi

  # Read every whitespace-separated token from stdin (newlines, spaces and
  # tabs are all accepted); tr puts one token per line, empty lines are skipped.
  local -a numbers=()
  local token
  while IFS= read -r token || [ -n "$token" ]; do
    if [ -n "$token" ]; then
      numbers+=("$token")
    fi
  done < <(tr -s '[:space:]' '\n')

  # Without data there is nothing to divide by: fail instead of printing
  # bc "divide by zero" noise.
  if [ "${#numbers[@]}" -eq 0 ]; then
    _error "No input"
    return 2
  fi

  # Accept digits only (this rejects decimals and signs), then strip leading
  # zeros: shell arithmetic would otherwise read "010" as octal 8 and abort
  # on "08", while bc would treat the same tokens as decimal.
  local -i i
  for ((i = 0; i < ${#numbers[@]}; i++)); do
    token="${numbers[i]}"
    case "$token" in
      *[!0-9]*)
        _error "Integers only"
        return 2
        ;;
    esac
    token="${token#"${token%%[!0]*}"}"
    numbers[i]="${token:-0}"
  done

  # If -n is provided, list the dataset first as "a, b, c". The join is done
  # in the shell because "\n" in a sed replacement is not portable.
  if [ "${1-}" = "-n" ]; then
    local dataset
    printf -v dataset '%s, ' "${numbers[@]}"
    printf '%s\n' "${dataset%, }"
  fi

  # Sort ascending: needed for the quartiles, and it yields min and max too.
  # A read loop is used rather than mapfile, which needs bash 4 or newer.
  local -a sorted=()
  while IFS= read -r token; do
    sorted+=("$token")
  done < <(printf '%s\n' "${numbers[@]}" | sort -n)

  # Quartiles are taken as the elements at positions n/4 and 3n/4.
  local -i n=${#sorted[@]}
  local -i Q1="${sorted[$((n / 4))]}"
  local -i Q3="${sorted[$((n * 3 / 4))]}"
  local -i IQR=$((Q3 - Q1))

  # Outlier limits are Q1 - 1.5*IQR and Q3 + 1.5*IQR. Both sides of each
  # comparison are doubled so the test stays exact in integer arithmetic;
  # rounding a x.5 limit to an integer would misclassify values next to it.
  local -i lower_bound_x2=$((2 * Q1 - 3 * IQR))
  local -i upper_bound_x2=$((2 * Q3 + 3 * IQR))

  # Find the first and last data points that lie within the limits. Q1 and Q3
  # are always within them, so both loops stop inside the array.
  local -i lower_index=0
  local -i upper_index=$((n - 1))
  while ((2 * sorted[lower_index] < lower_bound_x2)); do
    lower_index=$((lower_index + 1))
  done
  while ((2 * sorted[upper_index] > upper_bound_x2)); do
    upper_index=$((upper_index - 1))
  done

  # Everything before lower_index and after upper_index is an outlier.
  local -a outliers=("${sorted[@]:0:$lower_index}" "${sorted[@]:$((upper_index + 1))}")
  local percent_outliers
  percent_outliers="$(printf '%.1f%%' "$(bc <<<"scale=4; ${#outliers[@]} / ${#numbers[@]} * 100")")"

  # Total is computed by bc from "a+b+c"; min and max are the ends of the
  # sorted list.
  local sum_expr
  printf -v sum_expr '%s+' "${sorted[@]}"
  local total
  total="$(bc <<<"${sum_expr%+}")"
  local -i min="${sorted[0]}"
  local -i max="${sorted[$((n - 1))]}"
  local -i range=$((max - min))

  # Print results. The average is an integer: bc divides with its default
  # scale of 0, which truncates the quotient.
  echo "Count: ${#numbers[@]}"
  echo "Total: $total"
  echo "Average: $(bc <<<"$total / ${#numbers[@]}")"
  echo "Range: $range ($min to $max)"
  echo "Outliers: ${outliers[*]}"
  echo " ${#outliers[@]} of ${#numbers[@]} ($percent_outliers)"
)

# Run once, remember the status, and remove the function again so that
# nothing is left behind when this file is sourced.
_stats "$@"
__stats_rc=$?
unset -f _stats

# The status is expanded into the eval string before it runs, which lets the
# temporary variable be unset and its value still be returned. Use `return`
# when the file was sourced and `exit` when it was executed.
if [ -n "${BASH_SOURCE[0]-}" ] && [ "${BASH_SOURCE[0]-}" != "$0" ]; then
  eval "unset __stats_rc; return $__stats_rc"
fi
eval "unset __stats_rc; exit $__stats_rc"