File: //lib64/nagios/plugins/check_nic_errors
#!/bin/bash
# Usage: ./check_nic_errors [-w WARNING] [-c CRITICAL] [-e EXCLUDE]
# EXCLUDE is a comma-separated list of interfaces to skip (e.g. eth2,eth3).
# The loopback interface and virtual interfaces are always skipped.
# Default thresholds for the summed per-interface error counters.
# These are the values advertised by the -h help text. A default of 0 would
# flag every monitored interface as CRITICAL, because the check compares with
# "greater than or equal" and an error total is never below 0.
WARNING_THRESHOLD=10
CRITICAL_THRESHOLD=100

# Comma-separated interface names to skip (set with -e); empty means none.
EXCLUDE_INTERFACES=""

# Exit codes reported back to Nagios.
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3
#######################################
# Parse the command-line options into the threshold/exclusion settings.
# Globals:   WARNING_THRESHOLD, CRITICAL_THRESHOLD, EXCLUDE_INTERFACES (written)
#            STATE_OK, STATE_UNKNOWN (read)
# Arguments: the script's command-line arguments
# Outputs:   usage text or an error message on stdout
# Returns:   0 when all options were accepted; exits the script with
#            STATE_OK after -h and with STATE_UNKNOWN on invalid input
#######################################
parse_options() {
  local opt
  local OPTIND=1 OPTARG

  # The leading ':' puts getopts in silent mode: OPTARG then holds the
  # offending option letter, and a missing argument is reported as ':'
  # instead of being folded into the "invalid option" case.
  while getopts ":w:c:e:h" opt; do
    case "$opt" in
      w | c)
        # Thresholds are later used in integer comparisons, so reject
        # anything that is not a plain non-negative integer up front.
        if [[ ! $OPTARG =~ ^[0-9]+$ ]]; then
          echo "UNKNOWN - threshold for -$opt must be a non-negative integer: $OPTARG"
          exit "$STATE_UNKNOWN"
        fi
        # Force base 10 so leading zeros (e.g. 010) are not read as octal.
        if [[ $opt == w ]]; then
          WARNING_THRESHOLD=$((10#$OPTARG))
        else
          CRITICAL_THRESHOLD=$((10#$OPTARG))
        fi
        ;;
      e) EXCLUDE_INTERFACES=$OPTARG ;;
      h)
        echo "Usage: $0 [-w WARNING] [-c CRITICAL] [-e EXCLUDE]"
        echo " -w WARNING Warning threshold for error count (default: 10)"
        echo " -c CRITICAL Critical threshold for error count (default: 100)"
        echo " -e EXCLUDE Comma-separated list of interfaces to exclude (e.g., lo,docker0)"
        exit "$STATE_OK"
        ;;
      :)
        echo "UNKNOWN - option -$OPTARG requires an argument"
        exit "$STATE_UNKNOWN"
        ;;
      \?)
        echo "Invalid option: -$OPTARG"
        exit "$STATE_UNKNOWN"
        ;;
    esac
  done
}

parse_options "$@"
#######################################
# Report whether an interface is on the user-supplied exclusion list.
# Globals:   EXCLUDE_INTERFACES (read) - comma-separated interface names
# Arguments: $1 - interface name to look up
# Returns:   0 if the interface is listed, 1 otherwise (including when the
#            list is empty)
#######################################
is_excluded() {
  local iface=$1
  local -a excluded=()
  local entry

  [[ -n "$EXCLUDE_INTERFACES" ]] || return 1

  # Split on commas and also absorb blanks around them, so a list written
  # as "eth0, eth1" still matches. The IFS change applies to this read only.
  IFS=$', \t' read -ra excluded <<< "$EXCLUDE_INTERFACES"

  for entry in "${excluded[@]}"; do
    if [[ "$iface" == "$entry" ]]; then
      return 0
    fi
  done
  return 1
}
# Main check: walk /proc/net/dev, sum selected error counters for every
# non-virtual interface whose operstate is "up", compare the sum against the
# thresholds and print one Nagios status line followed by perfdata.
if [ ! -f /proc/net/dev ]; then
  echo "UNKNOWN - /proc/net/dev not found"
  exit "$STATE_UNKNOWN"
fi

exit_code=$STATE_OK
output_msg="OK - All NICs healthy"
problem_nics=()
perfdata=""

# Patterns are kept in variables so the regex metacharacters need no shell quoting.
# The two header rows are recognised by the '|' that follows their first word,
# so a real interface whose name merely starts with "face" is not skipped.
header_re='^[[:space:]]*(Inter-|face)[[:space:]]*\|'
# "<name>: <counters...>" - capture 1 is the interface, capture 2 the counters.
iface_re='^[[:space:]]*([^:]+):[[:space:]]*(.+)$'
# Name prefixes of aggregate/virtual devices that are never checked.
virtual_re='^(bond|vmbr|vlan|tun|tap|veth|docker|br-|virbr)'

while IFS= read -r line; do
  [[ $line =~ $header_re ]] && continue
  [[ $line =~ $iface_re ]] || continue
  iface=${BASH_REMATCH[1]}
  stats=${BASH_REMATCH[2]}

  # Loopback and user-excluded interfaces.
  if [ "$iface" = "lo" ] || is_excluded "$iface"; then
    continue
  fi
  # Virtual devices by prefix, plus any name containing a dot.
  if [[ $iface =~ $virtual_re || $iface == *.* ]]; then
    continue
  fi
  [ -d "/sys/class/net/$iface" ] || continue

  # Only interfaces reporting operstate "up" are evaluated; a missing or
  # unreadable operstate file yields an empty string and is skipped too.
  operstate=$(cat "/sys/class/net/$iface/operstate" 2>/dev/null)
  [ "$operstate" = "up" ] || continue

  # Column order in /proc/net/dev:
  #   0 rx_bytes  1 rx_packets  2 rx_errs  3 rx_drop  4 rx_fifo  5 rx_frame
  #   6 rx_compressed  7 rx_multicast  8 tx_bytes  9 tx_packets  10 tx_errs
  #   11 tx_drop  12 tx_fifo  13 tx_colls  14 tx_carrier  15 tx_compressed
  # Missing columns default to 0 so a short line cannot break the arithmetic.
  read -ra counters <<< "$stats"
  rx_errs=${counters[2]:-0}
  rx_fifo=${counters[4]:-0}
  rx_frame=${counters[5]:-0}
  tx_errs=${counters[10]:-0}
  tx_fifo=${counters[12]:-0}
  tx_carrier=${counters[14]:-0}

  total_errors=$((rx_errs + rx_fifo + rx_frame + tx_errs + tx_fifo + tx_carrier))

  perfdata+="'${iface}_rx_errors'=${rx_errs} '${iface}_rx_fifo'=${rx_fifo} '${iface}_rx_frame'=${rx_frame} "
  perfdata+="'${iface}_tx_errors'=${tx_errs} '${iface}_tx_fifo'=${tx_fifo} '${iface}_tx_carrier'=${tx_carrier} "

  detail="(RX_errs:$rx_errs RX_fifo:$rx_fifo RX_frame:$rx_frame TX_errs:$tx_errs TX_fifo:$tx_fifo TX_carrier:$tx_carrier)"
  if [ "$total_errors" -ge "$CRITICAL_THRESHOLD" ]; then
    problem_nics+=("$iface: CRITICAL $detail")
    exit_code=$STATE_CRITICAL
  elif [ "$total_errors" -ge "$WARNING_THRESHOLD" ]; then
    # Always list the interface, whatever order the kernel reports them in;
    # only the overall state is protected from being downgraded.
    problem_nics+=("$iface: WARNING $detail")
    if [ "$exit_code" -lt "$STATE_CRITICAL" ]; then
      exit_code=$STATE_WARNING
    fi
  fi
done < /proc/net/dev

if [ "${#problem_nics[@]}" -gt 0 ]; then
  if [ "$exit_code" -eq "$STATE_CRITICAL" ]; then
    output_msg="CRITICAL - NIC errors detected: "
  else
    output_msg="WARNING - NIC errors detected: "
  fi
  # Join the entries with "; " and drop the separator left after the last one.
  printf -v joined '%s; ' "${problem_nics[@]}"
  output_msg+="${joined%; }"
fi

# Emit the perfdata section only when at least one interface was measured,
# and without the trailing blank left by the concatenation above.
if [ -n "$perfdata" ]; then
  echo "${output_msg} | ${perfdata% }"
else
  echo "${output_msg}"
fi
exit "$exit_code"