HEX
Server: LiteSpeed
System: Linux s3604.bom1.stableserver.net 4.18.0-513.11.1.lve.el8.x86_64 #1 SMP Thu Jan 18 16:21:02 UTC 2024 x86_64
User: dmstechonline (1480)
PHP: 7.4.33
Disabled: NONE
Upload Files
File: //lib64/nagios/plugins/check_a2_zpool.sh
#!/bin/bash
# Ref: BFENG-667 - Script that automatically clears zpool errors when the pool reports no data errors/corruption.
# Sentinel file: as long as it exists, this check is switched off.
zfs_disable_clear='/etc/.jbk_zpool_clear_disabled'

# When the sentinel is present, say so and stop with status 1.
if [[ -e "$zfs_disable_clear" ]]; then
  printf '%s\n' 'zfs pool monitoring is disabled currently'
  exit 1
fi

#######################################
# Inspect `zpool status` and, when the pool is DEGRADED without data errors,
# run `zpool clear backups` and record how many times that has been done.
# Arguments:
#   $1 - optional path of the file that stores the clear counter
#        (default: /etc/.jbk_zpool_clear_count)
# Outputs:
#   One status line on stdout.
# Exits (never returns):
#   0 - reported state is anything other than DEGRADED
#   1 - DEGRADED, no data errors, counter <= 5: pool errors were cleared
#   2 - DEGRADED with data errors, or counter has exceeded 5
#######################################
check_zpool() {
  local zfs_clear_count_file="${1:-/etc/.jbk_zpool_clear_count}"
  local zfs_status zfs_state zfs_data_errors zfs_cleared_count

  # Take a single snapshot so the state and the error count cannot come from
  # two different reports.
  zfs_status=$(zpool status)
  zfs_state=$(awk '/state:/ {print $NF}' <<<"${zfs_status}")
  zfs_data_errors=$(awk '/data errors/ {print $2}' <<<"${zfs_status}")

  # For "errors: No known data errors" the second field is the word "No".
  # Anything that is not a plain integer is treated as zero data errors
  # instead of being fed to an integer comparison.
  if [[ ! "${zfs_data_errors}" =~ ^[0-9]+$ ]]; then
    zfs_data_errors=0
  fi

  # Make sure the counter file exists before it is read.
  if [ ! -e "${zfs_clear_count_file}" ]; then
    echo 0 > "${zfs_clear_count_file}"
  fi
  zfs_cleared_count=$(cat "${zfs_clear_count_file}" 2>/dev/null)

  # An unreadable, empty or corrupt counter restarts from zero rather than
  # breaking the arithmetic below.
  if [[ ! "${zfs_cleared_count}" =~ ^[0-9]+$ ]]; then
    zfs_cleared_count=0
  fi

  # NOTE(review): every state other than DEGRADED (including an empty result
  # when no "state:" line was printed, or more than one pool being listed)
  # is reported as healthy here - confirm that this is intended.
  if [[ "${zfs_state}" != "DEGRADED" ]]; then
    echo "zfs pool backups is healthy"
    exit 0
  fi

  # Real data errors are never auto-cleared.
  if [ "${zfs_data_errors}" -gt 0 ]; then
    echo "zfs pool is in ${zfs_state} state and has ${zfs_data_errors} data errors"
    exit 2
  fi

  # 10# forces base 10 so a counter with leading zeros is not read as octal.
  zfs_cleared_count=$((10#${zfs_cleared_count} + 1))
  echo "${zfs_cleared_count}" > "${zfs_clear_count_file}"
  echo "zfs pool is in ${zfs_state} state and we have cleared it ${zfs_cleared_count} times"

  # Stop auto-clearing once the counter has gone past 5 and escalate instead.
  if [ "${zfs_cleared_count}" -gt 5 ]; then
    exit 2
  fi

  zpool clear backups
  exit 1
}
# Entry point: check_zpool prints the status line and exits with the plugin status itself.
check_zpool