Skip to content

System Health Dashboard (BigBlack)

Source: Notion | Last edited: 2024-11-06 | ID: 1362d2dc-3ef...


/home/tca/scripts/system_health_dashboard.sh

#!/bin/bash
# System health dashboard: prints service, network, NVMe, GPU, disk, journal
# and maintenance information for the local machine.
export TERM=xterm-256color
# Define required packages and their minimum versions (if needed).
# Keys are apt package names; the values are reserved for minimum versions and
# are currently left empty.
# "bc" is listed because the maintenance checks pipe floating-point
# comparisons through it.
declare -A REQUIRED_PACKAGES=(
  ["neofetch"]=""
  ["nvidia-smi"]=""
  ["duf"]=""
  ["logrotate"]=""
  ["docker"]=""
  ["nvme-cli"]=""
  ["smartmontools"]=""
  ["fwupd"]=""
  ["sysstat"]=""
  ["netplan.io"]=""
  ["htop"]=""
  ["bc"]=""
)
# Print all arguments through `echo -e`, so backslash escapes in the text are
# interpreted. Called without arguments it emits an empty line.
echocolor() {
echo -e "$@"
}
# Populate the global ANSI escape variables used for coloured output.
define_colors() {
  # Foreground colours.
  RED=$'\e[0;31m'
  YELLOW=$'\e[1;33m'
  GREEN=$'\e[0;32m'
  BLUE=$'\e[0;34m'
  CYAN=$'\e[0;36m'
  # Attribute reset and text attributes.
  NC=$'\e[0m'
  BOLD=$'\e[1m'
  DIM=$'\e[2m'
}
# Succeed (status 0) when the given name resolves to something runnable in the
# current shell; fail (status 1) otherwise. Produces no output.
#   $1 - command name to look up
check_command() {
  local tool="$1"
  if command -v "$tool" >/dev/null 2>&1; then
    return 0
  fi
  return 1
}
# Install each named package with `sudo apt-get install -y`, one at a time.
#   $@ - apt package names
# Exits the whole script with status 1 on the first package that fails.
install_packages() {
  local pkg
  for pkg in "$@"; do
    if ! sudo apt-get install -y "$pkg" >/dev/null 2>&1; then
      # Report on stderr: the caller runs with stdout redirected to /dev/null,
      # which would otherwise hide the reason for the exit.
      echocolor "${RED}Failed to install ${pkg}${NC}" >&2
      exit 1
    fi
  done
}
# Install every entry of REQUIRED_PACKAGES whose tool is not available yet.
# Globals: REQUIRED_PACKAGES (read)
# Returns: 0 when nothing is missing, otherwise the status of install_packages
#          (which exits the script on failure).
check_and_install_packages() {
  # Several packages ship a binary whose name differs from the package name,
  # so probing for the package name would always report them as missing and
  # re-run apt-get on every invocation.
  local -A package_command=(
    ["nvme-cli"]="nvme"
    ["smartmontools"]="smartctl"
    ["fwupd"]="fwupdmgr"
    ["sysstat"]="sar"
    ["netplan.io"]="netplan"
  )
  local -a missing_packages=()
  local pkg probe
  for pkg in "${!REQUIRED_PACKAGES[@]}"; do
    probe=${package_command[$pkg]:-$pkg}
    if check_command "$probe"; then
      continue
    fi
    # nvidia-smi is never queued for installation (the original logic did not
    # queue it either, with or without NVIDIA hardware present); the GPU
    # section reports its absence instead.
    if [[ "$pkg" == "nvidia-smi" ]]; then
      continue
    fi
    missing_packages+=("$pkg")
  done
  if (( ${#missing_packages[@]} > 0 )); then
    install_packages "${missing_packages[@]}"
  fi
}
# Print a message wrapped in the given colour sequence, followed by the
# reset sequence held in NC.
#   $1 - colour/attribute escape sequence
#   $2 - message text
display_colored_message() {
  local tint="$1" text="$2"
  echocolor "${tint}${text}${NC}"
}
# Print one status line per systemd unit.
#   $1  - unit type suffix, e.g. "service" or "timer"
#   $2+ - unit names without the suffix
# A unit is shown in green when `systemctl is-active` reports "active",
# otherwise in red together with the reported state.
check_and_display_status() {
  local type=$1
  # Drop the type before iterating; otherwise the type itself would be
  # checked as if it were a unit name.
  shift
  local item status
  for item in "$@"; do
    status=$(systemctl is-active "$item.$type" 2>/dev/null)
    if [ "$status" = "active" ]; then
      display_colored_message "$GREEN" " โ”œโ”€ โœ“ $item"
    else
      display_colored_message "$RED" " โ”œโ”€ โœ— $item ($status)"
    fi
  done
}
# Render the "Service Status" and "Timer Status" sections: a heading, a top
# rule, one status line per unit, and a bottom rule for each section.
check_services_and_timers() {
  local -a service_units=(
    "prometheus-node-exporter"
    "nvidia-persistenced"
    "smartmontools"
    "sysstat"
    "bluetooth"
  )
  local -a timer_units=(
    "nvme-monitor"
    "prometheus-node-exporter-nvme"
    "sysstat-collect"
  )
  local rule_top=" โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
  local rule_bottom=" โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"

  # Services
  display_colored_message "${BOLD}${BLUE}" "โ–ถ Service Status:"
  echocolor "$rule_top"
  check_and_display_status "service" "${service_units[@]}"
  echocolor "$rule_bottom"

  # Timers
  display_colored_message "${BOLD}${BLUE}" "โ–ถ Timer Status:"
  echocolor "$rule_top"
  check_and_display_status "timer" "${timer_units[@]}"
  echocolor "$rule_bottom"
}
# --- Start-up sequence -------------------------------------------------------
define_colors
# Validate sudo once, before anything that needs it (package installation
# below, and later journal/dmesg/smartctl/logrotate calls).
if ! sudo -v; then
  echo "Failed to obtain sudo privileges" >&2
  exit 1
fi
# Refresh the sudo timestamp every 45 seconds in the background for as long as
# this script (PID $$) is alive.
while true; do
  sudo -n true
  sleep 45
  kill -0 "$$" || exit
done 2>/dev/null &
sudo_keepalive_pid=$!
# Stop the keep-alive loop as soon as the dashboard exits instead of waiting
# for its next wake-up.
trap 'kill "$sudo_keepalive_pid" 2>/dev/null' EXIT
check_and_install_packages > /dev/null
mkdir -p ~/backups ~/logs/nvme 2>/dev/null
# Clear first and print the service/timer status afterwards; clearing after
# the status output would wipe it from the screen.
clear
check_services_and_timers
echocolor "${BOLD}${BLUE}โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• System Health Dashboard โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•${NC}"
# System summary: keep only the selected neofetch rows and draw them as a tree.
# Each row is held back by one line so that whichever row turns out to be the
# last one gets the closing branch, however many rows matched.
neofetch --stdout \
  --off \
  --cpu_temp C \
  --memory_percent on \
  | grep -E "^(OS|Kernel|Uptime|CPU|Memory|Disk):" \
  | awk -v cyan="$CYAN" -v nc="$NC" '
      NR > 1 { printf "%s%s%s%s\n", " โ”œโ”€ ", cyan, held, nc }
      { held = $0 }
      END { if (NR > 0) printf "%s%s%s%s\n", " โ””โ”€ ", cyan, held, nc }
    '
echocolor "${BOLD}${BLUE}โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• System Health Dashboard โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•${NC}"
echocolor
echocolor "${BOLD}${BLUE}โ–ถ System Statistics:${NC}"
echocolor " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
if command -v sar >/dev/null 2>&1; then
  # Take one 1-second sample per report and keep the trailing "Average:" row.
  # LC_ALL=C keeps the decimal separator a dot so awk can parse the numbers.
  #   -u  CPU utilisation : $3 %user, $5 %system, $6 %iowait
  #   -B  paging          : $4 fault/s, $5 majflt/s
  #   -b  I/O transfers   : $3 rtps (reads), $4 wtps (writes)
  cpu_stats=$(LC_ALL=C sar -u 1 1 | tail -n1)
  mem_stats=$(LC_ALL=C sar -B 1 1 | tail -n1)
  io_stats=$(LC_ALL=C sar -b 1 1 | tail -n1)
  echocolor " โ”œโ”€ CPU Usage (usr/sys/iowait): ${CYAN}$(echo "$cpu_stats" | awk '{printf "%.1f/%.1f/%.1f%%", $3, $5, $6}')${NC}"
  echocolor " โ”œโ”€ Memory Pages (fault/maj): ${CYAN}$(echo "$mem_stats" | awk '{printf "%d/%d", $4, $5}')${NC}"
  echocolor " โ””โ”€ I/O (read/write): ${CYAN}$(echo "$io_stats" | awk '{printf "%.1f/%.1f tps", $3, $4}')${NC}"
else
  echocolor " โ””โ”€ ${YELLOW}sysstat not installed${NC}"
fi
echocolor
echocolor "${BOLD}${BLUE}โ–ถ Network Environment:${NC}"
echocolor " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
# Check Wi-Fi blacklist status
# Looks through /etc/modprobe.d/blacklist*.conf for a line blacklisting the
# iwlwifi module. The first matching file is reported together with its
# modification time; without a match the output states whether the module is
# currently listed by lsmod.
check_wifi_blacklist() {
  local -a candidates=(/etc/modprobe.d/blacklist*.conf)
  local hit_file="" hit_date=""
  local conf
  for conf in "${candidates[@]}"; do
    [ -f "$conf" ] || continue
    grep -q "blacklist.*iwlwifi" "$conf" || continue
    hit_file=$conf
    hit_date=$(stat -c %y "$conf" | cut -d. -f1)
    break
  done

  if [ -n "$hit_file" ]; then
    echocolor " โ”œโ”€ Wi-Fi Status: ${GREEN}Disabled${NC}"
    echocolor " โ”‚ โ”œโ”€ Blacklist: ${CYAN}$hit_file${NC}"
    echocolor " โ”‚ โ””โ”€ Modified: ${CYAN}$hit_date${NC}"
  elif lsmod | grep -q "iwlwifi"; then
    echocolor " โ”œโ”€ Wi-Fi Status: ${YELLOW}Active (not blacklisted)${NC}"
  else
    echocolor " โ”œโ”€ Wi-Fi Status: ${CYAN}Not loaded${NC}"
  fi
}
# Get active interfaces with their states
# Walks the name/state pairs from `ip -br link show`, skips loopback and the
# docker*/br-*/veth* interfaces, and prints a small tree per remaining
# interface: state, address, type, MTU, link speed (ethernet only) and the
# byte counters read from /sys/class/net.
get_network_interfaces() {
  local link_rows
  link_rows=$(ip -br link show | awk '{print $1, $2}')
  local nic link_state
  while read -r nic link_state; do
    case "$nic" in
      lo | docker* | br-* | veth*) continue ;;
    esac

    local address kind mtu_value link_speed="" rx_total tx_total
    address=$(ip -br addr show "$nic" | awk '{print $3}')
    kind=$(nmcli -g GENERAL.TYPE device show "$nic" 2>/dev/null || echo "unknown")
    mtu_value=$(ip link show "$nic" | grep -oP 'mtu \K\d+')
    rx_total=$(cat "/sys/class/net/$nic/statistics/rx_bytes" 2>/dev/null)
    tx_total=$(cat "/sys/class/net/$nic/statistics/tx_bytes" 2>/dev/null)
    if [[ "$kind" == "ethernet" ]]; then
      link_speed=$(ethtool "$nic" 2>/dev/null | grep "Speed:" | awk '{print $2}')
    fi

    echocolor " โ”œโ”€ Interface: ${CYAN}$nic${NC}"
    echocolor " โ”‚ โ”œโ”€ State: ${GREEN}$link_state${NC}"
    if [ -n "$address" ]; then
      echocolor " โ”‚ โ”œโ”€ IP: ${CYAN}$address${NC}"
    fi
    if [ -n "$kind" ]; then
      echocolor " โ”‚ โ”œโ”€ Type: ${CYAN}$kind${NC}"
    fi
    if [ -n "$mtu_value" ]; then
      echocolor " โ”‚ โ”œโ”€ MTU: ${CYAN}$mtu_value${NC}"
    fi
    if [ -n "$link_speed" ]; then
      echocolor " โ”‚ โ”œโ”€ Speed: ${CYAN}$link_speed${NC}"
    fi
    # Traffic is only shown when both counters could be read.
    if [ -n "$rx_total" ] && [ -n "$tx_total" ]; then
      echocolor " โ”‚ โ””โ”€ Traffic: ${CYAN}โ†“$(numfmt --to=iec "$rx_total")B โ†‘$(numfmt --to=iec "$tx_total")B${NC}"
    fi
  done <<<"$link_rows"
}
# Check DNS configuration
# Lists the nameserver entries of a resolver configuration file together with
# the file's modification time.
#   $1 - (optional) path of the resolver file; defaults to /etc/resolv.conf
# Only lines whose first field is exactly "nameserver" are used, so comment
# lines that merely mention the word are not reported as servers.
check_dns_config() {
  local resolv_file=${1:-/etc/resolv.conf}
  local dns_servers resolv_date server
  dns_servers=$(awk '$1 == "nameserver" {print $2}' "$resolv_file" 2>/dev/null)
  resolv_date=$(stat -c %y "$resolv_file" 2>/dev/null | cut -d. -f1)
  if [ -n "$dns_servers" ]; then
    echocolor " โ”œโ”€ DNS Configuration:"
    echocolor " โ”‚ โ”œโ”€ Modified: ${CYAN}$resolv_date${NC}"
    while read -r server; do
      if [[ "$server" == "127.0.0.53" ]]; then
        # 127.0.0.53 is labelled as the systemd-resolved stub listener.
        echocolor " โ”‚ โ””โ”€ Server: ${CYAN}$server${NC} (systemd-resolved)"
      else
        echocolor " โ”‚ โ””โ”€ Server: ${CYAN}$server${NC}"
      fi
    done <<<"$dns_servers"
  else
    echocolor " โ”œโ”€ ${YELLOW}No DNS servers configured${NC}"
  fi
}
# Display network information
check_wifi_blacklist
get_network_interfaces
check_dns_config
echocolor " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
echocolor
echocolor "${BOLD}${BLUE}โ–ถ NVMe Health Status:${NC}"
# Read the nvme_* metrics for nvme0n1 from the local metrics endpoint
# (presumably the prometheus-node-exporter checked in the service list).
# -f treats HTTP error responses as failure; --max-time bounds a hung request.
if nvme_metrics=$(curl -sf --max-time 5 "http://localhost:9100/metrics" 2>/dev/null); then
  # Only the first matching sample of each metric is used, so every variable
  # holds at most one integer.
  temp=$(awk '/^nvme_temperature_celsius.*nvme0n1/ {print int($2); exit}' <<<"$nvme_metrics")
  spare=$(awk '/^nvme_available_spare_ratio.*nvme0n1/ {print int($2 * 100); exit}' <<<"$nvme_metrics")
  used=$(awk '/^nvme_percentage_used_ratio.*nvme0n1/ {print int($2 * 100); exit}' <<<"$nvme_metrics")
  errors=$(awk '/^nvme_media_errors_total.*nvme0n1/ {print int($2); exit}' <<<"$nvme_metrics")
  critical=$(awk '/^nvme_critical_warning.*nvme0n1/ {print int($2); exit}' <<<"$nvme_metrics")
  if [ -n "$temp" ]; then
    # Severity, highest first: the drive's critical-warning flag or 90 degrees
    # and above is critical, 80-89 is a warning, anything else is normal.
    # A missing critical-warning metric counts as 0.
    if [ "${critical:-0}" -gt 0 ] || [ "$temp" -ge 90 ]; then
      status="${RED}โš  CRITICAL${NC}"
    elif [ "$temp" -ge 80 ]; then
      status="${YELLOW}โšก Warning${NC}"
    else
      status="${GREEN}โœ“ Normal${NC}"
    fi
    echocolor " โ”œโ”€ Status: $status"
    echocolor " โ”œโ”€ Temperature: ${CYAN}${temp}ยฐC${NC}"
    if [ -n "$spare" ]; then
      echocolor " โ”œโ”€ Available Spare: ${CYAN}${spare}%${NC}"
    fi
    if [ -n "$used" ]; then
      echocolor " โ”œโ”€ Percentage Used: ${CYAN}${used}%${NC}"
    fi
    if [ -n "$errors" ]; then
      echocolor " โ””โ”€ Media Errors: ${CYAN}${errors}${NC}"
    fi
  else
    echocolor " ${YELLOW}No temperature data available${NC}"
  fi
else
  echocolor " ${YELLOW}Unable to fetch NVMe metrics${NC}"
fi
echocolor
echocolor "${BOLD}${BLUE}โ–ถ System Load:${NC}"
# Read the 1/5/15-minute load averages from /proc/loadavg (its first three
# fields) rather than from the human-oriented, locale-dependent `uptime` text.
awk -v cyan="$CYAN" -v nc="$NC" '{printf " โ””โ”€ Load Average: %s%.2f%s, %s%.2f%s, %s%.2f%s\n", cyan, $1, nc, cyan, $2, nc, cyan, $3, nc}' /proc/loadavg
echocolor
echocolor "${BOLD}${BLUE}โ–ถ GPU Status:${NC}"
# GPU section: driver, CUDA version, model and live metrics from nvidia-smi.
if ! command -v nvidia-smi &> /dev/null; then
  echocolor " โ””โ”€ ${YELLOW}NVIDIA tools not installed${NC}"
else
  # Driver version, plus a hint when the installed driver package is not the
  # 550 series.
  driver_version=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits 2>/dev/null)
  if [[ -n "$driver_version" ]]; then
    echocolor " โ”œโ”€ Driver: ${CYAN}${driver_version}${NC}"
    installed_pkg=$(dpkg -l | grep "^ii.*nvidia-driver-" | awk '{print $2}' | head -n1)
    if [[ -n "$installed_pkg" ]]; then
      case "$installed_pkg" in
        *"nvidia-driver-550"*)
          echocolor " โ”œโ”€ Status: ${GREEN}โœ“ Using recommended driver${NC}"
          ;;
        *)
          echocolor " โ”œโ”€ Status: ${YELLOW}Consider updating to driver 550${NC}"
          ;;
      esac
    fi
  fi

  # CUDA version as reported by `nvidia-smi -q`.
  cuda_version=$(nvidia-smi -q | grep "CUDA Version" | awk '{print $NF}')
  if [[ -n "$cuda_version" ]]; then
    echocolor " โ”œโ”€ CUDA: ${CYAN}${cuda_version}${NC}"
  fi

  # Model name taken from the PCI device list.
  gpu_model=$(lspci | grep -i "VGA.*NVIDIA" | cut -d: -f3)
  if [[ -n "$gpu_model" ]]; then
    echocolor " โ”œโ”€ Model:${CYAN}${gpu_model}${NC}"
  fi

  # Live metrics; stderr is captured as well so an NVML failure can be
  # recognised in the text.
  gpu_info=$(nvidia-smi --query-gpu=temperature.gpu,utilization.gpu,memory.used,memory.total,power.draw,power.limit --format=csv,noheader,nounits 2>&1)
  if [[ -n "$gpu_info" && "$gpu_info" != *"Failed to initialize NVML"* ]]; then
    IFS=',' read -r temp util mem_used mem_total power_draw power_limit <<< "$gpu_info"
    echocolor " โ”œโ”€ Temperature: ${CYAN}${temp}ยฐC${NC}"
    echocolor " โ”œโ”€ Utilization: ${CYAN}${util}%${NC}"
    echocolor " โ”œโ”€ Memory: ${CYAN}${mem_used}/${mem_total} MB${NC}"
    echocolor " โ””โ”€ Power: ${CYAN}${power_draw}W / ${power_limit}W${NC}"
  else
    echocolor " โ””โ”€ ${YELLOW}GPU metrics unavailable${NC}"
  fi
fi
echocolor
echocolor "${BOLD}${BLUE}โ–ถ Disk Space:${NC}"
# Prefer duf's table of all devices; without duf, fall back to a one-line
# summary of the root filesystem from df.
if ! command -v duf >/dev/null 2>&1; then
  df -h / | awk -v cyan="$CYAN" -v nc="$NC" 'NR==2 {printf " โ””โ”€ Usage: %s%s of %s (%s)%s\n", cyan, $3, $2, $5, nc}'
else
  duf --style unicode --output mountpoint,size,used,avail,usage,type,filesystem --sort mountpoint
fi
echocolor
echocolor "${BOLD}${BLUE}โ–ถ Recent System Errors:${NC}"
echocolor " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
# Summarise journal entries of priority err..emerg for a look-back window.
#   $1 - window length in hours
#   $2 - heading printed above the summary
# Entries that differ only in PIDs, timestamps or other numbers are grouped;
# the ten largest groups are printed, each represented by its first
# occurrence.
display_errors() {
  local period=$1
  local period_text=$2
  local errors
  # -q suppresses journalctl's informational lines, which would otherwise be
  # counted and grouped as if they were errors. Retry through sudo when the
  # unprivileged call fails.
  if ! errors=$(journalctl -q -p err..emerg --since "$period hours ago" --no-pager 2>/dev/null); then
    errors=$(sudo journalctl -q -p err..emerg --since "$period hours ago" --no-pager 2>/dev/null) || errors=""
  fi
  # Count lines only when there is output: piping an empty string through
  # `wc -l` reports 1, which made the "no errors" branch unreachable.
  local error_count=0
  if [ -n "$errors" ]; then
    error_count=$(grep -c '' <<<"$errors")
  fi
  echocolor " ${BOLD}${period_text}${NC}"
  if [ "$error_count" -eq 0 ]; then
    echocolor " โ”œโ”€${GREEN}โœ“ No errors in the last $period hours${NC}"
    echocolor " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
  else
    if [ "$error_count" -gt 10 ]; then
      echocolor " โ”œโ”€${YELLOW}โšก Found $error_count errors (grouped by type)${NC}"
    fi
    printf '%s\n' "$errors" | awk -v nc="$NC" -v yellow="$YELLOW" '
      {
        # Create a normalized version of the line by removing PIDs and timestamps
        normalized = $0
        gsub(/\[[0-9]+\]/, "[PID]", normalized)
        gsub(/[A-Z][a-z]+ [0-9]+ [0-9:]+/, "TIMESTAMP", normalized)
        gsub(/[0-9]+/, "N", normalized) # Replace all numbers with N
        if (!(normalized in first)) {
          first[normalized] = $0
          count[normalized] = 1
          order[normalized] = ++total
        } else {
          count[normalized]++
        }
      }
      END {
        # Emit up to ten groups, largest first. Groups of equal size keep
        # their first-seen order so the output is deterministic.
        n = 0
        while (n < 10 && n < total) {
          max_count = 0
          max_type = ""
          for (type in count) {
            if (count[type] > max_count ||
                (count[type] == max_count && order[type] < order[max_type])) {
              max_count = count[type]
              max_type = type
            }
          }
          prefix = (n == 9 || n == total-1) ? " โ””โ”€" : " โ”œโ”€"
          if (max_count > 1) {
            print prefix yellow "(" max_count " similar errors)" nc
            print " โ”‚ " first[max_type]
          } else {
            print prefix first[max_type]
          }
          if (n < 9 && n < total-1) print " โ”‚"
          delete count[max_type]
          n++
        }
        print " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
      }'
  fi
}
# Summarise journal errors for each look-back window ("hours:heading").
for window in "1:Last hour" "6:Last 6 hours" "12:Last 12 hours" "24:Last 24 hours"; do
  display_errors "${window%%:*}" "${window#*:}"
done
echocolor
echocolor "${BOLD}${BLUE}โ–ถ Maintenance Status:${NC}"
echocolor " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
# Add startup health checks
# Check systemd boot time
# Only the first line of `systemd-analyze time` carries the total, as the text
# after the last "= " (e.g. "13.5s" or "1min 22.269s"); later lines are
# ignored. Nothing is reported when that form is not found.
boot_time=$(systemd-analyze time 2>/dev/null | awk 'NR == 1 && /= / {sub(/^.*= /, ""); print}')
if [[ -n "$boot_time" ]]; then
  # Convert the h/min/s/ms parts to seconds and compare inside awk, so the
  # check needs neither bc nor integer-only shell arithmetic.
  boot_slow=$(awk '{
    total = 0
    for (i = 1; i <= NF; i++) {
      if ($i ~ /ms$/) total += $i / 1000
      else if ($i ~ /min$/) total += $i * 60
      else if ($i ~ /h$/) total += $i * 3600
      else if ($i ~ /s$/) total += $i
    }
    print (total > 30) ? 1 : 0
  }' <<<"$boot_time")
  if [[ "$boot_slow" == "1" ]]; then
    echocolor " โ”œโ”€ ${YELLOW}โšก Slow boot time: ${boot_time}${NC}"
    echocolor " โ”‚ ${DIM}Action: systemd-analyze blame${NC}"
  fi
fi
# Check for failed units during boot
# (counts every unit currently listed in the failed state)
failed_units=$(systemctl list-units --state=failed --no-legend | grep -c '')
if (( failed_units > 0 )); then
  echocolor " โ”œโ”€ ${YELLOW}โšก $failed_units units failed during boot${NC}"
  echocolor " โ”‚ ${DIM}Action: systemctl list-units --state=failed${NC}"
fi

# Check for masked units that might be important
masked_critical=$(systemctl list-unit-files --state=masked | grep -cE 'network|systemd-modules-load|systemd-sysctl|systemd-timesyncd')
if (( masked_critical > 0 )); then
  echocolor " โ”œโ”€ ${YELLOW}โšก Critical system units are masked${NC}"
  echocolor " โ”‚ ${DIM}Action: systemctl list-unit-files --state=masked${NC}"
fi

# Check for kernel modules failing to load
failed_modules=$(sudo dmesg 2>/dev/null | grep -ci "failed to load")
if (( failed_modules > 0 )); then
  echocolor " โ”œโ”€ ${YELLOW}โšก $failed_modules kernel modules failed to load${NC}"
  echocolor " โ”‚ ${DIM}Action: sudo dmesg | grep -i 'failed to load'${NC}"
fi

# Check systemd-journald status
journald_status=$(systemctl is-active systemd-journald)
if [[ "$journald_status" != "active" ]]; then
  echocolor " โ”œโ”€ ${YELLOW}โšก systemd-journald is not active${NC}"
  echocolor " โ”‚ ${DIM}Action: sudo systemctl status systemd-journald${NC}"
fi
# Check system updates with action
# A simulated upgrade (-s) is enough to obtain the count; the former sudo
# fallback could never run because its condition tested the exit status of the
# last pipeline stage. LC_ALL=C keeps the "N upgraded" summary line in English
# so it can be matched. `updates` stays empty when no count could be read.
updates=$(LC_ALL=C apt-get -s upgrade 2>/dev/null | awk '/^[0-9]+ upgraded/ {print $1; exit}')
if [ "$updates" -gt 0 ] 2>/dev/null; then
  echocolor " โ”œโ”€ ${YELLOW}โšก $updates system updates available${NC}"
  echocolor " โ”‚ ${DIM}Action: sudo apt update && sudo apt upgrade${NC}"
fi
# Check system reboot requirement with action
if [ -f /var/run/reboot-required ]; then
  echocolor " โ”œโ”€ ${YELLOW}โšก System restart required${NC}"
  echocolor " โ”‚ ${DIM}Action: sudo reboot${NC}"
fi
# Check disk space warnings with action
# -P keeps each filesystem on a single line, so the usage percentage is always
# the fifth field of the second line even for long device names.
disk_usage=$(df -P / | awk 'NR==2 {sub(/%/, "", $5); print $5}')
if [ "${disk_usage:-0}" -gt 85 ]; then
  echocolor " โ”œโ”€ ${YELLOW}โšก Root partition usage above 85%${NC}"
  echocolor " โ”‚ ${DIM}Action: sudo apt autoremove && sudo apt clean${NC}"
  echocolor " โ”‚ ${DIM} du -sh /* | sort -hr | head -n 10${NC}"
fi
# Check SMART status
# Warns unless the overall-health line reported for /dev/nvme0n1 contains
# "PASSED".
if command -v smartctl >/dev/null 2>&1; then
  smart_status=$(sudo smartctl -H /dev/nvme0n1 2>/dev/null | grep "SMART overall-health")
  case "$smart_status" in
    *"PASSED"*) ;;
    *)
      echocolor " โ”œโ”€ ${YELLOW}โšก SMART health check failed${NC}"
      echocolor " โ”‚ ${DIM}Action: sudo smartctl -a /dev/nvme0n1${NC}"
      ;;
  esac
fi

# Check firmware updates
# The update list is inspected only when the metadata refresh fails or the
# listing does not say "No updates available".
if command -v fwupdmgr >/dev/null 2>&1; then
  if ! fwupdmgr refresh >/dev/null 2>&1 \
    || ! fwupdmgr get-updates 2>&1 | grep -q "No updates available"; then
    fw_updates=$(fwupdmgr get-updates 2>&1)
    case "$fw_updates" in
      *"Updates available"*)
        echocolor " โ”œโ”€ ${YELLOW}โšก Firmware updates available${NC}"
        echocolor " โ”‚ ${DIM}Action: sudo fwupdmgr update${NC}"
        ;;
    esac
  fi
fi

# Check system activity
# Runs only when today's sysstat data file exists; the CPU figure itself is a
# one-second live sample (100 minus the last column of the average line).
today_sa="/var/log/sysstat/sa$(date +%d)"
if [ -f "$today_sa" ]; then
  cpu_usage=$(sar -u 1 1 | tail -n1 | awk '{print 100-$NF}')
  if [ "$(bc <<<"$cpu_usage > 90")" -eq 1 ]; then
    echocolor " โ”œโ”€ ${YELLOW}โšก High CPU usage detected in system activity logs${NC}"
    echocolor " โ”‚ ${DIM}Action: sar -u -f ${today_sa}${NC}"
  fi
fi
# Check old logs with action
# Counts nvme_health_*.log files under ~/logs/nvme last modified more than
# 30 days ago.
old_logs=$(find ~/logs/nvme -name "nvme_health_*.log" -mtime +30 2>/dev/null | wc -l)
if [ "$old_logs" -gt 0 ]; then
  echocolor " โ”œโ”€ ${YELLOW}โšก $old_logs log files older than 30 days${NC}"
  echocolor " โ”‚ ${DIM}Action: find ~/logs/nvme -name \"nvme_health_*.log\" -mtime +30 -delete${NC}"
fi
# Check backup status with action
# A backup counts as present when an archive stamped with today's date exists.
if [ ! -f ~/backups/"scripts_$(date +%Y%m%d).tar.gz" ]; then
  echocolor " โ”œโ”€ ${YELLOW}โšก No script backup found for today${NC}"
  echocolor " โ”‚ ${DIM}Action: tar czf ~/backups/scripts_$(date +%Y%m%d).tar.gz ~/scripts/${NC}"
fi
# Check systemd failed services with action
# The count is the leading number of the "N loaded units listed" summary line.
failed_services=$(systemctl --failed --no-pager | grep "loaded units listed" | awk '{print $1}')
if [ "$failed_services" -gt 0 ] 2>/dev/null; then
  echocolor " โ”œโ”€ ${YELLOW}โšก $failed_services systemd services failed${NC}"
  echocolor " โ”‚ ${DIM}Action: systemctl --failed${NC}"
  # Stray characters that used to follow the colour reset here were removed.
  echocolor " โ”‚ ${DIM} sudo systemctl reset-failed${NC}"
fi
# Check for zombie processes with action
# Counts processes whose state column in `ps aux` starts with Z.
zombies=$(ps aux | awk '$8 ~ /^Z/ {n++} END {print n+0}')
if (( zombies > 0 )); then
  echocolor " โ”œโ”€ ${YELLOW}โšก $zombies zombie processes detected${NC}"
  echocolor " โ”‚ ${DIM}Action: ps aux | grep Z${NC}"
  echocolor " โ”‚ ${DIM} sudo kill -9 <PID>${NC}"
fi

# Check system load with action
# Compares the 5-minute load average against 80% of the CPU count.
load_5min=$(uptime | awk -F'load average:' '{split($2, avg, ","); gsub(/ /, "", avg[2]); print avg[2]}')
cpu_cores=$(nproc)
bc_result=$(bc -l <<<"$load_5min > $cpu_cores * 0.8")
if [ "$bc_result" -eq 1 ] 2>/dev/null; then
  echocolor " โ”œโ”€ ${YELLOW}โšก High system load detected${NC}"
  echocolor " โ”‚ ${DIM}Action: htop${NC}"
  echocolor " โ”‚ ${DIM} sudo nice -n 19 <process>${NC}"
fi

# Check swap usage with action
# Total and used swap come from the Swap row of `free`; the percentage is
# only computed when swap is configured.
read -r swap_total swap_used < <(free | awk '/Swap/ {print $2, $3}')
if [ "$swap_total" -gt 0 ] 2>/dev/null; then
  swap_percent=$((swap_used * 100 / swap_total))
  if [ "$swap_percent" -gt 80 ]; then
    echocolor " โ”œโ”€ ${YELLOW}โšก Swap usage above 80%${NC}"
    echocolor " โ”‚ ${DIM}Action: sudo swapoff -a && sudo swapon -a${NC}"
  fi
fi
# If no issues found
# Checks that were skipped above leave their variables unset: no swap
# configured (swap_percent), no sysstat data file for today (cpu_usage),
# smartctl not installed (smart_status). A skipped check must not block the
# summary, so those values default to "no problem".
smart_ok=true
if command -v smartctl >/dev/null 2>&1 && [[ "$smart_status" != *"PASSED"* ]]; then
  smart_ok=false
fi
# The failed-unit, masked-unit, kernel-module and journald warnings printed
# earlier in this section are part of the verdict as well, so the summary
# cannot appear directly below one of them.
if [ ! -f /var/run/reboot-required ] && \
  [ "$updates" -eq 0 ] 2>/dev/null && \
  [ "$disk_usage" -le 85 ] 2>/dev/null && \
  [ "$old_logs" -eq 0 ] 2>/dev/null && \
  [ -f ~/backups/"scripts_$(date +%Y%m%d).tar.gz" ] && \
  [ "$failed_services" -eq 0 ] 2>/dev/null && \
  [ "${failed_units:-0}" -eq 0 ] 2>/dev/null && \
  [ "${masked_critical:-0}" -eq 0 ] 2>/dev/null && \
  [ "${failed_modules:-0}" -eq 0 ] 2>/dev/null && \
  [ "$journald_status" = "active" ] && \
  [ "$zombies" -eq 0 ] 2>/dev/null && \
  [ "$bc_result" -eq 0 ] 2>/dev/null && \
  [ "${swap_percent:-0}" -le 80 ] 2>/dev/null && \
  $smart_ok && \
  awk -v usage="${cpu_usage:-0}" 'BEGIN { exit !(usage <= 90) }'; then
  echocolor " โ”œโ”€ ${GREEN}โœ“ All maintenance checks passed${NC}"
fi
echocolor " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
echocolor "${BOLD}${BLUE}โ–ถ Log Maintenance:${NC}"
echocolor " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
# Get current user and group
current_user=$(whoami)
current_group=$(id -gn)
# Create directories if they don't exist
mkdir -p ~/logs/nvme 2>/dev/null
# Determine system log path
# Left empty when neither text log exists; the journal directory is not a
# plain log file, so no stanza is written for it.
if [ -f "/var/log/syslog" ]; then
  syslog_path="/var/log/syslog"
elif [ -f "/var/log/messages" ]; then
  syslog_path="/var/log/messages"
else
  syslog_path=""
fi
# Create temporary logrotate config with proper permissions and path checks.
# The file is created by root outside /etc/logrotate.d, so an interrupted run
# cannot leave a config behind for the system's own logrotate runs to pick up.
rotation_ok=false
if logrotate_conf=$(sudo mktemp /tmp/system_health_logrotate.XXXXXX 2>/dev/null); then
  {
    if [ -n "$syslog_path" ]; then
      cat <<EOF
$syslog_path {
maxsize 5G
rotate 7
daily
compress
delaycompress
notifempty
su root root
missingok
}
EOF
    fi
    cat <<EOF
/var/lib/docker/containers/*/*.log {
maxsize 1G
rotate 3
daily
compress
delaycompress
notifempty
copytruncate
su root root
missingok
}
$HOME/logs/nvme/nvme_health_*.log {
maxage 30
rotate 5
monthly
compress
delaycompress
notifempty
su $current_user $current_group
missingok
}
EOF
  } | sudo tee "$logrotate_conf" >/dev/null
  # Force rotation and show status
  # NOTE(review): -f rotates every listed log on each dashboard run regardless
  # of the size/age criteria above - confirm this is intended.
  if sudo logrotate -f "$logrotate_conf"; then
    rotation_ok=true
  fi
  # Cleanup temporary config
  sudo rm -f -- "$logrotate_conf"
fi
if $rotation_ok; then
  echocolor " โ”œโ”€ ${GREEN}โœ“ Logs rotated successfully${NC}"
  # Fix size reporting by summing up all related files
  syslog_size=""
  if [ -n "$syslog_path" ]; then
    # The glob is deliberate: it sums the live log and its rotated siblings.
    syslog_size=$(sudo du -ch "$syslog_path"* 2>/dev/null | grep 'total$' | cut -f1)
  fi
  docker_size=$(sudo du -ch /var/lib/docker/containers 2>/dev/null | grep 'total$' | cut -f1)
  nvme_size=$(du -ch ~/logs/nvme 2>/dev/null | grep 'total$' | cut -f1)
  echocolor " โ”œโ”€ System Logs: ${CYAN}${syslog_size:-0B}${NC}"
  echocolor " โ”œโ”€ Docker Logs: ${CYAN}${docker_size:-0B}${NC}"
  echocolor " โ””โ”€ NVMe Logs: ${CYAN}${nvme_size:-0B}${NC}"
else
  echocolor " โ””โ”€ ${RED}โœ— Log rotation failed${NC}"
fi
echocolor " โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"
echocolor "${BOLD}${BLUE}โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• End of Dashboard โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•${NC}"
echocolor
echocolor "${DIM}Tools Used:${NC}"
# List the version of every required tool that reports one. Packages without
# a case below, and tools that are not installed, are skipped silently:
# stderr is discarded so a missing tool does not print "command not found".
for pkg in "${!REQUIRED_PACKAGES[@]}"; do
  version=""
  case "$pkg" in
    "neofetch") version=$(neofetch --version 2>/dev/null | head -n1 | awk '{print $2}') ;;
    "nvidia-smi") version=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null) ;;
    "duf") version=$(duf --version 2>/dev/null | head -n1 | awk '{print $2}') ;;
    "logrotate") version=$(logrotate --version 2>/dev/null | head -n1 | awk '{print $2}') ;;
    "docker") version=$(docker --version 2>/dev/null | awk '{print $3}' | tr -d ',') ;;
    "nvme-cli") version=$(nvme --version 2>/dev/null | head -n1 | awk '{print $3}') ;;
    "smartmontools") version=$(smartctl --version 2>/dev/null | head -n1 | awk '{print $2}') ;;
    "fwupd") version=$(fwupdmgr --version 2>/dev/null | head -n1 | awk '{print $2}') ;;
    # `sar -V` prints "sysstat version X.Y.Z": the version is the last field
    # of the first line (there is no fourth field).
    "sysstat") version=$(sar -V 2>/dev/null | head -n1 | awk '{print $NF}') ;;
    "htop") version=$(htop --version 2>/dev/null | head -n1 | awk '{print $2}') ;;
  esac
  if [ -n "$version" ]; then
    echocolor "${DIM}โ€ข ${pkg} v${version}${NC}"
  fi
done