#!/usr/bin/perl -w

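# check _physical_ disk status of disks on HP smart array controllers
# requires hpacucli, which is run through sudo
#
# does _not_ check raid status.  use arrayprobe for that.
# (parity initialization status of logical drives, failed arrays and the
# controller status are reported, though.)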

# Copyright (c) 2008,2009,2010,2011 Peter Palfrader <peter@palfrader.org>
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

use strict;
use English;
use Getopt::Long;

# nagios exit codes
my %CODE = (
	'OK'            => 0,
	'WARNING'       => 1,
	'CRITICAL'      => 2,
	'UNKNOWN'       => 3
);

# Overall plugin state, named by a key of %CODE; starts out as OK.
my $EXITCODE = 'OK';

# Turn every uncaught die() into a nagios UNKNOWN result: the message is
# printed with a plain print (so it ends up in the plugin output) and the
# process exits with the UNKNOWN code.
$SIG{'__DIE__'} = sub {
	# The __DIE__ hook is also invoked for exceptions thrown inside an
	# eval; $^S is true in that case.  Re-throw so that the enclosing
	# eval can catch the exception instead of ending the plugin here.
	die @_ if $^S;
	print @_;
	exit $CODE{'UNKNOWN'};
};

# Run "sudo hpacucli <arguments>" and return what it printed.
#
# $cmd is the hpacucli argument string, e.g. "controller all show".  It is
# split on whitespace and passed to sudo as an argument list, so no shell
# ever gets to interpret it.
#
# Returns a reference to an array of the output lines (line endings still
# attached).  Dies if the command cannot be started or produces no output.
sub runcmd($) {
	my ($cmd) = @_;
	my @argv = ('sudo', 'hpacucli', split(' ', $cmd));
	$cmd = "sudo hpacucli $cmd";
	open(my $fh, '-|', @argv) or die ("Cannot run $cmd: $!");
	my @lines = <$fh>;
	# The exit status is deliberately not checked here: the rest of the
	# script parses lines starting with "Error:" out of the output itself.
	close $fh;
	die ("no results from $cmd\n") if (scalar @lines == 0);
	return \@lines;
}

# Raise the overall plugin state.
#
# $newexit has to be one of the keys of %CODE; any other name is fatal.
# The global $EXITCODE is replaced only when the new state maps to a
# numerically higher nagios code than the one recorded so far, so the
# recorded state never goes back down.
sub record($) {
	my $candidate = shift;
	defined $CODE{$candidate} or die "code $candidate not defined\n";

	$EXITCODE = $candidate if ($CODE{$EXITCODE} < $CODE{$candidate});
}

# Command line handling.  Every option is stored in the $params hash.  An
# unparsable command line or a leftover positional argument is fatal and
# shows the usage text; --help prints the usage text and exits with 0.
my $usage = "$PROGRAM_NAME: Usage: $PROGRAM_NAME [--no-battery] [--ignore-controller=<regex>] [--no-controller-ok] [--ignore-transfer-speed=<pd> [--ignore-transfer-speed=<pd> ...]]\n";
my $params;
Getopt::Long::Configure('bundling');
my $options_ok = GetOptions (
	'--help'                      => \$params->{'help'},
	'--no-battery'                => \$params->{'no-battery'},
	'--no-controller-ok'          => \$params->{'no-controller-ok'},
	'--ignore-controller=s'       => \$params->{'ignore-controller'},
	'--ignore-transfer-speed=s@'  => \$params->{'ignore-transfer-speed'},
);
die ($usage) unless ($options_ok);

if ($params->{'help'}) {
	print $usage;
	exit (0);
}

# no positional arguments are accepted
die ($usage) if (scalar @ARGV != 0);

# Ask hpacucli for all controllers and remember the slot of each of them.
# Empty lines and lines matching --ignore-controller are skipped; any other
# line that does not name a slot is a fatal parse error.
my $ctrlallshow = runcmd("controller all show");
my @controllers;
foreach my $line (@$ctrlallshow) {
	chomp $line;
	next if ($line =~ /^$/);

	my $ignore = $params->{'ignore-controller'};
	next if ($ignore && $line =~ /$ignore/);

	unless ($line =~ /in Slot ([0-9a-z]+)/) {
		die ("Cannot read line '$line' gotten from hpacucli controller all show\n");
	}
	push @controllers, $1;
}

# Without any controller there is nothing left to check: this counts as OK
# when --no-controller-ok was given and as UNKNOWN otherwise.
if (!@controllers) {
	my ($text, $state) = $params->{'no-controller-ok'}
		? ("No smartarray controllers found with hpacucli\n", 'OK')
		: ("UNKNOWN: No smartarray controllers found with hpacucli\n", 'UNKNOWN');
	print $text;
	exit $CODE{$state};
}

# One summary string per controller slot; joined into the plugin output
# at the end of the script.
my @resultstr;

# Check every controller slot in turn: logical drives (parity
# initialization), physical drives (state and negotiated transfer speed)
# and finally the controller's own status lines.  Problems raise the
# overall state through record(); output that cannot be parsed is fatal.
for my $slot (sort @controllers) {
	my @drives;        # IDs of the physical drives seen on this slot
	my $nodrives = 0;  # set when hpacucli reports that there are no drives
	my %status;        # status text => list of items having that status

	# List the logical drives configured on this controller.
	my $ldallshow = runcmd("controller slot=$slot ld all show");
	my @logicaldrives;
	for (@$ldallshow) {
		chomp;
		next if /^$/;
		next if (/^\S.*in Slot $slot/);
		next if /^ *array [A-Z]$/;
		if (/logicaldrive ([0-9a-z]+)/) {
			push @logicaldrives, $1;
			next;
		} elsif (/^Error: The specified device does not have any logical drives.$/) {
			$nodrives = 1;
		} else {
			die ("Cannot read line '$_' gotten from hpacucli controller slot = $slot logicaldrive all show\n");
		}
	};

	# check logicaldrives
	for my $logicaldrive (sort @logicaldrives) {
		my $lds = runcmd("controller slot=$slot ld $logicaldrive show");
		for (@$lds) {
			chomp;
			next if /^$/;
			# Only the parity initialization line is evaluated; every
			# other line of the logical drive details is ignored.
			if (/^ *Parity Initialization Status: (Initialization Completed|Initialization Failed|Rebuilding)$/) {
				my $status = $1;
				if ($status eq 'Initialization Completed') {
					push @{$status{'OK'}}, "Parity LD$logicaldrive";
				} elsif ($status eq 'Rebuilding') {
					push @{$status{'Failed'}}, "Parity LD$logicaldrive";
					record('WARNING');
				} elsif ($status eq 'Initialization Failed') {
					push @{$status{'Failed'}}, "Parity LD$logicaldrive";
					record('CRITICAL');
				} else {
					record('UNKNOWN');
				}
			}
		}
	}

	if (!$nodrives && scalar @logicaldrives == 0) {
		push @resultstr, "Slot $slot: unexpectedly, found no logical drives in list.";
		record('UNKNOWN');
	} elsif ($nodrives && scalar keys %status > 0) {
		push @resultstr, "Slot $slot: have no logical drives but status results?";
		record('UNKNOWN');
		next;
	} elsif ($nodrives) {
		# NOTE(review): $nodrives stays set for the physical drive phase
		# below, so a slot without logical drives but with physical
		# drives ends up in the "have no drives but status results?"
		# branch further down -- confirm that this is intended.
		push @resultstr, "Slot $slot: no logical drives";
	};


	# List the physical drives and classify each one by its reported state.
	my $pds = runcmd("controller slot=$slot pd all show");
	for (@$pds) {
		chomp;
		next if /^$/;
		next if (/^\S.*in Slot $slot/);
		next if /^ *array [A-Z]$/;
		next if /^ *unassigned/;
		if (/^ *(array [A-Z]) \(Failed\)$/) {
			record('CRITICAL');
			push @{$status{'Failed'}}, $1;
		} elsif (/^Error: The specified controller does not have any physical drives on it.$/) {
			$nodrives = 1;
		} elsif (/^ *physicaldrive (\S+) .* (OK|Predictive Failure|Failed|Rebuilding)(?:, (?:active )?spare)?\)$/) {
			my $drive = $1;
			my $status = $2;
			push @{$status{$status}}, $drive;
			if ($status eq 'OK') {
			} elsif ($status eq 'Predictive Failure' ||
			         $status eq 'Rebuilding') {
				record('WARNING');
			} elsif ($status eq 'Failed') {
				record('CRITICAL');
			} else {
				record('UNKNOWN');
			};
			push @drives, $drive;
		} else {
			die ("Cannot read line '$_' gotten from hpacucli controller slot=$slot pd all show\n");
		};
	};

	# Check that all drives have the proper transfer speed.
	# sometimes stuff breaks and they fall back to 10mb/sec.
	for my $drive (@drives) {
		# skip drives that are known to have failed
		next if (exists $status{'Failed'} && grep {$drive eq $_} @{$status{'Failed'}});
		# The shape of the drive ID decides which kind of drive this is.
		my $type;
		if ($drive =~ /^[0-9]+:[0-9]+$/) { # scsi drives
			$type = 'SCSI';
		} elsif ($drive =~ /^[0-9]+[EI]:[0-9]+:[0-9]+$/) { # SAS
			$type = 'SAS';
		} elsif ($drive =~ /^[0-9]+[C]:[0-9]+:[0-9]+$/) { # New 6GBPS SAS
			$type = 'SAS+';
		} else {
			# I'm not going to pass arguments of unknown form to a command line..
			warn ("Unknown diskdrive ID $drive\n");
			next;
		}

		# Fetch the drive details, drop everything up to and including
		# the "physicaldrive" header line and read the remaining
		# "key: value" lines into %value.
		my $pd = runcmd("controller slot=$slot pd $drive show");
		while (defined $pd->[0] && !($pd->[0] =~ /physicaldrive/)) {
			shift @$pd;
		};
		shift @$pd;
		my %value;
		for (@$pd) {
			if (m/^\s*(.*?):\s*(.*?)\s*$/) {
				$value{$1} = $2;
			}
		}

		# $key names the field holding the negotiated speed, $expected is
		# the acceptable value (a string) or list of values (array ref).
		my $key;
		my $expected;
		if ($type eq 'SCSI') {
			$key = 'Transfer Speed';
			if (!defined $value{'Transfer Mode'}) {
				record('WARNING');
				push @{$status{'unknown transfer mode'}}, $drive;
				next;
			} elsif ($value{'Transfer Mode'} eq 'Ultra 3 Wide') {
				$expected = '160 MB/Sec';
			} elsif ($value{'Transfer Mode'} eq 'Ultra 320 Wide') {
				$expected = '320 MB/Sec';
			} else {
				record('WARNING');
				push @{$status{'unknown transfer mode'}}, $drive."(".$value{'Transfer Mode'}.")";
				next;
			};
		} elsif ($type eq 'SAS' || $type eq 'SAS+') {
			$key = 'PHY Transfer Rate';
			# Either field may be missing from the output; treat a
			# missing field as "no match" without comparing undef.
			my $iftype   = defined $value{'Interface Type'} ? $value{'Interface Type'} : '';
			my $phycount = defined $value{'PHY Count'}      ? $value{'PHY Count'}      : '';
			if ($iftype eq 'SATA') {
				$expected = [ '1.5Gbps', '3.0Gbps' ];
			} elsif ($phycount eq '2') {
				if (defined($value{'Redundant Path(s)'})) {
					$expected = [ '3.0GBPS, 3.0GBPS', '6.0GBPS, 6.0GBPS' ];
				} else {
					$expected = [ '3.0GBPS, Unknown', 'Unknown, 3.0GBPS',
					              '6.0GBPS, Unknown', 'Unknown, 6.0GBPS' ];
				}
			} else {
				$expected = [ '3.0GBPS', '6.0GBPS' ];
			}
		} else {
			warn "Should not be here.  Do not know what to do with type '$type'\n";
			next;
		}

		# Drives named with --ignore-transfer-speed are only listed,
		# never flagged.
		if ($params->{'ignore-transfer-speed'}) {
			if (grep { $drive eq $_ } @{$params->{'ignore-transfer-speed'}}) {
				# the speed field may be absent; show an empty value then
				my $speed = defined $value{$key} ? $value{$key} : '';
				push @{$status{'ignored transfer speed'}}, $drive."(".$speed.")";
				next;
			};
		};
		# Speeds are compared case-insensitively.
		if (!defined $value{$key}) {
			record('WARNING');
			push @{$status{'unknown transfer speed'}}, $drive;
		} elsif (ref($expected) eq 'ARRAY') {
			if (scalar(grep { uc($value{$key}) eq uc($_) } @$expected) == 0) {
				record('WARNING');
				push @{$status{'bad transfer speed'}}, $drive."(".$value{$key}.")";
			};
		} elsif (uc($value{$key}) ne uc($expected)) {
			record('WARNING');
			push @{$status{'bad transfer speed'}}, $drive."(".$value{$key}.")";
		};
	};

	if ($nodrives && scalar keys %status > 0) {
		push @resultstr, "Slot $slot: have no drives but status results?";
		record('UNKNOWN');
		next;
	} elsif ($nodrives) {
		push @resultstr, "Slot $slot: no drives";
		next;
	};

	# Controller status lines ("<subsystem> Status: <state>").
	my $cst = runcmd("controller slot=$slot show status");
	for (@$cst) {
		chomp;
		next if /^$/;
		next if (/^\S.*in Slot $slot/);
		if (/^ *(.*) Status: (.*)$/) {
			my $system = $1;
			my $status = $2;
			push @{$status{$status}}, $system;
			if ($status ne 'OK') {
				# with --no-battery, cache and battery problems are
				# listed in the output but do not raise the state
				next if ($params->{'no-battery'} && $system eq 'Cache');
				next if ($params->{'no-battery'} && $system eq 'Battery/Capacitor');
				record('WARNING');
			};
		} else {
			die ("Cannot read line '$_' gotten from hpacucli controller slot=$slot show status\n");
		};
	};

	# Sort the status names so that the summary comes out in the same
	# order on every run; plain hash key order is not stable.
	my $status = join(" - ", (map { $_.": ".join(", ", @{$status{$_}}) } sort keys %status));
	push @resultstr, "Slot $slot: $status";
};

# Emit the single result line -- overall state followed by the per-slot
# summaries -- and exit with the matching nagios code.
my $summary = join(" --- ", @resultstr);
print "$EXITCODE: ", $summary, "\n";
exit $CODE{$EXITCODE};
