Import aacraid-status

This commit is contained in:
Adam Cécile (Le_Vert) 2013-08-14 22:56:33 +02:00
parent f983b59c43
commit 47b975e591
10 changed files with 669 additions and 0 deletions

View File

@ -0,0 +1,23 @@
README.Debian for aacraid-status package
----------------------------------------
Possible configuration:
----------------------
If you want to change the default configuration of the init script you
can create the file /etc/default/aacraid-statusd and specify the following
values.
MAILTO=
PERIOD=
REMIND=
Use MAILTO to specify which user shall get the status mails
(default is root).
PERIOD sets the number of seconds between each check (default is 600).
REMIND sets the number of seconds between each reminder (default is 7200).
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Mon, 24 Sep 2007 15:55:09 +0200

View File

@ -0,0 +1,199 @@
#! /bin/sh
# Author: Petter Reinholdtsen <pere@hungry.com>
# License: GNU General Public License v2 or later
#
### BEGIN INIT INFO
# Provides: aacraid-statusd
# Required-Start: $remote_fs $syslog
# Required-Stop: $remote_fs $syslog
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Check aacraid-status values in the background.
### END INIT INFO
PATH=/sbin:/bin:/usr/sbin:/usr/bin
DESC="aacraid-status monitor"
NAME=aacraid-statusd
PIDFILE=/var/run/$NAME.pid
STATUSFILE=/var/run/$NAME.status
SCRIPTNAME=/etc/init.d/$NAME

# Do not touch these defaults; override them in /etc/default/aacraid-statusd.
MAILTO=root # Where to report problems
PERIOD=600 # Seconds between each check (default 10 minutes)
REMIND=7200 # Seconds between each reminder (default 2 hours)
RUN_DAEMON=yes
[ -e /etc/default/aacraid-statusd ] && . /etc/default/aacraid-statusd

# Gracefully exit if the package has been removed.
test -x /usr/sbin/aacraid-status || exit 0

. /lib/lsb/init-functions
[ -e /etc/default/rcS ] && . /etc/default/rcS

# RUN_DAEMON comes from an admin-edited file: quote it so that an empty or
# multi-word value is a clean "not equal" instead of a `[` syntax error.
if [ "$RUN_DAEMON" = "no" ] ; then
  log_begin_msg "aacraid-statusd is disabled in /etc/default/aacraid-statusd, not starting."
  log_end_msg 0
  exit 0
fi
# Monitoring loop run by the daemonised instance of this script.
#
# Globals read:    PIDFILE, STATUSFILE, PERIOD, REMIND, MAILTO
# Globals written: BADRAID, STATUSCHANGE, LASTTIME, NOW, SINCELAST, hostname
# Files:           $PIDFILE (our PID), $STATUSFILE (last reported bad status),
#                  $STATUSFILE.new (snapshot of the current cycle)
# Never returns: loops forever, sleeping $PERIOD seconds between checks.
check_aacraid() {
  # Write the PID to a temporary name first so readers never see a
  # half-written pidfile.
  echo $$ > "$PIDFILE.new" && mv "$PIDFILE.new" "$PIDFILE"
  while true ; do
    # Check every $PERIOD seconds, send email on every status
    # change and repeat every $REMIND seconds if the raid is still
    # bad.

    # Take exactly one snapshot per cycle. Detection, comparison and the
    # mailed report all use this same output, so they cannot disagree if the
    # controller state changes between two invocations.
    aacraid-status > "$STATUSFILE.new"
    if grep -q 'NOT OPTIMAL' "$STATUSFILE.new" ; then
      BADRAID=true
      logger -t aacraid-statusd "detected non-optimal RAID status"
    else
      BADRAID=false
    fi

    STATUSCHANGE=false
    if [ true = "$BADRAID" ] ; then
      # RAID not OK
      if [ ! -f "$STATUSFILE" ] ; then # RAID just became broken
        STATUSCHANGE=true
        mv "$STATUSFILE.new" "$STATUSFILE"
      elif cmp -s "$STATUSFILE" "$STATUSFILE.new" ; then
        # No change. Should we send reminder?
        LASTTIME=$(stat -c '%Z' "$STATUSFILE")
        NOW=$(date +%s)
        SINCELAST=$((NOW - LASTTIME))
        if [ "$REMIND" -le "$SINCELAST" ] ; then
          # Time to send reminder; replacing the file refreshes its
          # change time, which restarts the reminder interval.
          STATUSCHANGE=true
          mv "$STATUSFILE.new" "$STATUSFILE"
        else
          rm -f "$STATUSFILE.new"
        fi
      else
        # Still bad, but the details differ from the last report.
        STATUSCHANGE=true
        mv "$STATUSFILE.new" "$STATUSFILE"
      fi
    else
      # RAID OK
      if [ -f "$STATUSFILE" ] ; then
        rm -f "$STATUSFILE"
        STATUSCHANGE=true
      fi
    fi

    if [ true = "$STATUSCHANGE" ] ; then
      hostname=$(uname -n)
      {
        printf '%s\n' \
          "This is a RAID status update from aacraid-statusd. The aacraid-status" \
          "program reports that one of the RAIDs changed state:"
        if [ -f "$STATUSFILE" ] ; then
          cat "$STATUSFILE"
        else
          # Recovery report: the bad-status file is gone, so send the
          # snapshot taken at the top of this cycle.
          cat "$STATUSFILE.new"
        fi
        echo
        echo "Report from $0 on $hostname"
      # MAILTO is deliberately unquoted so it may hold several recipients.
      # shellcheck disable=SC2086
      } | mail -s "info: AACRaid raid status change on $hostname" $MAILTO
    fi

    # Drop the snapshot if it was not promoted to $STATUSFILE.
    rm -f "$STATUSFILE.new"
    sleep "$PERIOD"
  done
}
# Report whether a monitoring loop is already present in the process table.
#
# Looks for a process whose command line contains
# "/etc/init.d/aacraid-statusd check_aacraid", ignoring the grep itself and
# any line mentioning "daemon" (the /usr/bin/daemon wrapper).
# Globals written: DAEMON_RUN (the matching ps lines, possibly empty)
# Returns: 0 when no such process is found, 1 when one is running.
check_daemon() {
  DAEMON_RUN=$(ps aux \
    | grep "/etc/init.d/aacraid-statusd check_aacraid" \
    | grep -v grep \
    | grep -v daemon)
  # An empty match list means nothing is running, which is the "success" case.
  [ -z "$DAEMON_RUN" ]
}
#
# Function that starts the daemon/service.
#
# Start the monitoring daemon unless one is already running.
#
# Globals read:    PIDFILE, SCRIPTNAME
# Globals written: PID
# Returns: always 0 (an already-running daemon is not treated as an error).
d_start() {
  # Start from a clean value so a PID inherited from the environment cannot
  # be mistaken for a running daemon.
  PID=
  [ -f "$PIDFILE" ] && PID=$(cat "$PIDFILE")
  # Ignore anything that is not a plain process id.
  case "$PID" in
    ''|*[!0-9]*) PID= ;;
  esac

  # A pidfile only counts when its process is still alive. A stale file
  # left behind by a crash or an unclean shutdown must not block the start.
  if [ -n "$PID" ] && kill -0 "$PID" 2>/dev/null ; then
    log_progress_msg "Daemon already running. Refusing to start another"
    return 0
  elif check_daemon ; then
    # Use the daemon package to turn this script into a daemon
    start-stop-daemon --start --quiet --pidfile "$PIDFILE" \
      --oknodo --exec /usr/bin/daemon "$SCRIPTNAME" check_aacraid
    return 0
  else
    log_progress_msg "Daemon is already running. Refusing to start another"
    return 0
  fi
}
#
# Function that stops the daemon/service.
#
# Stop the monitoring daemon.
#
# Globals read:    PIDFILE
# Globals written: DAEMONPID, SCRIPTPID
# Returns: 0 when there is no pidfile; otherwise the status of the final
#          pidfile removal.
d_stop() {
  if [ ! -f "$PIDFILE" ] ; then
    log_progress_msg "Daemon is already stopped."
    return 0
  fi

  # Doesn't work (kill init script instance, but not daemon...)
  #start-stop-daemon --stop --oknodo --quiet --pidfile $PIDFILE > /dev/null 2>&1

  # Kill both the /usr/bin/daemon wrapper (found via ps) and the script
  # instance whose PID is recorded in the pidfile.
  DAEMONPID=$(ps aux \
    | grep '/usr/bin/daemon /etc/init.d/aacraid-statusd check_aacraid' \
    | grep -v 'grep' \
    | awk '{ print $2 }')
  SCRIPTPID=$(cat "$PIDFILE")
  # Unquoted on purpose: either variable may hold zero or several PIDs.
  # shellcheck disable=SC2086
  kill -9 $DAEMONPID $SCRIPTPID || true
  rm -f "$PIDFILE"
}
# This is a workaround function which does not directly exit and
# therefore can be used by a restart
# Variant of the stop logic used by "restart": it never returns early and
# always finishes by reporting success through log_end_msg.
#
# Globals read:    PIDFILE
# Globals written: DAEMONPID, SCRIPTPID
# Returns: the status of `log_end_msg 0`.
d_stop_by_restart() {
  if [ ! -f "$PIDFILE" ] ; then
    log_progress_msg "Daemon is already stopped."
  else
    # Doesn't work (kill init script instance, but not daemon...)
    #start-stop-daemon --oknodo --stop --quiet --pidfile $PIDFILE

    # Kill the /usr/bin/daemon wrapper (found via ps) together with the
    # script instance recorded in the pidfile.
    DAEMONPID=$(ps aux \
      | grep '/usr/bin/daemon /etc/init.d/aacraid-statusd check_aacraid' \
      | grep -v 'grep' \
      | awk '{ print $2 }')
    SCRIPTPID=$(cat "$PIDFILE")
    # Unquoted on purpose: either variable may hold zero or several PIDs.
    # shellcheck disable=SC2086
    kill -9 $DAEMONPID $SCRIPTPID || true
    rm -f "$PIDFILE"
  fi
  log_end_msg 0
}
# Dispatch on the requested action. "check_aacraid" is the internal entry
# point used when this script is re-executed as the background daemon.
case "${1}" in
  check_aacraid)
    check_aacraid
    ;;
  start)
    echo -n ""
    log_begin_msg "Starting $DESC: $NAME"
    d_start
    CODE=$?
    log_end_msg $CODE
    ;;
  stop)
    log_begin_msg "Stopping $DESC: $NAME"
    d_stop
    CODE=$?
    log_end_msg $CODE
    ;;
  restart|force-reload)
    log_begin_msg "Restarting $DESC: $NAME"
    d_stop_by_restart
    sleep 1
    # NOTE(review): CODE is only assigned when d_start fails, so on success
    # log_end_msg receives whatever CODE held before (normally nothing).
    # Confirm whether that is intended before changing it.
    d_start || CODE=$?
    log_end_msg $CODE
    ;;
  *)
    # echo "Usage: $SCRIPTNAME {start|stop|restart|reload|force-reload}" >&2
    echo "Usage: $SCRIPTNAME {start|stop|restart|force-reload}" >&2
    exit 1
    ;;
esac

# Every recognised action exits successfully, whatever CODE was.
exit 0

View File

@ -0,0 +1,141 @@
aacraid-status (0.20) unstable; urgency=low
* There's at least one more way arcconf can print disk position
(Closes: #187).
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Tue, 24 Jul 2012 16:48:39 +0200
aacraid-status (0.19) unstable; urgency=low
* Handle cards reporting segments with enclosure/slot position
(Closes: #187, #207). Many thanks to all people involved in solving
this issue.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Thu, 19 Jul 2012 22:59:53 +0200
aacraid-status (0.18) unstable; urgency=low
* Fixes for arcconf >= 7.30.18837.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Sat, 28 Jan 2012 17:37:21 +0100
aacraid-status (0.17) unstable; urgency=low
* Merge patch by consult@btoy1.net to handle RAID group (ie: RAID10).
Thanks for your contribution! (Closes: #7, #59).
* Update initscript using latest mpt-status one (fix unexpected shutdown
issue due to set -e).
* Hardcode absolute arcconf path to avoid failures when the binary doesn't
exist. This will also ensure the use of the right arcconf binary
(Closes: #13).
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Sat, 10 Sep 2011 22:40:37 +0200
aacraid-status (0.16) unstable; urgency=low
* Do not fail with HotSpare and Ready (fine but not used in any array)
disks.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Mon, 16 Aug 2010 10:51:28 +0200
aacraid-status (0.15) unstable; urgency=low
* Handle 'Simple_Volume' which is a disk connected to the card
without being part of an array or being hot-spare.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Thu, 03 Sep 2009 10:24:04 +0200
aacraid-status (0.14) unstable; urgency=low
* Fix disk/channel ids for arcconf 6.10.18451.
* Bump Standards-Version to 3.8.2.
* Update my own copyright statement.
* Remove S from Default-Stop in initscript's LSB headers.
* Fix debian/copyright to be GPL2+ compatible.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Fri, 10 Jul 2009 15:58:46 +0200
aacraid-status (0.13) unstable; urgency=low
* Fix disk/channel ids for arcconf 6.10.x.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Mon, 22 Dec 2008 23:12:11 +0100
aacraid-status (0.12) unstable; urgency=low
* Fix disk/channel ids.
* Do not need afa0 anymore (initscript) (arcconf do what's required).
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Mon, 10 Mar 2008 10:25:13 +0100
aacraid-status (0.11) unstable; urgency=low
* Small type fix.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Thu, 06 Mar 2008 17:00:35 +0100
aacraid-status (0.10) unstable; urgency=low
* In some cases, array ids don't start at 0.
Don't compute them, read them.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Thu, 06 Mar 2008 16:16:27 +0100
aacraid-status (0.9) unstable; urgency=low
* Move from afacli to arcconf (far better).
This should fix all parsing problems :-)
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Thu, 06 Mar 2008 15:28:31 +0100
aacraid-status (0.8) unstable; urgency=low
* Fix broken output with degraded RAID1 array.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Tue, 12 Feb 2008 15:02:05 +0100
aacraid-status (0.7) unstable; urgency=low
* Return 1 if something wrong detected.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Tue, 09 Oct 2007 16:45:17 +0200
aacraid-status (0.6) unstable; urgency=low
* Detect and show rebuilding status.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Mon, 01 Oct 2007 15:08:55 +0200
aacraid-status (0.5) unstable; urgency=low
* Fix a nasty regex bug that could consider bad line as array line.
* Afacli do not report any status for good RAID-5 arrays (?!?).
Add a dirty hack to report them as 'Normal'.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Wed, 26 Sep 2007 17:46:02 +0200
aacraid-status (0.4) unstable; urgency=low
* Typos.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Wed, 26 Sep 2007 17:06:28 +0200
aacraid-status (0.3) unstable; urgency=low
* Run afacli in a sub-shell with nohup to avoid crap output.
* Improve failure detection, now checks for arrays status too.
* Fake failure feature now creates a bad array too.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Wed, 26 Sep 2007 09:01:50 +0200
aacraid-status (0.2) unstable; urgency=low
* aacraid-status now returns more information about disks/arrays.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Tue, 25 Sep 2007 17:12:46 +0200
aacraid-status (0.1) unstable; urgency=low
* Initial release.
-- Adam Cécile (Le_Vert) <gandalf@le-vert.net> Mon, 24 Sep 2007 15:55:09 +0200

View File

@ -0,0 +1 @@
5

View File

@ -0,0 +1,14 @@
Source: aacraid-status
Section: admin
Priority: extra
Maintainer: Adam Cécile (Le_Vert) <gandalf@le-vert.net>
Build-Depends: debhelper (>= 5)
Standards-Version: 3.9.3
Package: aacraid-status
Architecture: all
Depends: ${shlibs:Depends}, ${misc:Depends}, python, lsb-base, daemon, bsd-mailx | mailx, arcconf (>= 7.30.18837)
Description: get RAID status out of Adaptec AACRaid HW RAID controllers
The aacraid-status software is a query tool to access the running
configuration and status of Adaptec SCSI HBAs. aacraid-status allows you to
monitor the health and status of your RAID setup.

View File

@ -0,0 +1,26 @@
This package was debianized by Adam Cécile (Le_Vert) <gandalf@le-vert.net> on
Mon, 24 Sep 2007 15:55:09 +0200.
It was downloaded from http://hwraid.le-vert.net
Copyright Holder:
Copyright (C) 2007-2009 Adam Cécile (Le_Vert) <gandalf@le-vert.net>
License:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for details.
On Debian GNU/Linux systems, the complete text of the GNU General
Public License can be found in `/usr/share/common-licenses/GPL'.
This package is heavily based on the work done by Steffen Joeris
<white@debian.org> for the mpt-status package.
Thanks a lot Steffen!

View File

@ -0,0 +1 @@
usr/sbin

View File

@ -0,0 +1 @@
aacraid-status usr/sbin

View File

@ -0,0 +1,42 @@
#!/usr/bin/make -f
# Debian packaging rules driven by individual debhelper (dh_*) commands.
# NOTE(review): recipe indentation (tabs) is not visible in this view of the
# file; restore it before running make.
# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1
# Nothing is compiled: the three build targets are intentionally empty.
build:
build-arch:
build-indep:
# Remove everything left over from a previous package build.
clean:
dh_testdir
dh_testroot
dh_clean
# Prepare the staging tree and create the directories the package needs.
install:
dh_testdir
dh_testroot
dh_clean -k
dh_installdirs
# binary-arch only triggers install; all packaging work happens in
# binary-indep below.
binary-arch: install
binary-indep: install
dh_testdir
dh_testroot
dh_installchangelogs
dh_installdocs
dh_install
# Must be run after afacli-makedev
# The init script is installed under the name aacraid-statusd with start
# priority 21 in runlevels 2-5 and stop priority 19 in runlevels 0, 1 and 6.
dh_installinit --name aacraid-statusd \
--update-rcd-params="start 21 2 3 4 5 . stop 19 0 1 6 ."
dh_link
dh_strip
dh_compress
dh_fixperms
dh_installdeb
dh_shlibdeps
dh_gencontrol
dh_md5sums
dh_builddeb
binary: binary-indep binary-arch
.PHONY: build clean binary-indep binary-arch binary install

221
wrapper-scripts/aacraid-status Executable file
View File

@ -0,0 +1,221 @@
#!/usr/bin/python
import os
import re
import sys
# Command line: no argument at all, or the single flag '-d'.
# Anything else prints the usage line and exits with status 1.
if len(sys.argv) > 2 or (len(sys.argv) == 2 and sys.argv[1] != '-d'):
    print('Usage: aacraid-status [-d]')
    sys.exit(1)

# Both the controller table and the array table are printed only when
# '-d' was not given.
printarray = printcontroller = len(sys.argv) < 2

# Set to True as soon as any controller, array or disk is not healthy.
bad = False
# Get command output
def getOutput(cmd):
    """Run ``cmd`` through the shell and return its non-blank output lines.

    stderr of the command is discarded. Every returned line has its leading
    and trailing whitespace removed; lines that are empty after stripping
    are dropped.

    :param cmd: shell command line to execute
    :returns: list of stripped, non-empty stdout lines (possibly empty)
    """
    output = os.popen(cmd+' 2>/dev/null')
    try:
        lines = []
        for line in output:
            stripped = line.strip()
            if stripped:
                lines.append(stripped)
    finally:
        # Close the pipe explicitly instead of relying on garbage
        # collection to reap the child process.
        output.close()
    return lines
def returnControllerNumber(output):
    """Return the controller count announced in ``output``.

    Looks for the first line of the form ``Controllers found: N``.

    :param output: iterable of output lines
    :returns: N as an int, or 0 when no such line is present (so callers
              can compare the result numerically without special-casing
              a missing value)
    """
    for line in output:
        found = re.match(r'^Controllers found: ([0-9]+)$', line.strip())
        if found:
            return int(found.group(1))
    return 0
def returnControllerModel(output):
    """Return the value of the first ``Controller Model`` line.

    The value is the text between the first and second ``:`` of that line,
    stripped of surrounding whitespace.

    :param output: iterable of output lines
    :returns: the model string, or None when no line matches
    """
    wanted = re.compile(r'^Controller Model.*$')
    for raw in output:
        if wanted.match(raw.strip()) is None:
            continue
        fields = raw.split(':')
        return fields[1].strip()
    return None
def returnControllerStatus(output):
    """Return the value of the first ``Controller Status`` line.

    The value is the text between the first and second ``:`` of that line,
    stripped of surrounding whitespace.

    :param output: iterable of output lines
    :returns: the status string, or None when no line matches
    """
    candidates = (entry for entry in output
                  if re.match(r'^Controller Status.*$', entry.strip()))
    # Only the first matching line is ever used.
    for entry in candidates:
        return entry.split(':')[1].strip()
    return None
def returnArrayIds(output):
    """Collect the ids of all logical devices listed in ``output``.

    A logical device is announced by a line ``Logical device number N``.

    :param output: iterable of output lines
    :returns: list of the N values, as strings, in order of appearance
    """
    # Capture the number directly. str.strip('Logical device number')
    # treats its argument as a set of characters rather than a prefix,
    # which only produced the id by accident and broke on lines that carry
    # other surrounding whitespace such as a tab.
    header = re.compile(r'^Logical device number ([0-9]+)$')
    ids = []
    for line in output:
        found = header.match(line.strip())
        if found:
            ids.append(found.group(1))
    return ids
def returnArrayInfo(output):
    """Extract type, status, size and member positions of one array.

    :param output: iterable of output lines describing a logical device
    :returns: ``[type, status, size, members]`` where

        * ``type`` is the text of the ``RAID level`` line
          ('Unknown' when that line is absent),
        * ``status`` is the text of the ``Status of logical device`` line
          ('Unknown' when absent),
        * ``size`` is the ``Size`` value in MB floor-divided by 1000, as a
          string ('?' when absent),
        * ``members`` holds one entry per segment line: the comma-separated
          numbers found between parentheses, or '?,?' when the segment is
          missing or carries no position.
    """
    # Defaults keep the function from failing with an unbound local when
    # an expected line is absent from the output.
    raidtype = 'Unknown'
    status = 'Unknown'
    size = '?'
    members = []
    position = re.compile(r'\((.*)\)')
    for line in output:
        stripped = line.strip()
        # RAID level may be either N or Simple_Volume
        # (a disk connected to the card, not hotspare, not part of any array)
        if re.match(r'^RAID level\s+: .+$', stripped):
            raidtype = line.split(':')[1].strip()
        if re.match(r'^Status of logical device\s+: .*$', stripped):
            status = line.split(':')[1].strip()
        sizematch = re.match(r'^Size\s+: ([0-9]+) MB$', stripped)
        if sizematch:
            # Floor division keeps the result an integer on every Python
            # version.
            size = str(int(sizematch.group(1)) // 1000)
        # The optional "Group N, " prefix is covered by this one pattern,
        # so grouped segments need no separate branch.
        if re.match(r'^(Group\s[0-9]+,\s)?Segment [0-9]+\s+: .*$', stripped):
            # The line can be either
            # Segment 0 : Present (Controller:1,Enclosure:0,Slot:0) JPW9J0N00RWMUV
            # Or
            # Segment 0 : Present (Controller:1,Channel:0,Device:0) S13PJ1CQ719255
            # Or
            # Segment 0 : Present (Controller:1,Connector:1,Device:2) 9QJ7D0MJ
            detail = re.sub('Controller:', '', line)
            detail = re.sub('(Channel|Enclosure|Connector):', '', detail)
            detail = re.sub('(Device|Slot):', '', detail)
            # With the labels gone, the only ':' left separates the
            # segment name from its state and position.
            detail = detail.split(':')[1]
            found = position.search(detail)
            if re.match(r'^ Missing', detail) or found is None:
                members.append('?,?')
            else:
                members.append(found.group(1).strip('(').strip(')'))
    return [raidtype, status, size, members]
def returnControllerTasks(output):
    """Collect the tasks reported in ``output``.

    A task is complete once a ``Logical device``, a ``Current operation``
    and a ``Percentage complete`` line have all been seen; it is then
    recorded and collection starts over for the next task.

    :param output: iterable of output lines
    :returns: list of ``[arrayid, operation, percentage]`` string triples
    """
    fields = (
        ('arrayid', re.compile(r'^Logical device\s+: [0-9]+$')),
        ('type', re.compile(r'^Current operation\s+: .*$')),
        ('state', re.compile(r'^Percentage complete\s+: [0-9]+$')),
    )
    pending = {}
    tasks = []
    for line in output:
        text = line.strip()
        for key, pattern in fields:
            if pattern.match(text):
                pending[key] = line.split(':')[1].strip()
        # All three pieces gathered: emit the task and reset.
        if len(pending) == len(fields):
            tasks.append([pending['arrayid'], pending['type'], pending['state']])
            pending = {}
    return tasks
def returnDisksInfo(output,controllerid):
    """Collect the physical disks described in ``output``.

    A disk is complete once its ``Reported Channel,Device``, ``State``,
    ``Vendor`` and ``Model`` lines have all been seen; it is then recorded
    and collection starts over for the next disk.

    :param output: iterable of output lines
    :param controllerid: controller id, prepended to every disk id
    :returns: list of ``[diskid, state, vendor, model]`` where ``diskid``
              is ``<controllerid>,<channel>,<device>``
    """
    location = re.compile(
        r'^Reported Channel,Device(\(T:L\))?\s+: [0-9]+,[0-9]+(\([0-9]+:[0-9]+\))?$')
    disks = []
    diskid = state = vendor = model = None
    for line in output:
        text = line.strip()
        if location.match(text):
            # Keep "channel,device" and drop the optional "(T:L)" suffix.
            channel_device = re.split(r'\s:\s', line)[1].strip()
            channel_device = re.sub(r'\(.*\)', '', channel_device)
            diskid = str(controllerid) + ',' + channel_device
        if re.match(r'^State\s+: .*$', text):
            state = line.split(':')[1].strip()
        if re.match(r'^Vendor\s+: .*$', text):
            vendor = line.split(':')[1].strip()
        if re.match(r'^Model\s+: .*$', text):
            model = line.split(':')[1].strip()
        # All four pieces gathered: emit the disk and reset.
        if None not in (diskid, vendor, model, state):
            disks.append([diskid, state, vendor, model])
            diskid = state = vendor = model = None
    return disks
# Main report. Queries arcconf and prints up to three tables (controllers,
# arrays, disks), then exits with status 1 if anything was flagged as bad.
# NOTE(review): block indentation is not visible in this view of the file;
# the comments below describe individual statements only -- confirm the
# nesting against the real script.

# The controller count is parsed from the GETVERSION output.
cmd = '/usr/sbin/arcconf GETVERSION'
output = getOutput(cmd)
controllernumber = returnControllerNumber(output)
# List controllers
if printcontroller:
print '-- Controller informations --'
print '-- ID | Model | Status'
# arcconf is queried with ids starting at 1; the printed id is one lower
# (c0, c1, ...).
controllerid = 1
while controllerid <= controllernumber:
cmd = '/usr/sbin/arcconf GETCONFIG '+str(controllerid)
output = getOutput(cmd)
controllermodel = returnControllerModel(output)
controllerstatus = returnControllerStatus(output)
# Any controller status other than 'Optimal' marks the whole report as bad.
if controllerstatus != 'Optimal':
bad = True
print 'c'+str(controllerid-1)+' | '+controllermodel+' | '+controllerstatus
controllerid += 1
print ''
# List arrays
if printarray:
controllerid = 1
print '-- Arrays informations --'
print '-- ID | Type | Size | Status | Task | Progress'
while controllerid <= controllernumber:
arrayid = 0
cmd = '/usr/sbin/arcconf GETCONFIG '+str(controllerid)
output = getOutput(cmd)
arrayids = returnArrayIds(output)
for arrayid in arrayids:
# Per-array details come from "GETCONFIG <controller> LD <array>".
cmd = '/usr/sbin/arcconf GETCONFIG '+str(controllerid)+' LD '+str(arrayid)
output = getOutput(cmd)
arrayinfo = returnArrayInfo(output)
# arrayinfo is [type, status, size, members]; a status other than
# 'Optimal' marks the report as bad.
if arrayinfo[1] != 'Optimal':
bad = True
# Running tasks come from "GETSTATUS <controller>".
cmd = '/usr/sbin/arcconf GETSTATUS '+str(controllerid)
output = getOutput(cmd)
tasksinfo = returnControllerTasks(output)
# 'done' records whether a line including task and progress was printed.
done = False
# Usually it should return either [0-9] or Simple_Volume but...
# It can also return "6 Reed-Solomon" so we need to handle this too...
# So let's match [0-9] followed by a space or EOL.
if re.match('^[0-9]+(\s|$)',arrayinfo[0]):
# Numeric levels are shown with a 'RAID' prefix.
raidtype = re.sub('^','RAID',arrayinfo[0])
else:
raidtype = arrayinfo[0]
# Print the first task whose array id equals this array's id, if any.
for tasks in tasksinfo:
if int(tasks[0]) == int(arrayid):
print 'c'+str(controllerid-1)+'u'+str(arrayid)+' | '+raidtype+' | '+arrayinfo[2]+'G | '+arrayinfo[1]+' | '+tasks[1]+' | '+tasks[2]+'%'
done = True
break
# No matching task: print the array line without task columns.
if done == False:
print 'c'+str(controllerid-1)+'u'+str(arrayid)+' | '+raidtype+' | '+arrayinfo[2]+'G | '+arrayinfo[1]
controllerid += 1
print ''
# List disks
controllerid = 1
print '-- Disks informations'
print '-- ID | Model | Status'
while controllerid <= controllernumber:
arrayid = 0
cmd = '/usr/sbin/arcconf GETCONFIG '+str(controllerid)
output = getOutput(cmd)
arrayids = returnArrayIds(output)
for arrayid in arrayids:
cmd = '/usr/sbin/arcconf GETCONFIG '+str(controllerid)+' LD '+str(arrayid)
output = getOutput(cmd)
arrayinfo = returnArrayInfo(output)
# Physical disk details come from "GETCONFIG <controller> PD".
cmd = '/usr/sbin/arcconf GETCONFIG '+str(controllerid)+' PD'
output = getOutput(cmd)
diskinfo = returnDisksInfo(output,controllerid)
# arrayinfo[3] holds the array's member positions; each one is compared
# with the disk ids (disk[0]) from the PD listing.
for member in arrayinfo[3]:
i = 0
for disk in diskinfo:
# Only 'Online', 'Hot Spare' and 'Ready' count as healthy disk states.
if disk[1] != 'Online' and disk[1] != 'Hot Spare' and disk[1] != 'Ready':
bad = True
if disk[0] == member:
print 'c'+str(controllerid-1)+'u'+str(arrayid)+'d'+str(i)+' | '+disk[2]+' '+disk[3]+' | '+disk[1]
# NOTE(review): whether this increment runs per disk or only per match
# cannot be told from this view -- confirm.
i += 1
controllerid += 1
# Final verdict: print the NOT OPTIMAL notice and exit with status 1 when
# anything above was flagged.
if bad:
print '\nThere is at least one disk/array in a NOT OPTIMAL state.'
print '\nUse "arcconf GETCONFIG [1-9]" to get details.'
sys.exit(1)