Power Business Continuity and Automation

Power Business Continuity and Automation

Connect, learn, and share your experiences using the business continuity and automation technologies and practices designed to ensure uninterrupted operations and rapid recovery for workloads running on IBM Power systems. 


#Power
#TechXchangeConferenceLab

 View Only

QHA utility for PowerHA cluster status

By Shawn Bodily posted yesterday

  

Copy the following script into a directory in your PATH and make it executable. The valid flags are listed and explained below. I personally use qha -nev the most.

qha version 9.06
Usage: qha [-n] [-N] [-v] [-l] [-e] [-m] [-1] [-c]
-n displays network interfaces
-N displays network interfaces + non IP heartbeat disk
-v shows online VGs
-l logs entries to /tmp/qha.out
-e shows running event
-m shows appmon status
-1 single iteration
-c shows CAA SAN/Disk Status (AIX7.1 TL3 min.)


#!/bin/ksh
# Purpose: Provides an alternative to SNMP monitoring for PowerHA/HACMP (clinfo and clstat).
# Designed to be run within the cluster, not remotely. See next point!
# Can be customised to run remotely and monitor multiple clusters!
# Version: 9.06
# Updates for PowerHA version 7.1
# Authors: 1. Alex Abderrazag IBM UK
#          2. Bill Miller IBM US
# Additions since 8.14.
# qha can be freely distributed. If you have any questions or would like to see any enhancements/updates, please email abderra@uk.ibm.com

# VARS
export PATH=$PATH:/usr/es/sbin/cluster/utilities
# Level of the cluster.es.server.rte fileset as reported by lslpp, dots removed.
VERSION=$(lslpp -L | grep -i cluster.es.server.rte | awk '{print $2}' | sed 's/\.//g')
# Cluster name read from the HACMPcluster ODM class (surrounding quotes stripped).
CLUSTER=$(odmget HACMPcluster | grep -v node | grep name | awk '{print $3}' | sed "s:\"::g")
UTILDIR=/usr/es/sbin/cluster/utilities
# The clrsh directory in v7 and later must be /usr/sbin; in previous versions
# it is /usr/es/sbin/cluster/utilities.
# Don't forget also that the rhosts file for v7 and later is /etc/cluster/rhosts
if [[ $(lslpp -L | grep -i cluster.es.server.rte | awk '{print $2}' | cut -d'.' -f1) -ge 7 ]]; then
    CDIR=/usr/sbin
else
    CDIR=$UTILDIR
fi
# Work files carry the PID ($$) so that concurrent qha instances do not
# overwrite each other's screen buffer, log, or adapter list.
OUTFILE=/tmp/.qha.$$
LOGGING=/tmp/qha.out.$$
ADFILE=/tmp/.ad.$$
# Full path of hacmp.out, built from the directory stored in the HACMPlogs ODM class.
HACMPOUT=$(/usr/bin/odmget -q name="hacmp.out" HACMPlogs | fgrep value | sed 's/.*=\ "\(.*\)"$/\1\/hacmp.out/')
# Remote command used to query each cluster node.
COMMcmd="$CDIR/clrsh"
# Seconds to sleep between screen refreshes.
REFRESH=0
# Print the version banner, the option synopsis and one help line per flag
# to stdout. printf is used instead of echo so the tab/newline escapes are
# expanded the same way regardless of which shell's echo is in effect.
usage() {
    printf 'qha version 9.06\n'
    printf 'Usage: qha [-n] [-N] [-v] [-l] [-e] [-m] [-1] [-c]\n'
    # The format is re-applied to every argument: one tab-indented line each.
    printf '\t\t%s\n' \
        '-n displays network interfaces' \
        '-N displays network interfaces + nonIP heartbeat disk' \
        '-v shows online VGs' \
        '-l logs entries to /tmp/qha.out' \
        '-e shows running event' \
        '-m shows appmon status' \
        '-1 single iteration' \
        '-c shows CAA SAN/Disk Status (AIX7.1 TL3 min.)'
}

function adapters {
# Print the "interface address" pairs stored in $ADFILE, putting all addresses
# of one interface on the same line, then delete $ADFILE.
# When HBOD is TRUE, additionally list the heartbeat-on-disk devices of
# $HANODE (v6 and below only).
# Globals read: ADFILE, HBOD, VERSION, OUTFILE, COMMcmd, HANODE, UTILDIR.
# Globals written: VER, HBODs, APVID, AHBOD.
typeset ifname addr rest prev_if first hbdisk
first=1
prev_if=""
if [[ -r "$ADFILE" ]]; then
    while read -r ifname addr rest
    do
        if [[ $first -eq 1 ]]; then
            # The very first entry is indented by one space.
            printf ' %s %s ' "$ifname" "$addr"
            first=0
        elif [[ "$ifname" = "$prev_if" ]]; then
            # Same interface as the previous line: append the address.
            printf '%s ' "$addr"
        else
            # New interface: start a new output line.
            printf '\n%s %s ' "$ifname" "$addr"
        fi
        prev_if=$ifname
    done < "$ADFILE"
fi
rm -f "$ADFILE"
if [ "$HBOD" = "TRUE" ]; then # Code for v6 and below only. To be deleted soon.
    # Process Heartbeat on Disk networks (Bill Millers code)
    VER=$(echo "$VERSION" | cut -c 1)
    if [[ $VER = "7" ]]; then
        # Nothing to query on v7: report it and skip the v6-only code below.
        printf '%s\n' "[HBOD option not supported]" >> "$OUTFILE"
    else
        HBODs=$("$COMMcmd" "$HANODE" "$UTILDIR/cllsif" | grep diskhb | grep -w "$HANODE" | awk '{print $8}')
        # $HBODs is intentionally unquoted: one word per heartbeat disk.
        for hbdisk in $HBODs
        do
            # Characters 13 onward of the PVID column from lspv.
            APVID=$("$COMMcmd" "$HANODE" "lspv" | grep -w "$hbdisk" | awk '{print $2}' | cut -c 13-)
            # Fourth column of the matching r<disk> line in the topsvcs status.
            AHBOD=$("$COMMcmd" "$HANODE" lssrc -ls topsvcs | grep -w "r$hbdisk" | awk '{print $4}')
            if [ -n "$AHBOD" ]; then
                printf '\n\t%-13s %-10s' "$hbdisk($APVID)" '[activeHBOD]'
            else
                printf '\n\t%-13s %-10s' "$hbdisk" '[inactiveHBOD]'
            fi
        done
    fi
fi
}
function work {
# Print the status section for one cluster node.
# Arguments: $1 node name, $2 node counter, $3 TRUE to list network
#            interfaces, $4 TRUE to list online volume groups.
# Globals read: COMMcmd, UTILDIR, HACMPOUT, ADFILE, SHOWEVENT, APPMONSTAT, CAA.
# Globals written: HANODE (also read by adapters), CNT, NET, VGP, EVENT,
#                  CLSTRMGR, RG, APPMON, VGO and the CAA status variables.
HANODE=$1; CNT=$2; NET=$3; VGP=$4
#clrsh $HANODE date > /dev/null 2>&1 || ping -w 1 -c1 $HANODE > /dev/null 2>&1
# A trivial remote command tells us whether the node can be queried at all.
if "$COMMcmd" "$HANODE" date > /dev/null 2>&1; then
    EVENT=""
    CLSTRMGR=$("$COMMcmd" "$HANODE" lssrc -ls clstrmgrES | grep -i state | sed 's/Current state: //g')
    if [[ $CLSTRMGR != ST_STABLE && $CLSTRMGR != ST_INIT && $SHOWEVENT = TRUE ]]; then
        # Cluster manager is busy: show the last event started in hacmp.out.
        EVENT=$("$COMMcmd" "$HANODE" cat "$HACMPOUT" | grep "EVENT START" | tail -1 | awk '{print $6}')
        printf "\n%-8s %-7s %-15s\n" "$HANODE" iState: "$CLSTRMGR [$EVENT]"
    else
        printf "\n%-8s %-7s %-15s\n" "$HANODE" iState: "$CLSTRMGR"
    fi
    # One colon-separated clfindres line per resource group; keep the ones
    # whose third field is this node and that are not OFFLINE.
    "$UTILDIR"/clfindres -s 2>/dev/null | grep -v OFFLINE | while read A
    do
        if [[ "$(printf '%s\n' "$A" | awk -F: '{print $3}')" = "$HANODE" ]]; then
            printf '%s\n' "$A" | awk -F: '{printf " %-18.16s %-10.12s %-1.20s", $1, $2, $9}'
            if [ "$APPMONSTAT" = "TRUE" ]; then
                RG=$(printf '%s\n' "$A" | awk -F':' '{print $1}')
                APPMON=$("$UTILDIR"/clRGinfo -m | grep -p "$RG" | grep "ONLINE" | awk 'NR>1 {print $1" "$2}')
                printf '(%s)\n' "$APPMON"
            else
                printf '\n'
            fi
        fi
    done
    if [ "$CAA" = "TRUE" ]; then
        IP_Comm_method=$(odmget HACMPcluster | grep heartbeattype | awk -F'"' '{print $2}')
        case $IP_Comm_method in
            C) # we're multicasting
                printf " CAA Multicasting:"
                "$COMMcmd" "$HANODE" lscluster -m | grep 'en[0-9]' | awk '{printf " (%s %s)", $1, $2}'
                printf '\n'
                ;;
            U) # we're unicasting
                printf " CAA Unicasting:"
                "$COMMcmd" "$HANODE" lscluster -m | grep tcpsock | awk '{printf " (%s %s %s)", $2, $3, $5}'
                printf '\n'
                ;;
        esac
        # NOTE(review): clras is invoked directly, not through $COMMcmd, so
        # these two values come from the node qha runs on - confirm intended.
        SAN_COMMS_STATUS=$(/usr/lib/cluster/clras sancomm_status | egrep -v "(--|UUID)" | awk -F'|' '{print $4}' | sed 's/ //g')
        DP_COMM_STATUS=$(/usr/lib/cluster/clras dpcomm_status | grep "$HANODE" | awk -F'|' '{print $4}' | sed 's/ //g')
        printf ' CAA SAN Comms: %s | DISK Comms: %s\n' "$SAN_COMMS_STATUS" "$DP_COMM_STATUS"
    fi
    if [ "$NET" = "TRUE" ]; then
        # Columns 1 and 4 of netstat -i, minus header, link-level and lo lines.
        "$COMMcmd" "$HANODE" netstat -i | egrep -v "(Name|link|lo)" | awk '{print $1" "$4" "}' > "$ADFILE"
        adapters; printf "\n- "
    fi
    if [ "$VGP" = "TRUE" ]; then
        # Online VGs (excluding rootvg and caavg_private) with their disks,
        # reshaped by the sed chain into "vg(hdiskN ...)" groups.
        VGO=$("$COMMcmd" "$HANODE" "lsvg -o |fgrep -v caavg_private |fgrep -v rootvg |lsvg -pi 2> /dev/null" | awk '{printf "%s)", $1}' | sed 's:)PV_NAME)hdisk::g' | sed 's/:/(/g' | sed 's:):) :g' | sed 's: hdisk:(:g' 2> /dev/null)
        if [ "$NET" = "TRUE" ]; then
            # The "- " prefix was already printed after the adapter list.
            printf '%s-\n' "$VGO"
        else
            printf -- '- %s-\n' "$VGO"
        fi
    fi
else
    # Remote command failed: distinguish "node down" from "clcomd problem".
    if ping -w 1 -c1 "$HANODE" > /dev/null 2>&1; then
        printf "\nPing to %s good, but can't get the status. Check clcomdES.\n" "$HANODE"
    else
        printf '\n%s not responding, check network availability.\n' "$HANODE"
    fi
fi
}

# Main
# Option defaults. Every switch holds the string TRUE or FALSE so that the
# comparisons further down behave the same for all of them.
NETWORK="FALSE"; VG="FALSE"; HBOD="FALSE"; LOG="FALSE"; APPMONSTAT="FALSE"; STOP=0
CAA="FALSE"; REMOTE="FALSE"; SHOWEVENT="FALSE"
# Get Vars
while getopts :nNvlem1c ARGs
do
    case $ARGs in
        n) # -n show interface info
            NETWORK="TRUE";;
        N) # -N show interface info and activeHBOD
            NETWORK="TRUE"; HBOD="TRUE";;
        v) # -v show ONLINE VG info
            VG="TRUE";;
        l) # -l log to /tmp/qha.out
            LOG="TRUE";;
        e) # -e show running events if cluster is unstable
            SHOWEVENT="TRUE";;
        m) # -m show status of monitor app servers if present
            APPMONSTAT="TRUE";;
        1) # -1 exit after first iteration
            STOP=1;;
        c) # CAA SAN / DISK Comms
            CAA="TRUE";;
        \?) # unknown flag: explain and fail
            printf "\nNot a valid option\n\n" ; usage ; exit 1 ;;
    esac
done
# Previous state line, used to detect changes worth logging.
OO=""
# Remove the work files when interrupted. Signal 9 (KILL) cannot be caught,
# so it is not listed.
trap 'rm -f "$OUTFILE" "$ADFILE"; exit 0' 1 2 12 15
while true
do
    COUNT=0
    # Build the whole screen in $OUTFILE first (clear-screen sequence, then
    # the header), then show it in one go to avoid flicker.
    printf '\033[H\033[2J\t\tCluster: %s (%s)\n' "$CLUSTER" "$VERSION" > "$OUTFILE"
    printf '\t\t%s\n' "$(date '+%T %d%b%y')" >> "$OUTFILE"
    if [[ $REMOTE = "TRUE" ]]; then
        # NOTE(review): REMOTE is never set to TRUE and CLHOSTS is not
        # assigned in the visible script - this branch needs both to be
        # supplied by whoever customises qha for remote use.
        Fstr=$(grep -v "^#" "$CLHOSTS")
    else
        Fstr=$(odmget HACMPnode | grep name | sort -u | awk '{print $3}' | sed "s:\"::g")
    fi
    # $Fstr is intentionally unquoted: one word per node name.
    for MAC in $Fstr
    do
        COUNT=$((COUNT + 1))
        work "$MAC" "$COUNT" "$NETWORK" "$VG" "$HBOD"
    done >> "$OUTFILE"
    cat "$OUTFILE"
    if [[ $LOG = "TRUE" ]]; then
        # Flatten the screen into one line with single spaces.
        wLINE=$(sed s'/^.*Cluster://g' "$OUTFILE" | awk '{print " "$0}' | tr -s '[:space:]' '[ *]' | awk '{print $0}')
        # Drop the first three fields so they do not count as a state change.
        wLINE_three=$(printf '%s\n' "$wLINE" | awk '{for(i=4;i<=NF;++i) printf("%s ", $i) }')
        if [[ "$OO" != "$wLINE_three" ]]; then
            # Note, there's been a state change, so write to the log
            # Alternatively, do something additional, for example: send an snmp trap
            # alert, using the snmptrap command. For example:
            # snmptrap -c <community> -h <snmp agent> -m "appropriate message"
            printf '%s\n' "$wLINE" >> "$LOGGING"
        fi
        OO="$wLINE_three"
    fi
    if [[ $STOP -eq 1 ]]; then
        # Single-iteration mode: do not leave the screen buffer behind.
        rm -f "$OUTFILE"
        exit 0
    fi
    sleep "$REFRESH"
done

0 comments
3 views

Permalink