| 1 | # | 
|---|
| 2 | # the current cnagios.pl for UW-HEP | 
|---|
| 3 | # | 
|---|
| 4 |  | 
|---|
| 5 | use strict; | 
|---|
| 6 |  | 
|---|
| 7 | #------------------------------------------------------------------ | 
|---|
| 8 |  | 
|---|
# Shorten the plugin output of a host check.
#
# Takes the raw output string as its only argument and returns a
# condensed copy; the caller's string is not modified.  Output that
# matches none of the known phrases is returned unchanged.
sub host_plugin_hook {
  my ($status) = @_;

  # Fixed phrases and the short form each one is replaced with.
  # Every entry is tried once, in this order.
  my @phrases = (
    [ qr/\(Host assumed to be up\)/,                        'assumed up' ],
    [ qr/\(Host check timed out\)/,                         'timed out'  ],
    [ qr/\(Not enough data to determine host status yet\)/, 'none'       ],
    [ qr/\(No Information Returned From Host Check\)/,      'none'       ],
  );

  for my $entry (@phrases) {
    my ($pattern, $short) = @{$entry};
    $status =~ s/$pattern/$short/;
  }

  # Ping results: keep only the packet-loss percentage.
  $status =~ s/Ping .*? - (\d+)% packet loss.*/$1% pkt loss/;

  return $status;
}
|---|
| 20 |  | 
|---|
| 21 | #------------------------------------------------------------------ | 
|---|
| 22 |  | 
|---|
# Shorten the plugin output of a service check.
#
# Takes the raw output string as its only argument and returns a
# condensed copy; the caller's string is not modified.
#
# The substitutions below are applied strictly in order, each one to
# the result of the previous one, so the sequence matters: later rules
# see already-shortened text, and the status-prefix rules at the very
# end run last.  Output that matches no rule is returned unchanged.
sub service_plugin_hook {
  local($_) = $_[0];

  # generic...
  s/Plugin timed out after \d+ seconds/timed out/;

  # check_pingwithperl...
  s/.* (\d+)% packet loss, \d+.\d+ ms ave rtt/$1% pkt loss/;

  # check_tcp...
  s/.* (\d+\.\d+) second[s]? response time.*/$1 sec response/;
  s/.* (\d+) second[s]? response time.*/$1 sec response/;
  s/.* (\d+\.\d+) sec[s]? response time.*/$1 sec response/;

  # check_ftp
  s/.*Invalid response from host/bad response/;

  # check_ssh...
  s/.* (.*?OpenSSH.*?) .*/$1/;
  s/.*OpenSSH_3.5p1.*/OpenSSH_3.5p1/;
  s/.*Connection refused.*/connection refused/i;

  # check_netsnmp_disk & check_netsnmp_bigdisk...
  # also works for check_dcache_usage...
  #
  # Every fractional "N.N TB" becomes a whole number of GB, and every
  # fractional "N.N GB" is cut down to its integer part.  Each
  # conversion is one s///e, so exactly the text that matched is
  # rewritten; the captured number is never re-interpolated into a
  # second pattern, where its "." would match any character and could
  # hit a different number (e.g. "1.5 GB" rewriting "125 GB").
  # WARNING: "%4.4s" keeps only four characters, so this will fail
  # for > 9999 GB...
  s{(\d+\.\d+) TB}{sprintf("%4.4s", int($1 * 1024)) . " GB"}ge;
  s{(\d+\.\d+) GB}{sprintf("%4.4s", int($1)) . " GB"}ge;
  s/.*?(\d+ GB total,).*?,(\s*\d+ GB avail)/$1$2/;

  # check_netsnmp_loadave...
  s/.* load average: (\d+\.\d+).*/$1 loadave/;

  # check_ntp...
  s/.* Offset ([-]*\d+\.\d+) secs.*/$1 sec offset/;
  s/.* stratum (\d+), offset ([-]*\d+\.\d+).*/stratum $1, $2 sec offset/;
  s/.*Jitter\s+too high.*/jittering/;
  s/.*desynchronized peer server.*/desynchronized peer server/i;
  s/.*probably down.*/down/;

  # check_dhcp et al...
  s/.* Received \d+ DHCPOFFER.*max lease time = (\d+) sec.*/$1 sec lease time/;
  s/.* \d+ in use, (\d+) free/$1 free leases/;
  if ( s/DHCP problem: (.*)/$1/ ) { $_ = lc($_); }

  # check_afs_*...
  s/File Server Performance/Performance/;
  s/.* (\d+ blocked) connections/$1/;
  s/(.*?) AFS (\/.*)/$1 $2/;
  s/(.*?) AFS Volume Quotas/$1 AFS Volumes/;
  s/(\d+) processes running normally/$1 ok processes/;
  s/one process running normally/one ok process/;
  s/% used/%/g;
  # Strip the literal "user." prefix.  The dot must be escaped:
  # unescaped it also ate "user" + any character, e.g. "users".
  s/user\.//g;
  s/(\d+) volumes under quota/$1 ok volumes/;
  s/db version (\d+.\d+)/db $1/;

  # check_condor_client...
  s/.* vm1 = .*?\/(\S+), vm2 = .*?\/(\S+),.*/$1\/$2/;
  s/.* vm1 = .*?\/(\S+), vm2 = .*?\/(\S+).*/$1\/$2/;
  s/.* cpu = (\S+)/$1/;
  s/CondorQueue.*?(\d+ job[s]?, \d+ running).*/$1/;
  s/.*?No condor status.*/no condor status/;

  # check_condor_pool...
  s/.*?(\d+) nodes.*/$1 nodes/;

  # check_condor_queue...
  s/.*?(\d+ idle, \d+ held)/$1/;

  # check_nsr...
  s/.*?(\d+\.\d+ GB), (\d+ saves) since.*/$1, $2/;
  s/.*?(\d+ GB), (\d+ saves) since.*/$1, $2/;
  s/(\d+ GB avail)able, \d+ GB total/$1/;

  # check_hpjd...
  s/.*? - \(\".*\"\)/printer okay/;
  if ( s/(.*)\s+\(\".*\"\)/$1/ ) { $_ = lc($_); }

  # check_LPRng_queue...
  s/(\d+) active job[s]?/$1 active/;
  s/(\d+) stalled job[s]?/$1 stalled/;
  s/(\d+) spooled job[s]?/$1 spooled/;
  # the "incoming" rule is applied twice; the second pass only has an
  # effect when a second "N incoming job(s)" is present...
  s/(\d+) incoming job[s]?/$1 incoming/;
  s/(\d+) incoming job[s]?/$1 incoming/;

  # check_jug_*...
  s/(\d+) JugRPC processes.*/$1 processes/;
  s/.*JugJobs.*?(\d+) running.*/$1 running/;
  s/.*Jug Storage.*?(\d+) unassigned.*/$1 unassigned/;

  # check_dcache*...
  s/.*no status available.*/not found/;
  s/.*not found in the cellInfo.*/not found/;
  s/service is (.*)/$1/;
  # The leading ".*?" must be lazy: a greedy ".*" swallows all but the
  # last digit of the number ("123 ms" would come out as "3 ms").
  s/.*?(\d+) ms ave ping time/$1 ms ping time/;
  s/.*?(\d+) ms ping time/$1 ms ping time/;

  # (my) check_traffic & check_ifHighSpeed_traffic...
  # makes columnized XXX.XX Mbps output...
  s/.*? (.*) Traffic/$1 Traffic/;
  s/Internet Traffic/Traffic/;
  # Pad the first in/out rate of each unit to a six-character column.
  s{(\d+\.\d+) Gbps in}{sprintf("%6.6s", $1) . " Gbps in"}e;
  s{(\d+\.\d+) Gbps out}{sprintf("%6.6s", $1) . " Gbps out"}e;
  s{(\d+\.\d+) Mbps in}{sprintf("%6.6s", $1) . " Mbps in"}e;
  s{(\d+\.\d+) Mbps out}{sprintf("%6.6s", $1) . " Mbps out"}e;
  # Every Kbps rate is converted to Mbps (two decimals, same column).
  s{(\d+\.\d+) Kbps}{sprintf("%6.6s", sprintf("%.2f", $1 / 1000)) . " Mbps"}ge;
  # Rates reported in plain bps are shown as zero Mbps.
  s/\d+\.\d+ bps/  0.00 Mbps/g;

  # check_airport...
  s/(.*? AirPort) Usage/$1/;
  s/no connected clients/no clients/;
  s/(\d+) connected clients/$1 clients/;

  # check_netsnmp_raid...
  s/.*connect failed.*/connect failed/;
  s/.*degraded.*/degraded/;
  s/.*degraded/degraded/;
  s/.*rebuilding.*/rebuilding/;
  s/.*rebuilding/rebuilding/;
  s/.*built.*/building/;
  s/.*built/building/;
  s/.*optimal.*/optimal/;
  s/.*optimal/optimal/;

  # check_ip_routing_with_mtr
  s/\S+ to \S+ hop not found, first hop out is (\S+)/hop is $1/i;

  # check_phedex
  # lazy ".*?" so the whole agent count is kept, not just its last digit
  s/.*?(\d+ UP agents).*/$1/;

  # plugin generic...
  s/.*no response.*/connection timed out/i;
  s/.*no route to host.*/no route to host/i;
  s/Socket timeout.*/socket timed out/;

  # nagios generic...
  s/\(Service Check Timed Out\)/check timed out/;
  s/\(No output returned from plugin\)/no output from plugin/;
  s/Service check scheduled for.*/none/;
  s/No data yet.*/no data yet/;
  s/\.$//;

  # generic generic...
  # drop everything up to and including a leading status tag
  s/.*?OK - //i;
  s/.*?WARNING - //i;
  s/.*?CRITICAL - //i;
  s/.*?UNKNOWN - //i;

  return $_;

}
|---|
| 204 |  | 
|---|
| 205 | #------------------------------------------------------------------ | 
|---|
| 206 |  | 
|---|
| 207 | # this sub is used for host/service/plugin-output  | 
|---|
| 208 | # filtering... it should not change... | 
|---|
| 209 |  | 
|---|
sub regex_hook {
  # Arguments: the string to test, the pattern to test it against,
  # and a numeric mode selecting the sense of the test.
  #
  # Returns:
  #   mode 0 -> 0 when the string matches the pattern, otherwise 1
  #   mode 1 -> 0 when the string does NOT match,      otherwise 1
  #   any other mode -> 2 (the pattern is not evaluated at all)
  my ($str, $regex, $mode) = @_;

  return ( $str =~ /$regex/ ? 0 : 1 ) if $mode == 0;
  return ( $str !~ /$regex/ ? 0 : 1 ) if $mode == 1;

  return 2;
}
|---|
| 220 |  | 
|---|