Open Source Development

 View Only

 Nagios plugin check_ntp_time from AIX Toolbox is not working properly

  • AIX Open Source
Ondřej Rajmon's profile image
Ondřej Rajmon posted Fri November 15, 2024 12:09 PM

Hello,

I updated the nagios-plugins 2.3.3-1 RPM package from AIX Tolbox to there current version 2.4.9-1 and the check_ntp_time probe stopped working. The probe reports no error, but returns a meaningless time offset. I downloaded the same version from https://nagios-plugins.org/nagios-plugins-2-4-9-released/ and compiled it on linux. The resulting probe works correctly against the same NTP server. Could there be some bug in the compilation on AIX Toolbox?
Comparison of -vv probe outputs:
- working check_ntp_time 2.3.3-1

...

sending request to peer 0
response from peer 0: packet contents:
    flags: 0x24
      li=0 (0x00)
      vn=4 (0x20)
      mode=4 (0x04)
    stratum = 2
    poll = 16
    precision = 2.98023e-08
    rtdelay = 0.0004425048828125
    rtdisp = 0.0001983642578125
    refid = a01fa6f
    refts = 1731689861.6004
    origts = 1731689881.030022
    rxts = 1731689881.030304
    txts = 1731689881.030418
offset -0.0002726316452
using peer 0 as our first candidate
best server selected: peer 0
overall average offset: 8.285045624e-05
NTP OK: Offset 8.285045624e-05 secs, stratum best:2 worst:2|offset=0.000083s;60;120; stratum_best=2 stratum_worst=2 num_warn_stratum=0 num_crit_stratum=0

- broken check_ntp_time 2.4.9-1

...

sending request to peer 0
response from peer 0: packet contents:
    flags: 0x24
      li=0 (0x00)
      vn=4 (0x20)
      mode=4 (0x04)
    stratum = 2
    poll = 16
    precision = 2.98023e-08
    rtdelay = 27
    rtdisp = 59
    refid = a01fa6f
    refts = 211258862.9175097
    origts = 1.844674407188569e+19
    rxts = 1.8446744071887e+19
    txts = 1.844674407188743e+19
offset 9.223372035e+18
using peer 0 as our first candidate
best server selected: peer 0
overall average offset: 127892293
NTP CRITICAL: Offset 127892293 secs, stratum best:2 worst:2|offset=127892292.970315s;60;120; stratum_best=2 stratum_worst=2 num_warn_stratum=0 num_crit_stratum=0

Our AIX version is 7200-05-08-2420.


#AIXOpenSource
RESHMA KUMAR's profile image
RESHMA KUMAR

Thanks for reporting the issue.
We will look into it and get back to you.


#AIXOpenSource
Rostislav Stříbrný's profile image
Rostislav Stříbrný

Hi there!

We are facing the same issue.

I tried compiling 2.4.9 directly from source https://nagios-plugins.org/downloads/ on my Ubuntu 22.04 LTS, and checked, that such compiled version works OK.

Therefore I think that the problem is somehow in AIX Toolbox's portation.


#AIXOpenSource
José Pina Coelho's profile image
José Pina Coelho

It looks like another occurrence  of this issue: https://github.com/nagios-plugins/nagios-plugins/issues/677

Introduced on 2.4.8, solved on 2.4.12.


#AIXOpenSource
Ranjit Ranjan's profile image
Ranjit Ranjan

As per git diff, I see code is redesign for check_ntp_time. We need to debug on this . We will update once RC is found.

# git diff release-2.3.3..release-2.4.9  plugins/check_ntp_time.c
diff --git a/plugins/check_ntp_time.c b/plugins/check_ntp_time.c
index 5c47429f..d5ac7905 100644
--- a/plugins/check_ntp_time.c
+++ b/plugins/check_ntp_time.c
@@ -73,13 +73,31 @@ typedef struct {
        uint8_t stratum;     /* clock stratum */
        int8_t poll;         /* polling interval */
        int8_t precision;    /* precision of the local clock */
-   int32_t rtdelay;     /* total rt delay, as a fixed point num. see macros */
-   uint32_t rtdisp;     /* like above, but for max err to primary src */
+ union {
+         int32_t rtdelay;     /* total rt delay, as a fixed point num. see macros */
+         struct {
+                 uint16_t rtdelay_l16;
+                 uint16_t rtdelay_r16;
+         };
+ };
+ union {
+         uint32_t rtdisp;     /* like above, but for max err to primary src */
+         struct {
+                 uint16_t rtdisp_l16;
+                 uint16_t rtdisp_r16;
+         };
+ };
        uint32_t refid;      /* ref clock identifier */
        uint64_t refts;      /* reference timestamp.  local time local clock */
        uint64_t origts;     /* time at which request departed client */
        uint64_t rxts;       /* time at which request arrived at server */
-   uint64_t txts;       /* time at which request departed server */
+ union {
+         uint64_t txts;       /* time at which request departed server */
+         struct {
+                 uint32_t txts_l32;
+                 uint32_t txts_r32;
+         };
+ };
 } ntp_message;
 
 /* this structure holds data about results from querying offset from a peer */
@@ -155,35 +173,68 @@ ntp_server_results *servers=NULL;
 /* ntp wants seconds since 1/1/00, epoch is 1/1/70.  this is the difference */
 #define EPOCHDIFF 0x83aa7e80UL
 
+/* extract double from 32-bit ntp fixed point number, without violating strict aliasing rules */
+double ntp32_to_double(uint32_t n) {
+ double result = 0;
+ if (n) {
+         uint16_t l16 = ntohs((uint16_t) (n & ((1<<16) - 1)));
+         uint16_t r16 = ntohs((uint16_t) (n >> 16));
+         result = l16 + ((double) r16/65536.0);
+ }
+ return result;
+}
 /* extract a 32-bit ntp fixed point number into a double */
-#define NTP32asDOUBLE(x) (ntohs(L16(x)) + (double)ntohs(R16(x))/65536.0)
+/* #define NTP32asDOUBLE(x) (ntohs(L16(x)) + (double)ntohs(R16(x))/65536.0)*/
+
+/* extract double from 64-bit ntp fixed point number, without violating strict aliasing rules */
+double ntp64_to_double(uint64_t n) {
+ double result = 0;
+ if (n) {
+         uint32_t l32 = ntohl((uint32_t) (n & ((1ul<<32) - 1)));
+         uint32_t r32 = ntohl((uint32_t) (n >> 32));
+         result = (l32 - EPOCHDIFF)
+                + (.00000001*(0.5+(double)(r32/42.94967296)));
+ }
+ return result;
+}
 
 /* likewise for a 64-bit ntp fp number */
-#define NTP64asDOUBLE(n) (double)(((uint64_t)n)?\
-                         (ntohl(L32(n))-EPOCHDIFF) + \
-                         (.00000001*(0.5+(double)(ntohl(R32(n))/42.94967296))):\
-                         0)
+/* #define NTP64asDOUBLE(n) (double)(((uint64_t)n)?\
+                          (ntohl(L32(n))-EPOCHDIFF) + \
+                          (.00000001*(0.5+(double)(ntohl(R32(n))/42.94967296))):\
+                          0)*/
 
 /* convert a struct timeval to a double */
 #define TVasDOUBLE(x) (double)(x.tv_sec+(0.000001*x.tv_usec))
 
+struct timeval ntp64_to_tv(uint64_t n) {
+ struct timeval result = {};
+ if (n) {
+         uint32_t l32 = ntohl((uint32_t) (n & ((1ul<<32) - 1)));
+         uint32_t r32 = ntohl((uint32_t) (n >> 32));
+         result.tv_sec = l32 - EPOCHDIFF;
+         result.tv_usec = (int)(0.5+(double)(r32/4294.967296));
+ }
+ return result;
+}
+
 /* convert an ntp 64-bit fp number to a struct timeval */
-#define NTP64toTV(n,t) \
+/* #define NTP64toTV(n,t) \
        do{ if(!n) t.tv_sec = t.tv_usec = 0; \
            else { \
                        t.tv_sec=ntohl(L32(n))-EPOCHDIFF; \
                        t.tv_usec=(int)(0.5+(double)(ntohl(R32(n))/4294.967296)); \
                } \
-   }while(0)
+ }while(0) */
 
 /* convert a struct timeval to an ntp 64-bit fp number */
-#define TVtoNTP64(t,n) \
+/* #define TVtoNTP64(t,n) \
        do{ if(!t.tv_usec && !t.tv_sec) n=0x0UL; \
                else { \
                        L32(n)=htonl(t.tv_sec + EPOCHDIFF); \
                        R32(n)=htonl((uint64_t)((4294.967296*t.tv_usec)+.5)); \
                } \
-   } while(0)
+ } while(0) */
 
 /* NTP control message header is 12 bytes, plus any data in the data
  * field, plus null padding to the nearest 32-bit boundary per rfc.
@@ -200,9 +251,9 @@ ntp_server_results *servers=NULL;
 /* calculate the offset of the local clock */
 static inline double calc_offset(const ntp_message *m, const struct timeval *t){
        double client_tx, peer_rx, peer_tx, client_rx;
-   client_tx = NTP64asDOUBLE(m->origts);
-   peer_rx = NTP64asDOUBLE(m->rxts);
-   peer_tx = NTP64asDOUBLE(m->txts);
+ client_tx = ntp64_to_double(m->origts);
+ peer_rx = ntp64_to_double(m->rxts);
+ peer_tx = ntp64_to_double(m->txts);
        client_rx=TVasDOUBLE((*t));
        return (.5*((peer_tx-client_rx)+(peer_rx-client_tx)));
 }
@@ -211,10 +262,10 @@ static inline double calc_offset(const ntp_message *m, const struct timeval *t){
 void print_ntp_message(const ntp_message *p){
        struct timeval ref, orig, rx, tx;
 
-   NTP64toTV(p->refts,ref);
-   NTP64toTV(p->origts,orig);
-   NTP64toTV(p->rxts,rx);
-   NTP64toTV(p->txts,tx);
+ ref = ntp64_to_tv(p->refts);
+ orig = ntp64_to_tv(p->origts);
+ rx = ntp64_to_tv(p->rxts);
+ tx = ntp64_to_tv(p->txts);
 
        printf("packet contents:\n");
        printf("\tflags: 0x%.2x\n", p->flags);
@@ -224,13 +275,13 @@ void print_ntp_message(const ntp_message *p){
        printf("\tstratum = %d\n", p->stratum);
        printf("\tpoll = %g\n", pow(2, p->poll));
        printf("\tprecision = %g\n", pow(2, p->precision));
-   printf("\trtdelay = %-.16g\n", NTP32asDOUBLE(p->rtdelay));
-   printf("\trtdisp = %-.16g\n", NTP32asDOUBLE(p->rtdisp));
+ printf("\trtdelay = %-.16g\n", ntp32_to_double(p->rtdelay));
+ printf("\trtdisp = %-.16g\n", ntp32_to_double(p->rtdisp));
        printf("\trefid = %x\n", p->refid);
-   printf("\trefts = %-.16g\n", NTP64asDOUBLE(p->refts));
-   printf("\torigts = %-.16g\n", NTP64asDOUBLE(p->origts));
-   printf("\trxts = %-.16g\n", NTP64asDOUBLE(p->rxts));
-   printf("\ttxts = %-.16g\n", NTP64asDOUBLE(p->txts));
+ printf("\trefts = %-.16g\n", ntp64_to_double(p->refts));
+ printf("\torigts = %-.16g\n", ntp64_to_double(p->origts));
+ printf("\trxts = %-.16g\n", ntp64_to_double(p->rxts));
+ printf("\ttxts = %-.16g\n", ntp64_to_double(p->txts));
 }
 
 void setup_request(ntp_message *p){
@@ -242,11 +293,17 @@ void setup_request(ntp_message *p){
        MODE_SET(p->flags, MODE_CLIENT);
        p->poll=4;
        p->precision=(int8_t)0xfa;
-   L16(p->rtdelay)=htons(1);
-   L16(p->rtdisp)=htons(1);
+ p->rtdelay_l16=htons(1);
+ p->rtdisp_l16=htons(1);
 
        gettimeofday(&t, NULL);
-   TVtoNTP64(t,p->txts);
+
+ /* This used to use TVtoNTP64 but we ran into issues with strict aliasing/type punning.
+  * We only used the macro in one place, so it's inlined now */
+ if (t.tv_usec || t.tv_sec) {
+         p->txts_l32 = htonl(t.tv_sec + EPOCHDIFF);
+         p->txts_r32 = htonl((uint64_t) ((4294.967296*t.tv_usec)+.5));
+ }
 }
 
 /* select the "best" server from a list of servers, and return its index.
@@ -420,8 +477,8 @@ double offset_request(const char *host, int *status){
                                        printf("offset %.10g\n", servers[i].offset[respnum]);
                                }
                                servers[i].stratum=req[i].stratum;
-                           servers[i].rtdisp=NTP32asDOUBLE(req[i].rtdisp);
-                           servers[i].rtdelay=NTP32asDOUBLE(req[i].rtdelay);
+                         servers[i].rtdisp=ntp32_to_double(req[i].rtdisp);
+                         servers[i].rtdelay=ntp32_to_double(req[i].rtdelay);
                                servers[i].waiting--;
                                servers[i].flags=req[i].flags;
                                servers_readable--;
@@ -490,7 +547,7 @@ int process_arguments(int argc, char **argv){
                usage ("\n");
 
        while (1) {
-           c = getopt_long (argc, argv, "Vhv46qw:c:t:H:p:o:d:", longopts, &option);
+         c = getopt_long (argc, argv, "Vhv46qw:c:t:H:p:o:d:C:W:", longopts, &option);
                if (c == -1 || c == EOF || c == 1)
                        break;
 
@@ -722,6 +779,6 @@ void
 print_usage(void)
 {
        printf ("%s\n", _("Usage:"));
-   printf(" %s -H <host> [-4|-6] [-w <warn>] [-c <crit>] [-v verbose] [-o <time offset>] [-d <delay>]\n", progname);
+ printf(" %s -H <host> [-4|-6] [-w <warn>] [-c <crit>] [-v verbose] [-o <time offset>] [-d <delay>] [-W <stratum warn>] [-C <stratum crit>]\n", progname);


#AIXOpenSource
Ranjit Ranjan's profile image
Ranjit Ranjan

Hi, 
we will build 2.4.12 and check the result also.

Thanks
Ranjit


#AIXOpenSource