AIX Open Source

 View Only

 Nagios plugin check_ntp_time from AIX Toolbox is not working properly

Ondřej Rajmon's profile image
Ondřej Rajmon posted Fri November 15, 2024 12:09 PM

Hello,

I updated the nagios-plugins 2.3.3-1 RPM package from AIX Tolbox to there current version 2.4.9-1 and the check_ntp_time probe stopped working. The probe reports no error, but returns a meaningless time offset. I downloaded the same version from https://nagios-plugins.org/nagios-plugins-2-4-9-released/ and compiled it on linux. The resulting probe works correctly against the same NTP server. Could there be some bug in the compilation on AIX Toolbox?
Comparison of -vv probe outputs:
- working check_ntp_time 2.3.3-1

...

sending request to peer 0
response from peer 0: packet contents:
    flags: 0x24
      li=0 (0x00)
      vn=4 (0x20)
      mode=4 (0x04)
    stratum = 2
    poll = 16
    precision = 2.98023e-08
    rtdelay = 0.0004425048828125
    rtdisp = 0.0001983642578125
    refid = a01fa6f
    refts = 1731689861.6004
    origts = 1731689881.030022
    rxts = 1731689881.030304
    txts = 1731689881.030418
offset -0.0002726316452
using peer 0 as our first candidate
best server selected: peer 0
overall average offset: 8.285045624e-05
NTP OK: Offset 8.285045624e-05 secs, stratum best:2 worst:2|offset=0.000083s;60;120; stratum_best=2 stratum_worst=2 num_warn_stratum=0 num_crit_stratum=0

- broken check_ntp_time 2.4.9-1

...

sending request to peer 0
response from peer 0: packet contents:
    flags: 0x24
      li=0 (0x00)
      vn=4 (0x20)
      mode=4 (0x04)
    stratum = 2
    poll = 16
    precision = 2.98023e-08
    rtdelay = 27
    rtdisp = 59
    refid = a01fa6f
    refts = 211258862.9175097
    origts = 1.844674407188569e+19
    rxts = 1.8446744071887e+19
    txts = 1.844674407188743e+19
offset 9.223372035e+18
using peer 0 as our first candidate
best server selected: peer 0
overall average offset: 127892293
NTP CRITICAL: Offset 127892293 secs, stratum best:2 worst:2|offset=127892292.970315s;60;120; stratum_best=2 stratum_worst=2 num_warn_stratum=0 num_crit_stratum=0

Our AIX version is 7200-05-08-2420.

RESHMA KUMAR's profile image
RESHMA KUMAR

Thanks for reporting the issue.
We will look into it and get back to you.

Rostislav Stříbrný's profile image
Rostislav Stříbrný

Hi there!

We are facing the same issue.

I tried compiling 2.4.9 directly from source https://nagios-plugins.org/downloads/ on my Ubuntu 22.04 LTS, and checked, that such compiled version works OK.

Therefore I think that the problem is somehow in AIX Toolbox's portation.

José Pina Coelho's profile image
José Pina Coelho

It looks like another occurrence  of this issue: https://github.com/nagios-plugins/nagios-plugins/issues/677

Introduced on 2.4.8, solved on 2.4.12.

Ranjit Ranjan's profile image
Ranjit Ranjan

As per git diff, I see code is redesign for check_ntp_time. We need to debug on this . We will update once RC is found.

# git diff release-2.3.3..release-2.4.9  plugins/check_ntp_time.c
diff --git a/plugins/check_ntp_time.c b/plugins/check_ntp_time.c
index 5c47429f..d5ac7905 100644
--- a/plugins/check_ntp_time.c
+++ b/plugins/check_ntp_time.c
@@ -73,13 +73,31 @@ typedef struct {
        uint8_t stratum;     /* clock stratum */
        int8_t poll;         /* polling interval */
        int8_t precision;    /* precision of the local clock */
-   int32_t rtdelay;     /* total rt delay, as a fixed point num. see macros */
-   uint32_t rtdisp;     /* like above, but for max err to primary src */
+ union {
+         int32_t rtdelay;     /* total rt delay, as a fixed point num. see macros */
+         struct {
+                 uint16_t rtdelay_l16;
+                 uint16_t rtdelay_r16;
+         };
+ };
+ union {
+         uint32_t rtdisp;     /* like above, but for max err to primary src */
+         struct {
+                 uint16_t rtdisp_l16;
+                 uint16_t rtdisp_r16;
+         };
+ };
        uint32_t refid;      /* ref clock identifier */
        uint64_t refts;      /* reference timestamp.  local time local clock */
        uint64_t origts;     /* time at which request departed client */
        uint64_t rxts;       /* time at which request arrived at server */
-   uint64_t txts;       /* time at which request departed server */
+ union {
+         uint64_t txts;       /* time at which request departed server */
+         struct {
+                 uint32_t txts_l32;
+                 uint32_t txts_r32;
+         };
+ };
 } ntp_message;
 
 /* this structure holds data about results from querying offset from a peer */
@@ -155,35 +173,68 @@ ntp_server_results *servers=NULL;
 /* ntp wants seconds since 1/1/00, epoch is 1/1/70.  this is the difference */
 #define EPOCHDIFF 0x83aa7e80UL
 
+/* extract double from 32-bit ntp fixed point number, without violating strict aliasing rules */
+double ntp32_to_double(uint32_t n) {
+ double result = 0;
+ if (n) {
+         uint16_t l16 = ntohs((uint16_t) (n & ((1<<16) - 1)));
+         uint16_t r16 = ntohs((uint16_t) (n >> 16));
+         result = l16 + ((double) r16/65536.0);
+ }
+ return result;
+}
 /* extract a 32-bit ntp fixed point number into a double */
-#define NTP32asDOUBLE(x) (ntohs(L16(x)) + (double)ntohs(R16(x))/65536.0)
+/* #define NTP32asDOUBLE(x) (ntohs(L16(x)) + (double)ntohs(R16(x))/65536.0)*/
+
+/* extract double from 64-bit ntp fixed point number, without violating strict aliasing rules */
+double ntp64_to_double(uint64_t n) {
+ double result = 0;
+ if (n) {
+         uint32_t l32 = ntohl((uint32_t) (n & ((1ul<<32) - 1)));
+         uint32_t r32 = ntohl((uint32_t) (n >> 32));
+         result = (l32 - EPOCHDIFF)
+                + (.00000001*(0.5+(double)(r32/42.94967296)));
+ }
+ return result;
+}
 
 /* likewise for a 64-bit ntp fp number */
-#define NTP64asDOUBLE(n) (double)(((uint64_t)n)?\
-                         (ntohl(L32(n))-EPOCHDIFF) + \
-                         (.00000001*(0.5+(double)(ntohl(R32(n))/42.94967296))):\
-                         0)
+/* #define NTP64asDOUBLE(n) (double)(((uint64_t)n)?\
+                          (ntohl(L32(n))-EPOCHDIFF) + \
+                          (.00000001*(0.5+(double)(ntohl(R32(n))/42.94967296))):\
+                          0)*/
 
 /* convert a struct timeval to a double */
 #define TVasDOUBLE(x) (double)(x.tv_sec+(0.000001*x.tv_usec))
 
+struct timeval ntp64_to_tv(uint64_t n) {
+ struct timeval result = {};
+ if (n) {
+         uint32_t l32 = ntohl((uint32_t) (n & ((1ul<<32) - 1)));
+         uint32_t r32 = ntohl((uint32_t) (n >> 32));
+         result.tv_sec = l32 - EPOCHDIFF;
+         result.tv_usec = (int)(0.5+(double)(r32/4294.967296));
+ }
+ return result;
+}
+
 /* convert an ntp 64-bit fp number to a struct timeval */
-#define NTP64toTV(n,t) \
+/* #define NTP64toTV(n,t) \
        do{ if(!n) t.tv_sec = t.tv_usec = 0; \
            else { \
                        t.tv_sec=ntohl(L32(n))-EPOCHDIFF; \
                        t.tv_usec=(int)(0.5+(double)(ntohl(R32(n))/4294.967296)); \
                } \
-   }while(0)
+ }while(0) */
 
 /* convert a struct timeval to an ntp 64-bit fp number */
-#define TVtoNTP64(t,n) \
+/* #define TVtoNTP64(t,n) \
        do{ if(!t.tv_usec && !t.tv_sec) n=0x0UL; \
                else { \
                        L32(n)=htonl(t.tv_sec + EPOCHDIFF); \
                        R32(n)=htonl((uint64_t)((4294.967296*t.tv_usec)+.5)); \
                } \
-   } while(0)
+ } while(0) */
 
 /* NTP control message header is 12 bytes, plus any data in the data
  * field, plus null padding to the nearest 32-bit boundary per rfc.
@@ -200,9 +251,9 @@ ntp_server_results *servers=NULL;
 /* calculate the offset of the local clock */
 static inline double calc_offset(const ntp_message *m, const struct timeval *t){
        double client_tx, peer_rx, peer_tx, client_rx;
-   client_tx = NTP64asDOUBLE(m->origts);
-   peer_rx = NTP64asDOUBLE(m->rxts);
-   peer_tx = NTP64asDOUBLE(m->txts);
+ client_tx = ntp64_to_double(m->origts);
+ peer_rx = ntp64_to_double(m->rxts);
+ peer_tx = ntp64_to_double(m->txts);
        client_rx=TVasDOUBLE((*t));
        return (.5*((peer_tx-client_rx)+(peer_rx-client_tx)));
 }
@@ -211,10 +262,10 @@ static inline double calc_offset(const ntp_message *m, const struct timeval *t){
 void print_ntp_message(const ntp_message *p){
        struct timeval ref, orig, rx, tx;
 
-   NTP64toTV(p->refts,ref);
-   NTP64toTV(p->origts,orig);
-   NTP64toTV(p->rxts,rx);
-   NTP64toTV(p->txts,tx);
+ ref = ntp64_to_tv(p->refts);
+ orig = ntp64_to_tv(p->origts);
+ rx = ntp64_to_tv(p->rxts);
+ tx = ntp64_to_tv(p->txts);
 
        printf("packet contents:\n");
        printf("\tflags: 0x%.2x\n", p->flags);
@@ -224,13 +275,13 @@ void print_ntp_message(const ntp_message *p){
        printf("\tstratum = %d\n", p->stratum);
        printf("\tpoll = %g\n", pow(2, p->poll));
        printf("\tprecision = %g\n", pow(2, p->precision));
-   printf("\trtdelay = %-.16g\n", NTP32asDOUBLE(p->rtdelay));
-   printf("\trtdisp = %-.16g\n", NTP32asDOUBLE(p->rtdisp));
+ printf("\trtdelay = %-.16g\n", ntp32_to_double(p->rtdelay));
+ printf("\trtdisp = %-.16g\n", ntp32_to_double(p->rtdisp));
        printf("\trefid = %x\n", p->refid);
-   printf("\trefts = %-.16g\n", NTP64asDOUBLE(p->refts));
-   printf("\torigts = %-.16g\n", NTP64asDOUBLE(p->origts));
-   printf("\trxts = %-.16g\n", NTP64asDOUBLE(p->rxts));
-   printf("\ttxts = %-.16g\n", NTP64asDOUBLE(p->txts));
+ printf("\trefts = %-.16g\n", ntp64_to_double(p->refts));
+ printf("\torigts = %-.16g\n", ntp64_to_double(p->origts));
+ printf("\trxts = %-.16g\n", ntp64_to_double(p->rxts));
+ printf("\ttxts = %-.16g\n", ntp64_to_double(p->txts));
 }
 
 void setup_request(ntp_message *p){
@@ -242,11 +293,17 @@ void setup_request(ntp_message *p){
        MODE_SET(p->flags, MODE_CLIENT);
        p->poll=4;
        p->precision=(int8_t)0xfa;
-   L16(p->rtdelay)=htons(1);
-   L16(p->rtdisp)=htons(1);
+ p->rtdelay_l16=htons(1);
+ p->rtdisp_l16=htons(1);
 
        gettimeofday(&t, NULL);
-   TVtoNTP64(t,p->txts);
+
+ /* This used to use TVtoNTP64 but we ran into issues with strict aliasing/type punning.
+  * We only used the macro in one place, so it's inlined now */
+ if (t.tv_usec || t.tv_sec) {
+         p->txts_l32 = htonl(t.tv_sec + EPOCHDIFF);
+         p->txts_r32 = htonl((uint64_t) ((4294.967296*t.tv_usec)+.5));
+ }
 }
 
 /* select the "best" server from a list of servers, and return its index.
@@ -420,8 +477,8 @@ double offset_request(const char *host, int *status){
                                        printf("offset %.10g\n", servers[i].offset[respnum]);
                                }
                                servers[i].stratum=req[i].stratum;
-                           servers[i].rtdisp=NTP32asDOUBLE(req[i].rtdisp);
-                           servers[i].rtdelay=NTP32asDOUBLE(req[i].rtdelay);
+                         servers[i].rtdisp=ntp32_to_double(req[i].rtdisp);
+                         servers[i].rtdelay=ntp32_to_double(req[i].rtdelay);
                                servers[i].waiting--;
                                servers[i].flags=req[i].flags;
                                servers_readable--;
@@ -490,7 +547,7 @@ int process_arguments(int argc, char **argv){
                usage ("\n");
 
        while (1) {
-           c = getopt_long (argc, argv, "Vhv46qw:c:t:H:p:o:d:", longopts, &option);
+         c = getopt_long (argc, argv, "Vhv46qw:c:t:H:p:o:d:C:W:", longopts, &option);
                if (c == -1 || c == EOF || c == 1)
                        break;
 
@@ -722,6 +779,6 @@ void
 print_usage(void)
 {
        printf ("%s\n", _("Usage:"));
-   printf(" %s -H <host> [-4|-6] [-w <warn>] [-c <crit>] [-v verbose] [-o <time offset>] [-d <delay>]\n", progname);
+ printf(" %s -H <host> [-4|-6] [-w <warn>] [-c <crit>] [-v verbose] [-o <time offset>] [-d <delay>] [-W <stratum warn>] [-C <stratum crit>]\n", progname);

Ranjit Ranjan's profile image
Ranjit Ranjan

Hi, 
we will build 2.4.12 and check the result also.

Thanks
Ranjit