root/src/modules-lua/noit/module/ntp.lua

Revision ed271b26d31bd852807033fd23086db565b6106b, 14.4 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 3 years ago)

make 'offset' metric consistent.

offset in 'ntp' is seconds, offset in 'ntp control' is milliseconds.
so as to reduce confusion, provide both offset and offset_ms so that
we're clear and people relying on a specific unit don't have their
hopes dashed.

  • Property mode set to 100644
Line 
1 -- Copyright (c) 2010, OmniTI Computer Consulting, Inc.
2 -- All rights reserved.
3 --
4 -- Redistribution and use in source and binary forms, with or without
5 -- modification, are permitted provided that the following conditions are
6 -- met:
7 --
8 --     * Redistributions of source code must retain the above copyright
9 --       notice, this list of conditions and the following disclaimer.
10 --     * Redistributions in binary form must reproduce the above
11 --       copyright notice, this list of conditions and the following
12 --       disclaimer in the documentation and/or other materials provided
13 --       with the distribution.
14 --     * Neither the name OmniTI Computer Consulting, Inc. nor the names
15 --       of its contributors may be used to endorse or promote products
16 --       derived from this software without specific prior written
17 --       permission.
18 --
19 -- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 -- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 -- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 -- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 -- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 -- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 -- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 -- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 -- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 -- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 -- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 module(..., package.seeall)
32
-- Short local aliases for the functions of the global `bit` table that the
-- packet encoding/decoding code below relies on.
local band   = bit.band
local bor    = bit.bor
local bxor   = bit.bxor
local bnot   = bit.bnot
local rshift = bit.rshift
local lshift = bit.lshift
35
-- Registers the module's XML self-description with the loader image.
--
-- image: object exposing xml_description(text); it is called once with the
--        module documentation below.
-- Returns 0.
function onload(image)
  image.xml_description([=[
<module>
  <name>ntp</name>
  <description><para>Determine clock skew from an NTP source.</para></description>
  <loader>lua</loader>
  <object>noit.module.ntp</object>
  <moduleconfig />
  <checkconfig>
    <parameter name="port"
               required="optional"
               default="^123$"
               allowed="\d+">The port to which we will attempt to speak NTP.</parameter>
    <parameter name="control"
               required="optional"
               default="^false$"
               allowed="^(?:true|on|false|off)$">Use the NTP control protocol to learn about the other end.  If this is not true/on, then this check will determine the NTP telemetry of the target relative to the agent's local time.  If it is true/on, then the agent will request the NTP telemetry of the target regarding its preferred peer.</parameter>
  </checkconfig>
  <examples>
    <example>
      <title>Monitor an NTP service</title>
      <para>The following example monitors an NTP service on 10.1.2.3.</para>
      <programlisting><![CDATA[
      <noit>
        <modules>
          <loader image="lua" name="lua">
            <config><directory>/opt/reconnoiter/libexec/modules-lua/?.lua</directory></config>
          </loader>
          <module loader="lua" name="ntp" object="noit.module.ntp"/>
        </modules>
        <checks>
          <check uuid="4ee1a1e2-1e60-11df-8e99-bf796ca462ef" module="ntp" target="10.1.2.3" period="60000" timeout="5000"/>
        </checks>
      </noit>
      ]]></programlisting>
    </example>
  </examples>
</module>]=])
  return 0
end
76
-- Module initialisation hook: there is nothing to set up.
--
-- module: module handle (unused).
-- Returns 0.
function init(module)
  local rc = 0
  return rc
end
80
-- Module configuration hook: the supplied options are ignored.
--
-- module:  module handle (unused).
-- options: configuration options (unused).
-- Returns 0.
function config(module, options)
  local rc = 0
  return rc
end
84
-- Publishes the interval between two timevals as a millisecond metric.
--
-- check:     check object; check.metric_uint32(name, ms) is invoked.
-- name:      name of the metric to publish.
-- starttime: beginning of the interval.
-- endtime:   end of the interval.
-- Returns the elapsed seconds formatted with three decimals (a string).
function elapsed(check, name, starttime, endtime)
    local delta = endtime - starttime
    local secs = string.format('%.3f', noit.timeval.seconds(delta))
    -- secs is a string; the multiplication coerces it back to a number and
    -- the +0.5/floor pair rounds to the nearest whole millisecond.
    local millis = math.floor(secs * 1000 + 0.5)
    check.metric_uint32(name, millis)
    return secs
end
91
-- Packs a (sec, usec) pair into an 8-byte big-endian NTP timestamp:
-- 32 bits of seconds since 1900 followed by a 32-bit binary fraction.
--
-- sec:  seconds since the Unix epoch.
-- usec: microseconds within that second.
-- Returns an 8-byte string.  (0, 0) encodes as the all-zero timestamp.
function timeval2ntp64(sec, usec)
   -- Emit exactly eight zero bytes; packing a native 'L' would produce a
   -- platform-dependent width (4 bytes where unsigned long is 32 bits).
   if sec == 0 and usec == 0 then return string.rep('\0', 8) end
   -- 2208988800 is the number of seconds between 1900-01-01 and 1970-01-01.
   local l32 = sec + 2208988800
   -- 4294.967296 == 2^32 / 10^6 converts microseconds to a 32-bit fraction;
   -- round to an integer explicitly before packing.
   local r32 = math.floor(4294.967296 * usec + 0.5)
   return string.pack('>II', l32, r32)
end
99
-- Builds a noit timeval from the two 32-bit halves of an NTP timestamp.
--
-- l32: seconds part (counted from the 1900 NTP era).
-- r32: fractional part.
-- Returns whatever noit.timeval.new(sec, usec) returns.
function parts2timeval(l32, r32)
  local era_offset = 2208988800   -- subtracted from the NTP seconds
  local frac_per_usec = 4294.967296 -- 2^32 / 10^6
  return noit.timeval.new(l32 - era_offset, (r32 - 0.5) / frac_per_usec)
end
105
-- Decodes an 8-byte big-endian NTP timestamp string into a timeval.
--
-- s: string holding two big-endian unsigned 32-bit integers.
-- Returns the result of parts2timeval for the two decoded halves.
function ntp642timeval(s)
  -- the first value returned by string.unpack is not needed here
  local _, secs, frac = string.unpack(s, '>II')
  return parts2timeval(secs, frac)
end
110
-- Packs a number into the 4-byte NTP short format: a signed 16-bit integer
-- part followed by an unsigned 16-bit fraction, big-endian.
--
-- v: number to encode.
-- Returns the packed 4-byte string.
function double2ntp32(v)
   local whole = math.floor(v)
   local frac = (v - whole) * 65536
   return string.pack('>hH', whole, frac)
end
116
-- Decodes a 4-byte NTP short format value (signed 16-bit integer part,
-- unsigned 16-bit fraction, big-endian) into a number.
--
-- s: the 4-byte string.
-- Returns integer part + fraction / 65536.
function ntp322double(s)
   -- the first value returned by string.unpack is not needed here
   local _, whole, frac = string.unpack(s, '>hH')
   return whole + frac / 65536
end
121
-- Last sequence number handed out; shared by every control request made
-- through this module instance.
local _sequence = 0

-- Returns the next control-request sequence number.
--
-- The value is packed into an unsigned 16-bit header field and later compared
-- with the value echoed by the server, so it is kept within 0..65535 and
-- wraps around instead of growing without bound (an unbounded counter could
-- never match the 16-bit echo once it passed 65535).
function next_sequence()
  _sequence = (_sequence + 1) % 65536
  return _sequence
end
127
-- Builds an NTP control (mode 6) request packet.
--
-- req: table of request fields; every missing field is filled in with its
--      default and written back into the table (version 2, mode 6, leap 0,
--      op 0x01, a fresh sequence number, status/associd/offset/count 0,
--      empty data).  The derived fields li_vn_mode, r_m_e_op and pad are
--      also stored on the table.
-- Returns the packed request string and the sequence number used.
function make_ntp_control(req)
    req.version = req.version or 2 -- NTP version
    req.mode = req.mode or 6 -- control
    req.leap = req.leap or 0
    -- First header byte: leap in bits 6-7, version in bits 3-5, mode in
    -- bits 0-2 (the same layout ntp_control uses when decoding replies).
    req.li_vn_mode = bor(bor(band(req.mode, 0x7),
                             lshift(band(req.version, 0x7), 3)),
                         lshift(band(req.leap, 0x3), 6))
    req.op = req.op or 0x01
    -- only the 5-bit opcode is sent; response/error/more bits stay clear
    req.r_m_e_op = band(req.op, 0x1f)
    req.sequence = req.sequence or next_sequence()
    req.status = req.status or 0
    req.associd = req.associd or 0
    req.offset = req.offset or 0
    req.count = req.count or 0
    req.data = req.data or ''
    -- Zero padding that rounds `count` up to a multiple of 8 bytes.
    req.pad = string.rep('\0', (8 - req.count % 8) % 8)
    return string.pack('>bbHHHHH', req.li_vn_mode, req.r_m_e_op, req.sequence,
                       req.status, req.associd, req.offset, req.count)
        .. req.data
        .. req.pad
         , req.sequence
end
155
-- Sends one NTP control request and collects the (possibly fragmented)
-- response.
--
-- s:   connected socket object providing send(data) and recv(maxlen).
-- req: request table handed to make_ntp_control (which fills in defaults).
--
-- Returns (err, f):
--   * on a fatal condition (error response, authenticated payload, too many
--     fragments, nothing usable received) a single error string;
--   * otherwise nil and a table f describing the last header seen (mode,
--     version, leap, op, status, associd, ...) with f.data holding the
--     payload fragments concatenated in ascending offset order.
-- Packets that fail validation are logged at debug level and skipped; the
-- loop keeps reading until a fragment without the "more" bit is accepted.
function ntp_control(s, req)
    local f = { }
    local req_packet = make_ntp_control(req)
    local last_err = nil
    s:send(req_packet)

    f.num_frags = 0
    f.offsets = {}
    local done = false
    repeat
        local lerr = nil
        local rv, buf = s:recv(480) -- max packet
        local offset, count, cnt
        -- without a string payload there is nothing to parse
        if type(buf) ~= "string" then return "recv failed" end
        -- need at least a header
        if buf:len() < 12 then lerr = "short packet" end
        if lerr == nil then
          f.hdr = buf:sub(1,12)
          f.buf = buf:sub(13,buf:len())
          cnt, f.li_vn_mode, f.r_m_e_op, f.sequence,
              f.status, f.associd, offset, count = string.unpack(f.hdr, '>bbHHHHH')

          f.mode = band(f.li_vn_mode, 0x7)
          f.version = band(rshift(f.li_vn_mode, 3), 0x7)
          f.leap = band(rshift(f.li_vn_mode, 6), 0x3)
          f.op = band(f.r_m_e_op, 0x1f)
          f.is_more = band(f.r_m_e_op, 0x20) ~= 0
          f.is_error = band(f.r_m_e_op, 0x40) ~= 0
          f.is_response = band(f.r_m_e_op, 0x80) ~= 0

          -- validate
          if f.version > 4 or f.version < 1 then lerr = "bad version" end
          if f.mode ~= 6 then lerr = "not a control packet" end
          if not f.is_response then lerr = "not a response packet" end
          if req.sequence ~= f.sequence then lerr = "sequence mismatch" end
          if req.op ~= f.op then lerr = "opcode mismatch " .. req.op .. " != " .. f.op  end
          if f.is_error then
              -- the error code is carried in the high byte of the status word
              return "error: "
                  .. bit.tohex(band(rshift(f.status, 8), 0xff), 2)
          end
        end

        if lerr == nil then
          -- header plus payload, rounded up to a 4-byte boundary
          local expect = band(band(12 + count + 3, bnot(3)),0xffff)
          -- must be aligned on a word boundary
          if band(buf:len(), 3) ~= 0 then lerr = "bad padding" end
          if expect > buf:len() then
            lerr = "bad payload size " .. expect .. " vs. " .. buf:len()
          end
          if expect < buf:len() then
            -- auth
            return "auth unsupported " .. expect .. " vs. " .. buf:len()
          end
        end
        if lerr == nil then
          if f.num_frags > 23 then return "too many fragments" end
          -- drop trailing padding beyond the declared payload length
          if count < f.buf:len() then
            f.buf = f.buf:sub(1,count)
          end
          f.offsets[offset] = f.buf
          -- count accepted fragments so the limit above can actually trigger
          f.num_frags = f.num_frags + 1
          done = not f.is_more
        end
        if lerr ~= nil then
          noit.log("debug", "ntp error:%s\n", lerr)
        end
        last_err = lerr
    until done

    -- pairs() gives no ordering guarantee, so sort the fragment offsets
    -- before stitching the payload back together.
    local order = {}
    for off in pairs(f.offsets) do order[#order + 1] = off end
    table.sort(order)
    local pieces = {}
    for i, off in ipairs(order) do pieces[i] = f.offsets[off] end
    f.data = table.concat(pieces)
    return last_err, f
end
227
228
-- Builds a 48-byte NTP client request.
--
-- fin: optional table of header overrides (flags, stratum, poll, precision,
--      rtdelay, rtdisp, refid); missing fields receive defaults and are
--      written back into the table.
-- Returns the packed request.  The reference, originate and receive
-- timestamps are zero; the transmit timestamp is noit.gettimeofday().
function make_ntp_request(fin)
    local f = fin or { }
                             --    ALARM         V4      CLIENT
    f.flags = f.flags or 227 -- (0x03 << 6) | (4 << 3) | 3
    f.stratum = f.stratum or 0
    f.poll = f.poll or 4
    f.precision = f.precision or 250
    f.rtdisp = f.rtdisp or 1
    f.rtdelay = f.rtdelay or 1
    f.refid = f.refid or 0
    -- Root delay precedes root dispersion on the wire; this is the order
    -- decode_ntp_message reads them back in (bytes 5-8, then 9-12).
    return string.pack('>bbcc', f.flags, f.stratum, f.poll, f.precision)
        .. double2ntp32(f.rtdelay)
        .. double2ntp32(f.rtdisp)
        .. string.pack('>I', f.refid)
        .. timeval2ntp64(0,0)
        .. timeval2ntp64(0,0)
        .. timeval2ntp64(0,0)
        .. timeval2ntp64(noit.gettimeofday())
end
248
-- Splits a 48-byte NTP message into its header fields.
--
-- b: the raw message string.
-- Returns a table with flags, stratum, poll, precision, rtdelay, rtdisp,
-- refid and the four timestamps refts, origts, rxts and txts (timevals).
function decode_ntp_message(b)
    -- the fields have mixed encodings, so each one is cut out of the buffer
    -- and decoded on its own rather than with a single unpack
    local function field(first, last)
        return string.sub(b, first, last)
    end

    local msg = { }
    local _
    _, msg.flags, msg.stratum, msg.poll, msg.precision =
        string.unpack(field(1, 4), '>bbcc')
    msg.rtdelay = ntp322double(field(5, 8))
    msg.rtdisp = ntp322double(field(9, 12))
    _, msg.refid = string.unpack(field(13, 16), '>I')
    msg.refts = ntp642timeval(field(17, 24))
    msg.origts = ntp642timeval(field(25, 32))
    msg.rxts = ntp642timeval(field(33, 40))
    msg.txts = ntp642timeval(field(41, 48))
    return msg
end
272
-- Computes the clock offset from a decoded NTP response.
--
-- response: table with origts, rxts and txts timestamps.
-- now:      local time at which the response was received.
-- Returns ((rxts - origts) + (txts - now)) / 2, in seconds.
function calculate_offset(response, now)
    local to_seconds = noit.timeval.seconds
    local outbound = to_seconds(response.rxts - response.origts)
    local inbound = to_seconds(response.txts - now)
    return (outbound + inbound) / 2.0
end
278
-- Parses a timestamp variable of the form "0x<hex>.<hex>" (seconds part,
-- then fraction part) into a timeval.  Returns nil when the text is missing,
-- malformed, or has a zero seconds part (i.e. the timestamp is unset).
local function parse_control_timestamp(text)
    if type(text) ~= "string" then return nil end
    local sec_hex, frac_hex = text:match('^0x([%da-fA-F]+)%.([%da-fA-F]+)$')
    if sec_hex == nil then return nil end
    local sec, frac = tonumber(sec_hex, 16), tonumber(frac_hex, 16)
    -- Test the raw value: after parts2timeval subtracts the era offset an
    -- unset timestamp no longer has sec == 0.
    if sec == nil or frac == nil or sec == 0 then return nil end
    return parts2timeval(sec, frac)
end

-- Runs the check using the NTP control protocol: lists the target's
-- associations, picks the preferred one (or the first when none is flagged)
-- and publishes that association's variables as metrics.
--
-- module: module handle (unused).
-- check:  check object used for status and metric reporting.
-- s:      connected socket, passed through to ntp_control.
-- Returns nothing.  On failure only check.status() is set, so the check keeps
-- whatever availability/state the caller assigned beforehand.
function initiate_control(module, check, s)
    local err, result = ntp_control(s, {})
    local associations = {}
    if err ~= nil then
        check.status(err)
        return
    end
    -- Each association entry is 4 bytes (id, status); ignore a trailing
    -- partial entry instead of trying to unpack it.
    local numassoc = math.floor(result.data:len() / 4)
    local use_id = 0
    for i = 1, numassoc do
      local cnt, associd, status = string.unpack(result.data:sub(4*i - 3, 4*i), '>HH')
      local assoc = { associd = associd, status = status }
      if result.version > 1 then
          assoc.flash = band(rshift(status,8),0x7)
          assoc.prefer = band(assoc.flash,0x2) ~= 0
          assoc.burst = band(assoc.flash,0x4) ~= 0
          assoc.volley = band(assoc.flash,0x1) ~= 0
      else
          assoc.flash = band(rshift(status,8),0x3)
          assoc.prefer = band(assoc.flash,0x1) ~= 0
          assoc.burst = band(assoc.flash,0x2) ~= 0
          assoc.volley = false
      end
      associations[i] = assoc
      if(assoc.prefer) then use_id = i end
    end
    if numassoc < 1 then
        -- nothing to query; report it rather than indexing a missing entry
        check.status("no associations")
        return
    end
    if(use_id < 1) then use_id = 1 end

    err, result = ntp_control(s, { associd = associations[use_id].associd })
    if err ~= nil then
        check.status(err)
        return
    end
    local vars = {}
    for k, v in string.gmatch(result.data, "%s*([^,]+)=([^,]+)%s*,%s*") do
       vars[k] = v;
       noit.log("debug", "ntp: %s = %s\n", k, v)
    end
    check.metric_string('clock_name', vars.srcadr)
    check.metric_int32('stratum', tonumber(vars.stratum))

    -- parse the rec and the reftime
    local rec = parse_control_timestamp(vars.rec)
    local reftime = parse_control_timestamp(vars.reftime)

    -- seconds since the last receive time, falling back to the reference
    -- time; stays nil when neither is set
    local when = nil
    if rec ~= nil then when = noit.timeval.seconds(noit.timeval.now() - rec)
    elseif reftime ~= nil then when = noit.timeval.seconds(noit.timeval.now() - reftime)
    end
    check.metric_double('when', when)
    -- poll interval is 2^min(ppoll, hpoll), with the exponent at least 3
    local poll = math.pow(2, math.max(math.min(tonumber(vars.ppoll) or 17,
                                               tonumber(vars.hpoll) or 17), 3))
    check.metric_uint32('poll', poll)
    check.metric_double('delay', tonumber(vars.delay))
    -- The offset variable is published unchanged as offset_ms and divided by
    -- 1000 for offset.  A missing value yields nil for both, as with the
    -- other optional variables (presumably accepted by metric_double --
    -- TODO confirm).
    local offset_ms = tonumber(vars.offset)
    local offset_s = nil
    if offset_ms ~= nil then offset_s = offset_ms / 1000 end
    check.metric_double('offset', offset_s)
    check.metric_double('offset_ms', offset_ms)
    check.metric_double('jitter', tonumber(vars.jitter))
    check.metric_double('dispersion', tonumber(vars.dispersion))
    check.metric_double('xleave', tonumber(vars.xleave))
    check.metric_int32('peers', numassoc)
    check.status("ntp successful")
    check.available()
    check.good()
end
351
-- Check entry point.  Opens a UDP socket to the target and either delegates
-- to initiate_control (config "control" is "true"/"on") or sends `count`
-- (default 4) client requests and reports the averaged clock offset.
--
-- module: module handle, passed through to initiate_control.
-- check:  check object; provides target_ip, config and the status/metric
--         reporting functions.
-- The check is marked unavailable/bad up front and only flipped to
-- available/good when at least one response was decoded.
function initiate(module, check)
    local s = noit.socket(check.target_ip, 'udp')
    local status = { }
    -- configuration values may be strings; normalise the request count
    local cnt = tonumber(check.config.count) or 4

    check.unavailable()
    check.bad()

    s:connect(check.target_ip, check.config.port or 123)
    status.responses = 0
    status.avg_offset = 0
    status.offset = { }

    if check.config.control == "true" or check.config.control == "on" then
        return initiate_control(module, check, s)
    end

    for i = 1,cnt do
        local req = make_ntp_request()
        s:send(req)
        local rv, buf = s:recv(48)
        local now = noit.timeval.now()
        -- Only decode complete 48-byte replies; a missing or truncated reply
        -- counts as no response instead of aborting the whole check.
        if type(buf) == "string" and buf:len() >= 48 then
            local response = decode_ntp_message(buf)
            local offset = calculate_offset(response, now)
            if offset ~= nil then
                table.insert(status.offset, offset)
                status.avg_offset = status.avg_offset + offset
                status.stratum = response.stratum
                status.poll = math.pow(2, response.poll)
                status.precision = math.pow(2, response.precision)
                status.rtdisp = response.rtdisp
                status.rtdelay = response.rtdelay
                status.responses = status.responses + 1
            end
        end
        noit.sleep(0.1)
    end

    check.status( cnt .. '/' .. status.responses )

    if # status.offset > 0 then
        -- average only when there is something to average (avoids 0/0)
        status.avg_offset = status.avg_offset / # status.offset
        check.metric_double('offset', status.avg_offset)
        check.metric_double('offset_ms', status.avg_offset * 1000.0)
        check.metric_uint32('requests', cnt)
        check.metric_uint32('responses', status.responses)
        check.metric_uint32('stratum', status.stratum)
        check.metric_int32('poll', status.poll)
        check.metric_double('precision', status.precision)
        check.metric_double('rtdisp', status.rtdisp)
        check.metric_double('rtdelay', status.rtdelay)
        check.available()
        check.good()
    end
end
Note: See TracBrowser for help on using the browser.