root/src/modules-lua/noit/module/ntp.lua

Revision c4bfda41ac34a173e53c73725b8ccc799eb29bba, 13.9 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 2 years ago)

preliminary support for ntp control protocol

  • Property mode set to 100644
Line 
1 -- Copyright (c) 2010, OmniTI Computer Consulting, Inc.
2 -- All rights reserved.
3 --
4 -- Redistribution and use in source and binary forms, with or without
5 -- modification, are permitted provided that the following conditions are
6 -- met:
7 --
8 --     * Redistributions of source code must retain the above copyright
9 --       notice, this list of conditions and the following disclaimer.
10 --     * Redistributions in binary form must reproduce the above
11 --       copyright notice, this list of conditions and the following
12 --       disclaimer in the documentation and/or other materials provided
13 --       with the distribution.
14 --     * Neither the name OmniTI Computer Consulting, Inc. nor the names
15 --       of its contributors may be used to endorse or promote products
16 --       derived from this software without specific prior written
17 --       permission.
18 --
19 -- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 -- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 -- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 -- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 -- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 -- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 -- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 -- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 -- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 -- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 -- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 module(..., package.seeall)
32
33 local band,     bor,     bxor,     bnot,     rshift,     lshift
34     = bit.band, bit.bor, bit.bxor, bit.bnot, bit.rshift, bit.lshift
35
--- Loader hook: hands this module's XML self-description to the loader.
-- The document names the module, lists the per-check parameters (`port`,
-- `control`) and carries a usage example.  Only the prose inside the
-- description was corrected (typos); every attribute is unchanged.
-- @param image loader-supplied object exposing `xml_description(xml)`
-- @return 0
function onload(image)
  image.xml_description([=[
<module>
  <name>ntp</name>
  <description><para>Determine clock skew from an NTP source.</para></description>
  <loader>lua</loader>
  <object>noit.module.ntp</object>
  <moduleconfig />
  <checkconfig>
    <parameter name="port"
               required="optional"
               default="^123$"
               allowed="\d+">The port to which we will attempt to speak NTP.</parameter>
    <parameter name="control"
               required="optional"
               default="^false$"
               allowed="^(?:true|on|false|off)$">Use the NTP control protocol to learn about the other end.  If this is not true/on, then this check will determine the NTP telemetry of the target relative to the agent's local time.  If it is true/on, then the agent will request the NTP telemetry of the target regarding its preferred peer.</parameter>
  </checkconfig>
  <examples>
    <example>
      <title>Monitor an NTP service</title>
      <para>The following example monitors an NTP service on 10.1.2.3.</para>
      <programlisting><![CDATA[
      <noit>
        <modules>
          <loader image="lua" name="lua">
            <config><directory>/opt/reconnoiter/libexec/modules-lua/?.lua</directory></config>
          </loader>
          <module loader="lua" name="ntp" object="noit.module.ntp"/>
        </modules>
        <checks>
          <check uuid="4ee1a1e2-1e60-11df-8e99-bf796ca462ef" module="ntp" target="10.1.2.3" period="60000" timeout="5000"/>
        </checks>
      </noit>
      ]]></programlisting>
    </example>
  </examples>
</module>]=])
  return 0
end
76
--- Module initialisation hook; this module keeps no state to set up.
-- @param module the module object passed by the host (unused)
-- @return 0 (presumably the host's success code -- confirm against loader)
function init(module)
  local rc = 0
  return rc
end
80
--- Module configuration hook; the supplied options are ignored.
-- @param module the module object passed by the host (unused)
-- @param options module-level options (unused)
-- @return 0 (presumably the host's success code -- confirm against loader)
function config(module, options)
  local rc = 0
  return rc
end
84
--- Records the time between two timevals as a millisecond metric.
-- The difference is converted to seconds, rendered with three decimals,
-- and that rendered value (rounded to whole milliseconds) is reported via
-- `check.metric_uint32`.
-- @param check check object exposing `metric_uint32(name, value)`
-- @param name metric name to report under
-- @param starttime timeval at the start of the interval
-- @param endtime timeval at the end of the interval
-- @return the elapsed seconds as a '%.3f'-formatted string
function elapsed(check, name, starttime, endtime)
    local duration = noit.timeval.seconds(endtime - starttime)
    local formatted = string.format('%.3f', duration)
    -- the formatted string is coerced back to a number for the arithmetic
    local millis = math.floor(formatted * 1000 + 0.5)
    check.metric_uint32(name, millis)
    return formatted
end
91
--- Packs a (sec, usec) pair into an 8-byte NTP 64-bit timestamp.
-- The seconds field is shifted by 2208988800 (the offset this file uses
-- between the timeval epoch and the NTP epoch) and the microseconds are
-- scaled by 2^32 / 10^6 into the 32-bit fraction field.
-- A (0, 0) input is emitted as all-zero bytes rather than being shifted.
-- @param sec whole seconds
-- @param usec microseconds
-- @return 8-byte big-endian string
function timeval2ntp64(sec, usec)
   -- The zero case used the native 'L' (unsigned long) format, whose width
   -- depends on the platform; two explicit 32-bit fields keep the timestamp
   -- at exactly 8 bytes everywhere.
   if sec == 0 and usec == 0 then return string.pack('>II', 0, 0) end
   local l32 = sec + 2208988800
   local r32 = 4294.967296 * usec + 0.5 -- +0.5 rounds when the packer truncates
   return string.pack('>II', l32, r32)
end
99
--- Converts the two 32-bit halves of an NTP timestamp into a noit timeval.
-- Applies the inverse of the arithmetic in `timeval2ntp64`: the seconds
-- half loses the 2208988800 epoch shift, the fraction half is unscaled
-- back to microseconds.
-- @param l32 seconds half of the NTP timestamp
-- @param r32 fraction half of the NTP timestamp
-- @return whatever `noit.timeval.new(sec, usec)` returns
function parts2timeval(l32, r32)
  return noit.timeval.new(l32 - 2208988800, (r32 - 0.5) / 4294.967296)
end
105
--- Decodes an 8-byte big-endian NTP timestamp string into a timeval.
-- @param s string holding two big-endian unsigned 32-bit fields
-- @return the timeval produced by `parts2timeval`
function ntp642timeval(s)
  -- host unpack returns a leading position/count value before the fields
  local _, seconds_part, fraction_part = string.unpack(s, '>II')
  return parts2timeval(seconds_part, fraction_part)
end
110
--- Packs a number into a 32-bit 16.16 fixed-point field.
-- The integer part goes into a signed 16-bit field, the fractional part
-- (scaled by 65536) into an unsigned 16-bit field, both big-endian.
-- @param v number to encode
-- @return the packed string
function double2ntp32(v)
   local whole = math.floor(v)
   return string.pack('>hH', whole, (v - whole) * 65536)
end
116
--- Decodes a 32-bit 16.16 fixed-point field (see `double2ntp32`).
-- @param s string holding a big-endian signed 16-bit integer part followed
--   by an unsigned 16-bit fraction
-- @return integer part plus fraction / 65536
function ntp322double(s)
   -- host unpack returns a leading position/count value before the fields
   local _, whole, frac = string.unpack(s, '>hH')
   return whole + frac / 65536
end
121
-- Last control-request sequence number handed out (module-wide).
local _sequence = 0

--- Returns the next control-request sequence number.
-- The value is packed into a 16-bit field by `make_ntp_control` and later
-- compared verbatim with the reply's sequence in `ntp_control`, so it must
-- stay within 0..65535; an unbounded counter would make every request after
-- the 65535th fail with "sequence mismatch".
-- @return integer in 0..65535
function next_sequence()
  _sequence = (_sequence + 1) % 0x10000
  return _sequence
end
127
--- Builds an NTP control-protocol request packet.
-- Missing fields of `req` are filled with defaults and written back into
-- `req`, so the caller can see what was sent (`ntp_control` compares
-- `req.sequence` and `req.op` with the reply).
--
-- Fixes over the previous version:
--   * the padding loop never advanced its counter and spun forever whenever
--     `count` was not a multiple of 8;
--   * the leap bits were masked with 6 and shifted by 3 instead of masked
--     with 0x3 and shifted by 6 (the layout `ntp_control` decodes);
--   * `count` now defaults to the length of `data` instead of 0.
--
-- @param req table of optional fields: version (default 4), mode (6),
--   leap (0), op (0x01), sequence (from next_sequence()), status, associd,
--   offset (all 0), data (''), count (#data)
-- @return the request packet (12-byte header, data, zero padding)
-- @return the sequence number used
function make_ntp_control(req)
    req.version = req.version or 4 -- NTP version
    req.mode = req.mode or 6 -- control
    req.leap = req.leap or 0
    -- first header byte: leap (2 bits) | version (3 bits) | mode (3 bits)
    req.li_vn_mode = bor(bor(band(req.mode, 0x7),
                             lshift(band(req.version, 0x7), 3)),
                         lshift(band(req.leap, 0x3), 6))
    req.op = req.op or 0x01
    -- only the 5 opcode bits are sent; the upper flag bits stay clear
    req.r_m_e_op = band(req.op, 0x1f)
    req.sequence = req.sequence or next_sequence()
    req.status = req.status or 0
    req.associd = req.associd or 0
    req.offset = req.offset or 0
    req.data = req.data or ''
    req.count = req.count or req.data:len()
    -- zero-pad the payload up to the next multiple of 8 bytes
    req.pad = string.rep('\0', (8 - req.count % 8) % 8)
    return string.pack('>bbHHHHH', req.li_vn_mode, req.r_m_e_op, req.sequence,
                       req.status, req.associd, req.offset, req.count)
        .. req.data
        .. req.pad
         , req.sequence
end
155
--- Sends one NTP control request and collects the (possibly fragmented) reply.
-- Each received datagram is validated (length, version, mode, response
-- flag, sequence, opcode, error flag, payload size) and its payload is
-- stored by fragment offset; reading stops at the first fragment without
-- the "more" flag.
--
-- Fixes over the previous version:
--   * fragments are now counted, so the "too many fragments" limit can
--     actually fire and the loop is bounded;
--   * fragments are concatenated in ascending offset order instead of the
--     unspecified order `pairs` yields;
--   * a failed receive is reported instead of raising on a non-string buffer.
--
-- @param s connected socket object with `send(data)` and `recv(n)`
-- @param req request description, see `make_ntp_control` (filled in place)
-- @return an error string on failure; on success `nil` followed by a table
--   holding the last header's decoded fields, `num_frags`, `offsets` and the
--   reassembled payload in `data`
function ntp_control(s, req)
    local f = { }
    local req_packet = make_ntp_control(req)
    s:send(req_packet)

    f.num_frags = 0
    f.offsets = {}
    local done = false
    repeat
        local rv, buf = s:recv(480) -- max packet
        local offset, count, cnt
        -- NOTE(review): assumes a failed recv yields a negative rv and/or a
        -- non-string buffer -- confirm against the host socket API.
        if type(buf) ~= 'string' or (type(rv) == 'number' and rv < 0) then
            return "recv failed"
        end
        -- need at least a header
        if buf:len() < 12 then return "short packet" end

        f.hdr = buf:sub(1,12)
        f.buf = buf:sub(13,buf:len())
        cnt, f.li_vn_mode, f.r_m_e_op, f.sequence,
            f.status, f.associd, offset, count = string.unpack(f.hdr, '>bbHHHHH')

        f.mode = band(f.li_vn_mode, 0x7)
        f.version = band(rshift(f.li_vn_mode, 3), 0x7)
        f.leap = band(rshift(f.li_vn_mode, 6), 0x3)
        f.op = band(f.r_m_e_op, 0x1f)
        f.is_more = band(f.r_m_e_op, 0x20) ~= 0
        f.is_error = band(f.r_m_e_op, 0x40) ~= 0
        f.is_response = band(f.r_m_e_op, 0x80) ~= 0

        -- validate
        if f.version > 4 or f.version < 1 then return "bad version" end
        if f.mode ~= 6 then return "not a control packet" end
        if not f.is_response then return "not a response packet" end
        if req.sequence ~= f.sequence then return "sequence mismatch" end
        if req.op ~= f.op then return "opcode mismatch " .. req.op .. " != " .. f.op  end
        if f.is_error then
            -- the high byte of the status word is reported as the error code
            return "error: "
                .. bit.tohex(band(rshift(f.status, 8), 0xff), 2)
        end
        -- header + payload, rounded up to a 4-byte boundary
        local expect = band(band(12 + count + 3, bnot(3)),0xffff)
        -- must be aligned on a word boundary
        if band(buf:len(), 3) ~= 0 then return "bad padding" end
        if expect > buf:len() then
            return "bad payload size " .. expect .. " vs. " .. buf:len()
        end
        if expect < buf:len() then
            -- auth
            return "auth unsupported " .. expect .. " vs. " .. buf:len()
        end
        if f.num_frags > 23 then return "too many fragments" end
        -- drop alignment padding beyond the declared payload length
        if count < f.buf:len() then
            f.buf = f.buf:sub(1,count)
        end
        f.offsets[offset] = f.buf
        f.num_frags = f.num_frags + 1
        done = not f.is_more
    until done

    -- reassemble in offset order; pairs() alone gives no ordering guarantee
    local order = {}
    for off in pairs(f.offsets) do order[#order + 1] = off end
    table.sort(order)
    local parts = {}
    for i = 1, #order do parts[i] = f.offsets[order[i]] end
    f.data = table.concat(parts)
    return nil, f
end
215
216
--- Builds a 48-byte NTP client request.
-- Missing fields of `fin` are defaulted in place.  The three leading
-- timestamps are sent as zero and the last one carries the current time
-- from `noit.gettimeofday()`.
--
-- Fix: root delay is now packed before root dispersion.  That is the order
-- `decode_ntp_message` reads them in (bytes 5-8 delay, 9-12 dispersion);
-- previously the two fields were emitted swapped.
--
-- @param fin optional table with flags, stratum, poll, precision, rtdisp,
--   rtdelay, refid overrides
-- @return the packed request string
function make_ntp_request(fin)
    local f = fin or { }
                             --    ALARM         V4      CLIENT
    f.flags = f.flags or 227 -- (0x03 << 6) | (4 << 3) | 3
    f.stratum = f.stratum or 0
    f.poll = f.poll or 4
    f.precision = f.precision or 250
    f.rtdisp = f.rtdisp or 1
    f.rtdelay = f.rtdelay or 1
    f.refid = f.refid or 0
    return string.pack('>bbcc', f.flags, f.stratum, f.poll, f.precision)
        .. double2ntp32(f.rtdelay)
        .. double2ntp32(f.rtdisp)
        .. string.pack('>I', f.refid)
        .. timeval2ntp64(0,0)
        .. timeval2ntp64(0,0)
        .. timeval2ntp64(0,0)
        .. timeval2ntp64(noit.gettimeofday())
end
236
--- Splits a 48-byte NTP message into its header fields.
-- Byte layout used: 1-4 flags/stratum/poll/precision, 5-8 root delay,
-- 9-12 root dispersion, 13-16 reference id, then four 8-byte timestamps
-- (reference, origin, receive, transmit).
-- @param b the raw message string
-- @return table with flags, stratum, poll, precision, rtdelay, rtdisp,
--   refid, refts, origts, rxts, txts
function decode_ntp_message(b)
    -- not as easy as a simple unpack: carve the buffer into fields first
    local hdr_bytes     = b:sub(1, 4)
    local delay_bytes   = b:sub(5, 8)
    local disp_bytes    = b:sub(9, 12)
    local refid_bytes   = b:sub(13, 16)
    local refts_bytes   = b:sub(17, 24)
    local origts_bytes  = b:sub(25, 32)
    local rxts_bytes    = b:sub(33, 40)
    local txts_bytes    = b:sub(41, 48)

    local msg = { }
    local _ -- receives the leading value the host unpack returns
    _, msg.flags, msg.stratum, msg.poll, msg.precision =
        string.unpack(hdr_bytes, '>bbcc')
    msg.rtdelay = ntp322double(delay_bytes)
    msg.rtdisp = ntp322double(disp_bytes)
    _, msg.refid = string.unpack(refid_bytes, '>I')
    msg.refts = ntp642timeval(refts_bytes)
    msg.origts = ntp642timeval(origts_bytes)
    msg.rxts = ntp642timeval(rxts_bytes)
    msg.txts = ntp642timeval(txts_bytes)
    return msg
end
260
--- Computes the clock offset from one request/response exchange.
-- Result is half the sum of (rxts - origts) and (txts - now), each
-- converted to seconds.
-- @param response decoded message with `origts`, `rxts`, `txts` timevals
-- @param now timeval taken when the response was received
-- @return offset in seconds
function calculate_offset(response, now)
    local outbound = response.rxts - response.origts
    local inbound = response.txts - now
    local total = noit.timeval.seconds(outbound) + noit.timeval.seconds(inbound)
    return total / 2.0
end
266
-- Parses a "0xHHHHHHHH.HHHHHHHH" hex timestamp into a timeval; returns nil
-- when the value is absent or does not have that shape.
local function control_hex_timestamp(value)
    if type(value) ~= 'string' then return nil end
    local int_hex, frac_hex = value:match('^0x([%da-fA-F]+)%.([%da-fA-F]+)$')
    if int_hex == nil then return nil end
    return parts2timeval(tonumber(int_hex, 16), tonumber(frac_hex, 16))
end

-- Decodes the list of 4-byte (associd, status) records returned by the first
-- control query; returns the list and the index of the last preferred entry
-- (0 when none is flagged).
local function control_parse_associations(data, version)
    local associations = {}
    local use_id = 0
    -- only whole 4-byte records are decoded; a ragged tail is ignored
    for i = 1, math.floor(data:len() / 4) do
        local cnt, associd, status = string.unpack(data:sub(4*i - 3, 4*i), '>HH')
        local assoc = { associd = associd, status = status }
        if version > 1 then
            assoc.flash = band(rshift(status,8),0x7)
            assoc.prefer = band(assoc.flash,0x2) ~= 0
            assoc.burst = band(assoc.flash,0x4) ~= 0
            assoc.volley = band(assoc.flash,0x1) ~= 0
        else
            assoc.flash = band(rshift(status,8),0x3)
            assoc.prefer = band(assoc.flash,0x1) ~= 0
            assoc.burst = band(assoc.flash,0x2) ~= 0
            assoc.volley = false
        end
        associations[i] = assoc
        if assoc.prefer then use_id = i end
    end
    return associations, use_id
end

--- Runs the control-protocol variant of the check.
-- Queries the target's association list, picks the preferred association
-- (falling back to the first), queries that association's variables and
-- reports them as metrics.  On any protocol error the error text becomes the
-- check status and nothing else is reported.
--
-- Fixes over the previous version:
--   * an empty association list no longer raises (reported as
--     "no associations"); a ragged list no longer raises on its tail;
--   * missing or malformed `rec` / `reftime` variables no longer raise;
--   * the variable list is parsed with a trailing comma appended, because
--     the pattern only matches comma-terminated pairs and would otherwise
--     drop the final variable; trailing whitespace is stripped from values;
--   * `peers` is always a whole number.
--
-- @param module the module object (unused)
-- @param check check object used for status and metrics
-- @param s connected socket object passed on to `ntp_control`
function initiate_control(module, check, s)
    local err, result = ntp_control(s, {})
    if err ~= nil then
        check.status(err)
        return
    end

    local associations, use_id =
        control_parse_associations(result.data, result.version)
    local numassoc = #associations
    if numassoc < 1 then
        check.status("no associations")
        return
    end
    if(use_id < 1) then use_id = 1 end

    err, result = ntp_control(s, { associd = associations[use_id].associd })
    if err ~= nil then
        check.status(err)
        return
    end
    local vars = {}
    for k, v in string.gmatch(result.data .. ',', "%s*([^,]+)=([^,]+)%s*,%s*") do
       local value = v:gsub('%s+$', '')
       vars[k] = value
       noit.log("debug", "ntp: %s = %s\n", k, value)
    end
    check.metric_string('clock_name', vars.srcadr)
    check.metric_int32('stratum', tonumber(vars.stratum))

    -- parse the rec and the reftime
    local rec = control_hex_timestamp(vars.rec)
    local reftime = control_hex_timestamp(vars.reftime)

    local when = nil
    if rec ~= nil and rec.sec ~= 0 then
        when = noit.timeval.seconds(noit.timeval.now() - rec)
    elseif reftime ~= nil and reftime.sec ~= 0 then
        when = noit.timeval.seconds(noit.timeval.now() - reftime)
    end
    check.metric_double('when', when)
    -- exponent: the smaller of ppoll/hpoll (17 when absent), but at least 3
    local poll = math.pow(2, math.max(math.min(tonumber(vars.ppoll) or 17,
                                               tonumber(vars.hpoll) or 17), 3))
    check.metric_uint32('poll', poll)
    check.metric_double('delay', tonumber(vars.delay))
    check.metric_double('offset', tonumber(vars.offset))
    check.metric_double('jitter', tonumber(vars.jitter))
    check.metric_double('dispersion', tonumber(vars.dispersion))
    check.metric_double('xleave', tonumber(vars.xleave))
    check.metric_int32('peers', numassoc)
    check.status("ntp successful")
    check.available()
    check.good()
end
338
--- Check entry point: measures clock offset against the target.
-- Marks the check unavailable/bad up front, connects a UDP socket to the
-- target, and either delegates to `initiate_control` (when the `control`
-- option is "true" or "on") or sends `count` client requests (default 4),
-- averaging the offsets of the replies.  Metrics and the available/good
-- state are only reported when at least one reply was decoded.
--
-- Fixes over the previous version:
--   * a failed or truncated receive is counted as a lost response instead
--     of raising inside the decoder and aborting the whole check;
--   * the average is only computed when there are responses (it used to
--     divide by zero);
--   * `count` from the configuration is converted to a number.
--
-- @param module the module object
-- @param check check object (target_ip, config, status/metric reporters)
function initiate(module, check)
    local s = noit.socket(check.target_ip, 'udp')
    local status = { }
    local cnt = tonumber(check.config.count) or 4

    check.unavailable()
    check.bad()

    s:connect(check.target_ip, check.config.port or 123)
    status.responses = 0
    status.avg_offset = 0
    status.offset = { }

    if check.config.control == "true" or check.config.control == "on" then
        return initiate_control(module, check, s)
    end

    for i = 1,cnt do
        local req = make_ntp_request()
        s:send(req)
        local rv, buf = s:recv(48)
        local now = noit.timeval.now()
        -- NOTE(review): assumes a failed recv yields a negative rv and/or a
        -- non-string buffer -- confirm against the host socket API.
        local usable = type(buf) == 'string' and buf:len() >= 48
                       and not (type(rv) == 'number' and rv < 0)
        if usable then
            local response = decode_ntp_message(buf)
            local offset = calculate_offset(response, now)
            if offset ~= nil then
                table.insert(status.offset, offset)
                status.avg_offset = status.avg_offset + offset
                status.stratum = response.stratum
                status.poll = math.pow(2, response.poll)
                status.precision = math.pow(2, response.precision)
                status.rtdisp = response.rtdisp
                status.rtdelay = response.rtdelay
                status.responses = status.responses + 1
            end
        end
        noit.sleep(0.1)
    end

    check.status( cnt .. '/' .. status.responses )

    if # status.offset > 0 then
        status.avg_offset = status.avg_offset / # status.offset
        check.metric_double('offset', status.avg_offset)
        check.metric_uint32('requests', cnt)
        check.metric_uint32('responses', status.responses)
        check.metric_uint32('stratum', status.stratum)
        check.metric_int32('poll', status.poll)
        check.metric_double('precision', status.precision)
        check.metric_double('rtdisp', status.rtdisp)
        check.metric_double('rtdelay', status.rtdelay)
        check.available()
        check.good()
    end
end
Note: See TracBrowser for help on using the browser.