runsisi's

technical notes

chrony

2020-06-16 runsisi#devops

构建

$ sudo apt install bison asciidoctor
$ ./configure --prefix=/usr --enable-debug --enable-scfilter --enable-ntp-signd

命令行

chronyd 服务端

$ ./chronyd --version
chronyd (chrony) version DEVELOPMENT (+CMDMON +NTP +REFCLOCK +RTC -PRIVDROP +SCFILTER +SECHASH +SIGND +ASYNCDNS +IPV6 +DEBUG)
$ sudo ./chronyd -dd

chronyc 客户端

$ ./chronyc --version
chronyc (chrony) version DEVELOPMENT (+READLINE +IPV6 +DEBUG)
$ sudo ./chronyc makestep
200 OK
# chronyc sources -v
210 Number of sources = 1

  .-- Source mode  '^' = server, '=' = peer, '#' = local clock.
 / .- Source state '*' = current synced, '+' = combined , '-' = not combined,
| /   '?' = unreachable, 'x' = time may be in error, '~' = time too variable.
||                                                 .- xxxx [ yyyy ] +/- zzzz
||      Reachability register (octal) -.           |  xxxx = adjusted offset,
||      Log2(Polling interval) --.      |          |  yyyy = measured offset,
||                                \     |          |  zzzz = estimated error.
||                                 |    |           \
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
^* 192.168.9.30                 10   6   377    58    +11us[  +20us] +/-   22us

$ chronyc tracking
Reference ID    : 0A1E0169 (10.30.1.105)
Stratum         : 2
Ref time (UTC)  : Sun May 24 21:56:57 2020
System time     : 0.000000021 seconds fast of NTP time
Last offset     : +0.001941956 seconds
RMS offset      : 0.001676591 seconds
Frequency       : 0.450 ppm fast
Residual freq   : +0.008 ppm
Skew            : 0.124 ppm
Root delay      : 0.054545589 seconds
Root dispersion : 0.012493405 seconds
Update interval : 17631.7 seconds
Leap status     : Normal

系统时间相关命令

$ timedatectl status
                      Local time: Mon 2020-05-25 14:03:40 CST
                  Universal time: Mon 2020-05-25 06:03:40 UTC
                        RTC time: Mon 2020-05-25 06:03:40
                       Time zone: Asia/Shanghai (CST, +0800)
       System clock synchronized: yes
systemd-timesyncd.service active: no
                 RTC in local TZ: no

# hwclock -r
2020-05-25 14:18:07.310722+0800

配置项

orphan
minpoll maxpoll iburst minstratum
stratumweight
initstepslew
makestep
maxchange
rtcsync

实现

// client.c

process_cmd_sources

// main.c/main

if (ref_mode == REF_ModeNormal && CNF_GetInitSources() > 0) {
  ref_mode = REF_ModeInitStepSlew;
}

REF_SetModeEndHandler(reference_mode_end);
REF_SetMode(ref_mode);

// main.c/reference_mode_end
case REF_ModeInitStepSlew:
  /* Switch to the normal mode, the delay is used to prevent polling
     interval shorter than the burst interval if some configured servers
     were used also for initstepslew */
  SCH_AddTimeoutByDelay(2.0, post_init_ntp_hook, NULL);

// main.c/post_init_ntp_hook
if (ref_mode == REF_ModeInitStepSlew) {
  /* Remove the initstepslew sources and set normal mode */
  NSR_RemoveAllSources();
  ref_mode = REF_ModeNormal;
  REF_SetMode(ref_mode);
}

CNF_AddSources();
CNF_AddBroadcasts();

NSR_SetSourceResolvingEndHandler(ntp_source_resolving_end);
NSR_ResolveSources();

// main.c/ntp_source_resolving_end
NSR_SetSourceResolvingEndHandler(NULL);

NSR_StartSources();
  for (i = 0; i < ARR_GetSize(records); i++) {
    NCR_StartInstance(get_record(i)->data);
  }

NSR_AutoStartSources();
  auto_start_sources = 1;

// reference.c

REF_Initialise
  enable_local_stratum = CNF_AllowLocalReference(&local_stratum, &local_orphan, &local_distance);
  CNF_GetMakeStep(&make_step_limit, &make_step_threshold);
  CNF_GetMaxChange(&max_offset_delay, &max_offset_ignore, &max_offset);

// sched.c

SCH_MainLoop
  while (!need_to_exit) {
    LCL_ReadRawTime(&now);
    LCL_CookTime(&now, &cooked, &err);
    check_current_time
      // if local time jumped
      LCL_NotifyExternalTimeStep
        invoke_parameter_change_handlers(LCL_ChangeUnknownStep)
          handle_step
            /* Reset offset and slewing */
            slew_start = *raw;
            offset_register = 0.0;
            update_slew();
        lcl_InvokeDispersionNotifyHandlers
          add_dispersion

// sources.c

SRC_SelectSource
  for (i = 0; i < n_sources; i++) {
    SST_GetSelectionData
  }
  log_selection_message

  sources[selected_source_index]->status = SRC_SELECTED;

  SST_GetTrackingData
  combine_sources
  REF_SetReference
    LCL_ReadRawTime(&raw_now);
    LCL_GetOffsetCorrection(&raw_now, &uncorrected_offset, NULL);
      offset_convert
    is_offset_ok(offset)
    if (is_step_limit_reached(our_offset, uncorrected_offset)) {
      /* Cancel the uncorrected offset and correct the total offset by step */
      accumulate_offset = uncorrected_offset;
      step_offset = our_offset - uncorrected_offset;
    } else {
      accumulate_offset = our_offset;
      step_offset = 0.0;
    }
    if (fabs(skew) < max_update_skew || leap == LEAP_Unsynchronised) {
      LCL_AccumulateFrequencyAndOffset
        LCL_ReadRawTime(&raw);
        LCL_CookTime(&raw, &cooked, NULL);
        set_frequency
          update_slew
        accrue_offset
          update_slew
        invoke_parameter_change_handlers(LCL_ChangeAdjust)
          handle_slew
    } else {
      LCL_AccumulateOffset(accumulate_offset, correction_rate);
        LCL_ReadRawTime(&raw);
        LCL_CookTime(&raw, &cooked, NULL);
        accrue_offset
          update_slew
        invoke_parameter_change_handlers(LCL_ChangeAdjust)
          handle_slew
    }
    update_leap_status
    maybe_log_offset
    if (step_offset != 0.0) {
      LCL_ApplyStepOffset(step_offset)
    }
    LCL_SetSyncStatus

// cmdmon.c

handle_make_step
  LCL_MakeStep
    LCL_ReadRawTime(&raw);
      clock_gettime(CLOCK_REALTIME) // 无 clock_gettime 时回退到 gettimeofday
    LCL_GetOffsetCorrection(&raw, &correction, NULL);
      offset_convert
    LCL_AccumulateOffset(correction, 0.0);
    LCL_ApplyStepOffset(-correction)
      LCL_ReadRawTime
      LCL_CookTime
      apply_step_offset
      invoke_parameter_change_handlers(LCL_ChangeStep)
        handle_step
          UTI_AddDoubleToTimespec(&slew_start, -doffset, &slew_start);

maxchange 对 makestep 的影响

w/o maxchange

# chronyd -dd
2020-05-30T03:18:10Z ntp_io_linux.c:653:(NIO_Linux_ProcessMessage) Received 90 (48) bytes from error queue for 10.x.x.x:123 fd=8 if=2 tss=1
2020-05-30T03:18:10Z ntp_core.c:2122:(update_tx_timestamp) Updated TX timestamp delay=0.000119263
2020-05-30T03:18:10Z sys_generic.c:316:(offset_convert) offset_convert now=1590808690 slew_start=1590808676 duration=1.450637e+01
2020-05-30T03:18:10Z sys_generic.c:329:(offset_convert) offset_convert slew_freq=-4.701174e-12 fastslew_corr=0.000000e+00 offset_register=0.000000e+00 corr=-6.819696e-11
2020-05-30T03:18:10Z sys_generic.c:316:(offset_convert) offset_convert now=1590808690 slew_start=1590808676 duration=1.450629e+01
2020-05-30T03:18:10Z sys_generic.c:329:(offset_convert) offset_convert slew_freq=-4.701174e-12 fastslew_corr=0.000000e+00 offset_register=0.000000e+00 corr=-6.819659e-11
2020-05-30T03:18:10Z ntp_io.c:666:(process_message) Received 48 bytes from 10.x.x.x:123 to 192.168.137.3 fd=8 if=2 tss=1 delay=0.000077490
2020-05-30T03:18:10Z ntp_core.c:1686:(receive_packet) NTP packet lvm=44 stratum=1 poll=4 prec=-20 root_delay=0.000000 root_disp=0.000336 refid=47505300 []
2020-05-30T03:18:10Z ntp_core.c:1691:(receive_packet) reference=1590812048.739173552 origin=4197820964.746388533 receive=1590812055.976486915 transmit=1590812055.977642707
2020-05-30T03:18:10Z ntp_core.c:1693:(receive_packet) offset=3365.341252930 delay=0.071466513 dispersion=0.000146 root_delay=0.071467 root_dispersion=0.000482
2020-05-30T03:18:10Z ntp_core.c:1696:(receive_packet) remote_interval=0.001155792 local_interval=0.072622305 response_time=0.001155792 txs=K rxs=K
2020-05-30T03:18:10Z ntp_core.c:1700:(receive_packet) test123=111 test567=111 testABCD=1111 kod_rate=0 interleaved=0 presend=0 valid=1 good=1 updated=1
2020-05-30T03:18:10Z sources.c:356:(SRC_AccumulateSample) ip=[10.x.x.x] t=1590808690.635811881 ofs=-3365.341253 del=0.071467 disp=0.000482 str=1
2020-05-30T03:18:10Z sourcestats.c:583:(SST_DoNewRegression) off=-3.365339e+03 freq=-1.578745e-03 skew=2.485036e-01 n=3 bs=0 runs=3 asym=0.000000 arun=0
2020-05-30T03:18:10Z sourcestats.c:690:(SST_GetSelectionData) n=3 off=-3365.341310 dist=0.045258 sd=0.001533 first_ago=6.192836 last_ago=0.036389 selok=1
2020-05-30T03:18:10Z sources.c:772:(SRC_SelectSource) badstat=0 sel=1 badstat_reach=0 sel_reach=b max_reach_ago=6.192836
2020-05-30T03:18:10Z sources.c:1011:(SRC_SelectSource) select score=22.046852 refid=a1e0169 match_refid=a1e0169 status=0 dist=0.045358
2020-05-30T03:18:10Z sources.c:457:(log_selection_message) Selected source 10.x.x.x
2020-05-30T03:18:10Z sourcestats.c:721:(SST_GetTrackingData) n=3 freq=-0.001579 (-1578.745ppm) skew=0.248504 (248503.564ppm) avoff=-3365.339239 offsd=0.002007 disp=0.000482
2020-05-30T03:18:10Z sys_generic.c:316:(offset_convert) offset_convert now=1590808690 slew_start=1590808676 duration=1.450716e+01
2020-05-30T03:18:10Z sys_generic.c:329:(offset_convert) offset_convert slew_freq=-4.701174e-12 fastslew_corr=0.000000e+00 offset_register=0.000000e+00 corr=-6.820071e-11
2020-05-30T03:18:10Z reference.c:981:(REF_SetReference) raw_now=1590808690 now=1590808690 uncorrected_offset=-6.820071e-11
2020-05-30T03:18:10Z reference.c:1070:(REF_SetReference) Skew 0.248504 too large to track, offset=-3365.339298
2020-05-30T03:18:10Z sys_generic.c:316:(offset_convert) offset_convert now=1590808690 slew_start=1590808676 duration=1.450735e+01
2020-05-30T03:18:10Z sys_generic.c:329:(offset_convert) offset_convert slew_freq=-4.701174e-12 fastslew_corr=0.000000e+00 offset_register=0.000000e+00 corr=-6.820159e-11
2020-05-30T03:18:10Z sys_generic.c:290:(accrue_offset) on enter: offset=-3.365339e+03 offset_register=0.000000e+00 correction_rate=0.000000e+00
2020-05-30T03:18:10Z sys_generic.c:296:(accrue_offset) before update_slew: offset=-3.365339e+03 offset_register=-3.365339e+03 correction_rate=0.000000e+00
2020-05-30T03:18:10Z sys_generic.c:183:(update_slew) now=1590808690 slew_start=1590808676 slew_freq=-4.701174e-12 offset_register=-3.365339e+03
2020-05-30T03:18:10Z sys_generic.c:253:(update_slew) slew_start=1590808690 slew offset=-3.365339e+03 corr_rate=0.000000e+00 base_freq=0.523961 total_freq=-83332.765701 slew_freq=-7.692308e-02 duration=10000.000000 slew_error=8.461538e-04
2020-05-30T03:18:10Z sys_generic.c:301:(accrue_offset) on exit: offset=-3.365339e+03 offset_register=-3.365339e+03 correction_rate=0.000000e+00
2020-05-30T03:18:10Z sourcestats.c:759:(SST_SlewSamples) n=3 m=0 old_off_time=1590808690.635811881 new=1590812055.975109691 old_off=-3365.339239 new_off=0.000059 old_freq=-1578.745 new_freq=-1578.745
2020-05-30T03:18:10Z reference.c:537:(maybe_log_offset) System clock wrong by 3365.339298 seconds, adjustment started

w/ maxchange

# chronyd -dd
...
2020-05-30T03:23:28Z ntp_io_linux.c:653:(NIO_Linux_ProcessMessage) Received 90 (48) bytes from error queue for 10.x.x.x:123 fd=8 if=2 tss=1
2020-05-30T03:23:28Z ntp_core.c:2122:(update_tx_timestamp) Updated TX timestamp delay=0.000113201
2020-05-30T03:23:28Z sys_generic.c:316:(offset_convert) offset_convert now=1590809008 slew_start=1590808998 duration=1.051467e+01
2020-05-30T03:23:28Z sys_generic.c:329:(offset_convert) offset_convert slew_freq=-4.701174e-12 fastslew_corr=0.000000e+00 offset_register=0.000000e+00 corr=-4.943128e-11
2020-05-30T03:23:28Z sys_generic.c:316:(offset_convert) offset_convert now=1590809008 slew_start=1590808998 duration=1.051455e+01
2020-05-30T03:23:28Z sys_generic.c:329:(offset_convert) offset_convert slew_freq=-4.701174e-12 fastslew_corr=0.000000e+00 offset_register=0.000000e+00 corr=-4.943074e-11
2020-05-30T03:23:28Z ntp_io.c:666:(process_message) Received 48 bytes from 10.x.x.x:123 to 192.168.137.3 fd=8 if=2 tss=1 delay=0.000113258
2020-05-30T03:23:28Z ntp_core.c:1686:(receive_packet) NTP packet lvm=44 stratum=1 poll=4 prec=-20 root_delay=0.000000 root_disp=0.000351 refid=47505300 []
2020-05-30T03:23:28Z ntp_core.c:1691:(receive_packet) reference=1590812363.752831808 origin=2053940718.651731705 receive=1590812371.955376615 transmit=1590812371.956277798
2020-05-30T03:23:28Z ntp_core.c:1693:(receive_packet) offset=3363.279470172 delay=0.073292080 dispersion=0.000149 root_delay=0.073292 root_dispersion=0.000500
2020-05-30T03:23:28Z ntp_core.c:1696:(receive_packet) remote_interval=0.000901183 local_interval=0.074193263 response_time=0.000901183 txs=K rxs=K
2020-05-30T03:23:28Z ntp_core.c:1700:(receive_packet) test123=111 test567=111 testABCD=1111 kod_rate=0 interleaved=0 presend=0 valid=1 good=1 updated=1
2020-05-30T03:23:28Z sources.c:356:(SRC_AccumulateSample) ip=[10.x.x.x] t=1590809008.676357034 ofs=-3363.279470 del=0.073292 disp=0.000500 str=1
2020-05-30T03:23:28Z sourcestats.c:583:(SST_DoNewRegression) off=-3.363272e+03 freq=3.234435e-04 skew=1.031083e+00 n=3 bs=0 runs=3 asym=0.000000 arun=0
2020-05-30T03:23:28Z sourcestats.c:690:(SST_GetSelectionData) n=3 off=-3363.279458 dist=0.075513 sd=0.009056 first_ago=10.272558 last_ago=0.037210 selok=1
2020-05-30T03:23:28Z sources.c:772:(SRC_SelectSource) badstat=0 sel=1 badstat_reach=0 sel_reach=25 max_reach_ago=10.272558
2020-05-30T03:23:28Z sources.c:1011:(SRC_SelectSource) select score=13.225256 refid=a1e0169 match_refid=a1e0169 status=0 dist=0.075613
2020-05-30T03:23:28Z sources.c:457:(log_selection_message) Selected source 10.x.x.x
2020-05-30T03:23:28Z sourcestats.c:721:(SST_GetTrackingData) n=3 freq=0.000323 (323.443ppm) skew=1.031083 (1031082.782ppm) avoff=-3363.272202 offsd=0.008815 disp=0.000500
2020-05-30T03:23:28Z sys_generic.c:316:(offset_convert) offset_convert now=1590809008 slew_start=1590808998 duration=1.051539e+01
2020-05-30T03:23:28Z sys_generic.c:329:(offset_convert) offset_convert slew_freq=-4.701174e-12 fastslew_corr=0.000000e+00 offset_register=0.000000e+00 corr=-4.943468e-11
2020-05-30T03:23:28Z reference.c:981:(REF_SetReference) raw_now=1590809008 now=1590809008 uncorrected_offset=-4.943468e-11
2020-05-30T03:23:28Z reference.c:599:(is_offset_ok) Adjustment of -3363.272 seconds exceeds the allowed maximum of 300.000 seconds (ignored)

参考资料

man hwclock

man timedatectl

chrony.conf(5) Manual Page

https://chrony.tuxfamily.org/doc/3.5/chrony.conf.html

How Linux Keeps Track of Time

https://tldp.org/HOWTO/Clock-2.html

chrony vs. systemd-timesyncd – What are the differences and use cases as NTP clients?

https://unix.stackexchange.com/questions/504381/chrony-vs-systemd-timesyncd-what-are-the-differences-and-use-cases-as-ntp-cli

Network Time Protocol: Best Practices White Paper

https://www.cisco.com/c/en/us/support/docs/availability/high-availability/19643-ntpm.html

systemd-timedated.service

https://www.freedesktop.org/software/systemd/man/systemd-timedated.service.html

https://www.freedesktop.org/wiki/Software/systemd/timedated/

systemd-timesyncd.service

https://www.freedesktop.org/software/systemd/man/systemd-timesyncd.service.html

[systemd-devel] [ANNOUNCE] systemd v28

https://lists.freedesktop.org/archives/systemd-devel/2011-May/002526.html

NTP - orphan mode with SAME stratum (“tos orphan 6”) FAIL to sync.

https://groups.google.com/forum/#!topic/comp.protocols.time.ntp/WOarKCHYDbE

Orphan Mode

https://www.eecis.udel.edu/~mills/ntp/html/orphan.html

NTP Basics

https://kb.meinbergglobal.com/kb/time_sync/ntp/ntp_basics

Synchronizing Clocks In a Cassandra Cluster Pt. 2 - Solutions

https://blog.rapid7.com/2014/03/17/synchronizing-clocks-in-a-cassandra-cluster-pt-2-solutions/

CHAPTER 6. USING THE CHRONY SUITE TO CONFIGURE NTP

https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/configuring_basic_system_settings/using-chrony-to-configure-ntp

Automatic NTP Configuration Options

https://www.cyberciti.biz/files/unix-ntpd-docs/manyopt.html