|
|
df78dc |
commit e79e5040a0e7efd622ecdd572bee40c90e59c3bd
|
|
|
df78dc |
Author: Miroslav Lichvar <mlichvar@redhat.com>
|
|
|
df78dc |
Date: Fri Apr 13 17:11:58 2018 +0200
|
|
|
df78dc |
|
|
|
df78dc |
timemaster: restart terminated processes.
|
|
|
df78dc |
|
|
|
df78dc |
If a ptp4l or phc2sys process is terminated (e.g. due to a crash) and
|
|
|
df78dc |
timemaster has been running for at least one second (i.e. it's not an error
|
|
|
df78dc |
in ptp4l/phc2sys configuration), start the process again. Restart all
|
|
|
df78dc |
processes corresponding to the same time source at the same time to
|
|
|
df78dc |
ensure phc2sys is always connected to the currently running ptp4l.
|
|
|
df78dc |
|
|
|
df78dc |
Add a new option to disable the restarting.
|
|
|
df78dc |
|
|
|
df78dc |
Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
|
|
|
df78dc |
|
|
|
df78dc |
diff --git a/timemaster.8 b/timemaster.8
|
|
|
df78dc |
index e0e22eb..7288972 100644
|
|
|
df78dc |
--- a/timemaster.8
|
|
|
df78dc |
+++ b/timemaster.8
|
|
|
df78dc |
@@ -87,6 +87,16 @@ Specify the first number in a sequence of SHM segments that will be used by
|
|
|
df78dc |
can be useful to avoid conflicts with time sources that are not started by
|
|
|
df78dc |
\fBtimemaster\fR, e.g. \fBgpsd\fR using segments number 0 and 1.
|
|
|
df78dc |
|
|
|
df78dc |
+.TP
|
|
|
df78dc |
+.B restart_processes
|
|
|
df78dc |
+Enable or disable restarting of processes started by \fBtimemaster\fR. If the
|
|
|
df78dc |
+option is set to a non-zero value, all processes except \fBchronyd\fR and
|
|
|
df78dc |
+\fBntpd\fR will be automatically restarted when terminated and \fBtimemaster\fR
|
|
|
df78dc |
+has been running for at least one second (i.e. the process did not terminate due to a
|
|
|
df78dc |
+configuration error). If a process was terminated and is not started again,
|
|
|
df78dc |
+\fBtimemaster\fR will kill the other processes and exit with a non-zero status.
|
|
|
df78dc |
+The default value is 1 (enabled).
|
|
|
df78dc |
+
|
|
|
df78dc |
.SS [ntp_server address]
|
|
|
df78dc |
|
|
|
df78dc |
The \fBntp_server\fR section specifies an NTP server that should be used as a
|
|
|
df78dc |
@@ -318,6 +328,7 @@ ptp4l_option delay_mechanism P2P
|
|
|
df78dc |
ntp_program chronyd
|
|
|
df78dc |
rundir /var/run/timemaster
|
|
|
df78dc |
first_shm_segment 1
|
|
|
df78dc |
+restart_processes 0
|
|
|
df78dc |
|
|
|
df78dc |
[chronyd]
|
|
|
df78dc |
path /usr/sbin/chronyd
|
|
|
df78dc |
diff --git a/timemaster.c b/timemaster.c
|
|
|
df78dc |
index fc3ba31..4ba921e 100644
|
|
|
df78dc |
--- a/timemaster.c
|
|
|
df78dc |
+++ b/timemaster.c
|
|
|
df78dc |
@@ -44,6 +44,7 @@
|
|
|
df78dc |
#define DEFAULT_RUNDIR "/var/run/timemaster"
|
|
|
df78dc |
|
|
|
df78dc |
#define DEFAULT_FIRST_SHM_SEGMENT 0
|
|
|
df78dc |
+#define DEFAULT_RESTART_PROCESSES 1
|
|
|
df78dc |
|
|
|
df78dc |
#define DEFAULT_NTP_PROGRAM CHRONYD
|
|
|
df78dc |
#define DEFAULT_NTP_MINPOLL 6
|
|
|
df78dc |
@@ -108,6 +109,7 @@ struct timemaster_config {
|
|
|
df78dc |
enum ntp_program ntp_program;
|
|
|
df78dc |
char *rundir;
|
|
|
df78dc |
int first_shm_segment;
|
|
|
df78dc |
+ int restart_processes;
|
|
|
df78dc |
struct program_config chronyd;
|
|
|
df78dc |
struct program_config ntpd;
|
|
|
df78dc |
struct program_config phc2sys;
|
|
|
df78dc |
@@ -122,6 +124,9 @@ struct config_file {
|
|
|
df78dc |
struct script {
|
|
|
df78dc |
struct config_file **configs;
|
|
|
df78dc |
char ***commands;
|
|
|
df78dc |
+ int **command_groups;
|
|
|
df78dc |
+ int restart_groups;
|
|
|
df78dc |
+ int no_restart_group;
|
|
|
df78dc |
};
|
|
|
df78dc |
|
|
|
df78dc |
static void free_parray(void **a)
|
|
|
df78dc |
@@ -385,6 +390,8 @@ static int parse_timemaster_settings(char **settings,
|
|
|
df78dc |
replace_string(value, &config->rundir);
|
|
|
df78dc |
} else if (!strcasecmp(name, "first_shm_segment")) {
|
|
|
df78dc |
r = parse_int(value, &config->first_shm_segment);
|
|
|
df78dc |
+ } else if (!strcasecmp(name, "restart_processes")) {
|
|
|
df78dc |
+ r = parse_int(value, &config->restart_processes);
|
|
|
df78dc |
} else {
|
|
|
df78dc |
pr_err("unknown timemaster setting %s", name);
|
|
|
df78dc |
return 1;
|
|
|
df78dc |
@@ -508,6 +515,7 @@ static struct timemaster_config *config_parse(char *path)
|
|
|
df78dc |
config->ntp_program = DEFAULT_NTP_PROGRAM;
|
|
|
df78dc |
config->rundir = xstrdup(DEFAULT_RUNDIR);
|
|
|
df78dc |
config->first_shm_segment = DEFAULT_FIRST_SHM_SEGMENT;
|
|
|
df78dc |
+ config->restart_processes = DEFAULT_RESTART_PROCESSES;
|
|
|
df78dc |
|
|
|
df78dc |
init_program_config(&config->chronyd, "chronyd",
|
|
|
df78dc |
NULL, DEFAULT_CHRONYD_SETTINGS, NULL);
|
|
|
df78dc |
@@ -632,6 +640,18 @@ static char *get_refid(char *prefix, unsigned int number)
|
|
|
df78dc |
return NULL;
|
|
|
df78dc |
};
|
|
|
df78dc |
|
|
|
df78dc |
+static void add_command(char **command, int command_group,
|
|
|
df78dc |
+ struct script *script)
|
|
|
df78dc |
+{
|
|
|
df78dc |
+ int *group;
|
|
|
df78dc |
+
|
|
|
df78dc |
+ parray_append((void ***)&script->commands, command);
|
|
|
df78dc |
+
|
|
|
df78dc |
+ group = xmalloc(sizeof(int));
|
|
|
df78dc |
+ *group = command_group;
|
|
|
df78dc |
+ parray_append((void ***)&script->command_groups, group);
|
|
|
df78dc |
+}
|
|
|
df78dc |
+
|
|
|
df78dc |
static void add_shm_source(int shm_segment, int poll, int dpoll, double delay,
|
|
|
df78dc |
char *ntp_options, char *prefix,
|
|
|
df78dc |
struct timemaster_config *config, char **ntp_config)
|
|
|
df78dc |
@@ -671,8 +691,8 @@ static int add_ntp_source(struct ntp_server *source, char **ntp_config)
|
|
|
df78dc |
|
|
|
df78dc |
static int add_ptp_source(struct ptp_domain *source,
|
|
|
df78dc |
struct timemaster_config *config, int *shm_segment,
|
|
|
df78dc |
- int ***allocated_phcs, char **ntp_config,
|
|
|
df78dc |
- struct script *script)
|
|
|
df78dc |
+ int *command_group, int ***allocated_phcs,
|
|
|
df78dc |
+ char **ntp_config, struct script *script)
|
|
|
df78dc |
{
|
|
|
df78dc |
struct config_file *config_file;
|
|
|
df78dc |
char **command, *uds_path, **interfaces, *message_tag;
|
|
|
df78dc |
@@ -798,19 +818,19 @@ static int add_ptp_source(struct ptp_domain *source,
|
|
|
df78dc |
/* HW time stamping */
|
|
|
df78dc |
command = get_ptp4l_command(&config->ptp4l, config_file,
|
|
|
df78dc |
interfaces, 1);
|
|
|
df78dc |
- parray_append((void ***)&script->commands, command);
|
|
|
df78dc |
+ add_command(command, *command_group, script);
|
|
|
df78dc |
|
|
|
df78dc |
command = get_phc2sys_command(&config->phc2sys,
|
|
|
df78dc |
source->domain,
|
|
|
df78dc |
source->phc2sys_poll,
|
|
|
df78dc |
*shm_segment, uds_path,
|
|
|
df78dc |
message_tag);
|
|
|
df78dc |
- parray_append((void ***)&script->commands, command);
|
|
|
df78dc |
+ add_command(command, (*command_group)++, script);
|
|
|
df78dc |
} else {
|
|
|
df78dc |
/* SW time stamping */
|
|
|
df78dc |
command = get_ptp4l_command(&config->ptp4l, config_file,
|
|
|
df78dc |
interfaces, 0);
|
|
|
df78dc |
- parray_append((void ***)&script->commands, command);
|
|
|
df78dc |
+ add_command(command, (*command_group)++, script);
|
|
|
df78dc |
|
|
|
df78dc |
string_appendf(&config_file->content,
|
|
|
df78dc |
"clock_servo ntpshm\n"
|
|
|
df78dc |
@@ -862,7 +882,8 @@ static char **get_ntpd_command(struct program_config *config,
|
|
|
df78dc |
}
|
|
|
df78dc |
|
|
|
df78dc |
static struct config_file *add_ntp_program(struct timemaster_config *config,
|
|
|
df78dc |
- struct script *script)
|
|
|
df78dc |
+ struct script *script,
|
|
|
df78dc |
+ int command_group)
|
|
|
df78dc |
{
|
|
|
df78dc |
struct config_file *ntp_config = xmalloc(sizeof(*ntp_config));
|
|
|
df78dc |
char **command = NULL;
|
|
|
df78dc |
@@ -886,7 +907,7 @@ static struct config_file *add_ntp_program(struct timemaster_config *config,
|
|
|
df78dc |
}
|
|
|
df78dc |
|
|
|
df78dc |
parray_append((void ***)&script->configs, ntp_config);
|
|
|
df78dc |
- parray_append((void ***)&script->commands, command);
|
|
|
df78dc |
+ add_command(command, command_group, script);
|
|
|
df78dc |
|
|
|
df78dc |
return ntp_config;
|
|
|
df78dc |
}
|
|
|
df78dc |
@@ -894,6 +915,7 @@ static struct config_file *add_ntp_program(struct timemaster_config *config,
|
|
|
df78dc |
static void script_destroy(struct script *script)
|
|
|
df78dc |
{
|
|
|
df78dc |
char ***commands, **command;
|
|
|
df78dc |
+ int **groups;
|
|
|
df78dc |
struct config_file *config, **configs;
|
|
|
df78dc |
|
|
|
df78dc |
for (configs = script->configs; *configs; configs++) {
|
|
|
df78dc |
@@ -911,6 +933,10 @@ static void script_destroy(struct script *script)
|
|
|
df78dc |
}
|
|
|
df78dc |
free(script->commands);
|
|
|
df78dc |
|
|
|
df78dc |
+ for (groups = script->command_groups; *groups; groups++)
|
|
|
df78dc |
+ free(*groups);
|
|
|
df78dc |
+ free(script->command_groups);
|
|
|
df78dc |
+
|
|
|
df78dc |
free(script);
|
|
|
df78dc |
}
|
|
|
df78dc |
|
|
|
df78dc |
@@ -920,12 +946,15 @@ static struct script *script_create(struct timemaster_config *config)
|
|
|
df78dc |
struct source *source, **sources;
|
|
|
df78dc |
struct config_file *ntp_config = NULL;
|
|
|
df78dc |
int **allocated_phcs = (int **)parray_new();
|
|
|
df78dc |
- int ret = 0, shm_segment;
|
|
|
df78dc |
+ int ret = 0, shm_segment, command_group = 0;
|
|
|
df78dc |
|
|
|
df78dc |
script->configs = (struct config_file **)parray_new();
|
|
|
df78dc |
script->commands = (char ***)parray_new();
|
|
|
df78dc |
+ script->command_groups = (int **)parray_new();
|
|
|
df78dc |
+ script->no_restart_group = command_group;
|
|
|
df78dc |
+ script->restart_groups = config->restart_processes;
|
|
|
df78dc |
|
|
|
df78dc |
- ntp_config = add_ntp_program(config, script);
|
|
|
df78dc |
+ ntp_config = add_ntp_program(config, script, command_group++);
|
|
|
df78dc |
shm_segment = config->first_shm_segment;
|
|
|
df78dc |
|
|
|
df78dc |
for (sources = config->sources; (source = *sources); sources++) {
|
|
|
df78dc |
@@ -936,7 +965,7 @@ static struct script *script_create(struct timemaster_config *config)
|
|
|
df78dc |
break;
|
|
|
df78dc |
case PTP_DOMAIN:
|
|
|
df78dc |
if (add_ptp_source(&source->ptp, config, &shm_segment,
|
|
|
df78dc |
- &allocated_phcs,
|
|
|
df78dc |
+ &command_group, &allocated_phcs,
|
|
|
df78dc |
&ntp_config->content, script))
|
|
|
df78dc |
ret = 1;
|
|
|
df78dc |
break;
|
|
|
df78dc |
@@ -1063,10 +1092,11 @@ static int remove_config_files(struct config_file **configs)
|
|
|
df78dc |
|
|
|
df78dc |
static int script_run(struct script *script)
|
|
|
df78dc |
{
|
|
|
df78dc |
+ struct timespec ts_start, ts_now;
|
|
|
df78dc |
sigset_t mask, old_mask;
|
|
|
df78dc |
siginfo_t info;
|
|
|
df78dc |
pid_t pid, *pids;
|
|
|
df78dc |
- int i, num_commands, status, ret = 0;
|
|
|
df78dc |
+ int i, group, num_commands, status, quit = 0, ret = 0;
|
|
|
df78dc |
|
|
|
df78dc |
for (num_commands = 0; script->commands[num_commands]; num_commands++)
|
|
|
df78dc |
;
|
|
|
df78dc |
@@ -1101,7 +1131,9 @@ static int script_run(struct script *script)
|
|
|
df78dc |
}
|
|
|
df78dc |
}
|
|
|
df78dc |
|
|
|
df78dc |
- /* wait for one of the blocked signals */
|
|
|
df78dc |
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
|
|
|
df78dc |
+
|
|
|
df78dc |
+ /* process the blocked signals */
|
|
|
df78dc |
while (1) {
|
|
|
df78dc |
if (sigwaitinfo(&mask, &info) < 0) {
|
|
|
df78dc |
if (errno == EINTR)
|
|
|
df78dc |
@@ -1110,36 +1142,111 @@ static int script_run(struct script *script)
|
|
|
df78dc |
break;
|
|
|
df78dc |
}
|
|
|
df78dc |
|
|
|
df78dc |
- /*
|
|
|
df78dc |
- * assume only the first process (i.e. chronyd or ntpd) is
|
|
|
df78dc |
- * essential and continue if other processes terminate
|
|
|
df78dc |
- */
|
|
|
df78dc |
- if (info.si_signo == SIGCHLD && info.si_pid != pids[0]) {
|
|
|
df78dc |
- pr_info("process %d terminated (ignored)", info.si_pid);
|
|
|
df78dc |
+ clock_gettime(CLOCK_MONOTONIC, &ts_now);
|
|
|
df78dc |
+
|
|
|
df78dc |
+ if (info.si_signo != SIGCHLD) {
|
|
|
df78dc |
+ if (quit)
|
|
|
df78dc |
+ continue;
|
|
|
df78dc |
+
|
|
|
df78dc |
+ quit = 1;
|
|
|
df78dc |
+ pr_debug("exiting on signal %d", info.si_signo);
|
|
|
df78dc |
+
|
|
|
df78dc |
+ /* terminate remaining processes */
|
|
|
df78dc |
+ for (i = 0; i < num_commands; i++) {
|
|
|
df78dc |
+ if (pids[i] > 0) {
|
|
|
df78dc |
+ pr_debug("killing process %d", pids[i]);
|
|
|
df78dc |
+ kill(pids[i], SIGTERM);
|
|
|
df78dc |
+ }
|
|
|
df78dc |
+ }
|
|
|
df78dc |
+
|
|
|
df78dc |
continue;
|
|
|
df78dc |
}
|
|
|
df78dc |
|
|
|
df78dc |
- pr_info("received signal %d", info.si_signo);
|
|
|
df78dc |
- break;
|
|
|
df78dc |
- }
|
|
|
df78dc |
+ /* wait for all terminated processes */
|
|
|
df78dc |
+ while (1) {
|
|
|
df78dc |
+ pid = waitpid(-1, &status, WNOHANG);
|
|
|
df78dc |
+ if (pid <= 0)
|
|
|
df78dc |
+ break;
|
|
|
df78dc |
|
|
|
df78dc |
- /* kill all started processes */
|
|
|
df78dc |
- for (i = 0; i < num_commands; i++) {
|
|
|
df78dc |
- if (pids[i] > 0) {
|
|
|
df78dc |
- pr_debug("killing process %d", pids[i]);
|
|
|
df78dc |
- kill(pids[i], SIGTERM);
|
|
|
df78dc |
+ if (!WIFEXITED(status)) {
|
|
|
df78dc |
+ pr_info("process %d terminated abnormally",
|
|
|
df78dc |
+ pid);
|
|
|
df78dc |
+ } else {
|
|
|
df78dc |
+ pr_info("process %d terminated with status %d",
|
|
|
df78dc |
+ pid, WEXITSTATUS(status));
|
|
|
df78dc |
+ }
|
|
|
df78dc |
+
|
|
|
df78dc |
+ for (i = 0; i < num_commands; i++) {
|
|
|
df78dc |
+ if (pids[i] == pid)
|
|
|
df78dc |
+ pids[i] = 0;
|
|
|
df78dc |
+ }
|
|
|
df78dc |
}
|
|
|
df78dc |
- }
|
|
|
df78dc |
|
|
|
df78dc |
- while ((pid = wait(&status)) >= 0) {
|
|
|
df78dc |
- if (!WIFEXITED(status)) {
|
|
|
df78dc |
- pr_info("process %d terminated abnormally", pid);
|
|
|
df78dc |
- ret = 1;
|
|
|
df78dc |
- } else {
|
|
|
df78dc |
- if (WEXITSTATUS(status))
|
|
|
df78dc |
+ /* wait for all processes to terminate when exiting */
|
|
|
df78dc |
+ if (quit) {
|
|
|
df78dc |
+ for (i = 0; i < num_commands; i++) {
|
|
|
df78dc |
+ if (pids[i])
|
|
|
df78dc |
+ break;
|
|
|
df78dc |
+ }
|
|
|
df78dc |
+ if (i == num_commands)
|
|
|
df78dc |
+ break;
|
|
|
df78dc |
+
|
|
|
df78dc |
+ pr_debug("waiting for other processes to terminate");
|
|
|
df78dc |
+ continue;
|
|
|
df78dc |
+ }
|
|
|
df78dc |
+
|
|
|
df78dc |
+ /*
|
|
|
df78dc |
+ * terminate (and then restart if allowed) all processes in
|
|
|
df78dc |
+ * groups that have a terminated process
|
|
|
df78dc |
+ */
|
|
|
df78dc |
+ for (group = 0; group < num_commands; group++) {
|
|
|
df78dc |
+ int terminated = 0, running = 0;
|
|
|
df78dc |
+
|
|
|
df78dc |
+ for (i = 0; i < num_commands; i++) {
|
|
|
df78dc |
+ if (*(script->command_groups[i]) != group)
|
|
|
df78dc |
+ continue;
|
|
|
df78dc |
+ if (pids[i])
|
|
|
df78dc |
+ running++;
|
|
|
df78dc |
+ else
|
|
|
df78dc |
+ terminated++;
|
|
|
df78dc |
+ }
|
|
|
df78dc |
+
|
|
|
df78dc |
+ if (!terminated)
|
|
|
df78dc |
+ continue;
|
|
|
df78dc |
+
|
|
|
df78dc |
+ /*
|
|
|
df78dc |
+ * exit with a non-zero status if the group should not
|
|
|
df78dc |
+ * be restarted (i.e. chronyd/ntpd), timemaster is
|
|
|
df78dc |
+ * running only for a short time (and it is likely a
|
|
|
df78dc |
+ * configuration error), or restarting is disabled
|
|
|
df78dc |
+ * completely
|
|
|
df78dc |
+ */
|
|
|
df78dc |
+ if (group == script->no_restart_group ||
|
|
|
df78dc |
+ ts_now.tv_sec - ts_start.tv_sec <= 1 ||
|
|
|
df78dc |
+ !script->restart_groups) {
|
|
|
df78dc |
+ kill(getpid(), SIGTERM);
|
|
|
df78dc |
ret = 1;
|
|
|
df78dc |
- pr_info("process %d terminated with status %d", pid,
|
|
|
df78dc |
- WEXITSTATUS(status));
|
|
|
df78dc |
+ break;
|
|
|
df78dc |
+ }
|
|
|
df78dc |
+
|
|
|
df78dc |
+ for (i = 0; i < num_commands; i++) {
|
|
|
df78dc |
+ if (*(script->command_groups[i]) != group)
|
|
|
df78dc |
+ continue;
|
|
|
df78dc |
+
|
|
|
df78dc |
+ /* terminate all processes in the group first */
|
|
|
df78dc |
+ if (running && pids[i]) {
|
|
|
df78dc |
+ pr_debug("killing process %d", pids[i]);
|
|
|
df78dc |
+ kill(pids[i], SIGTERM);
|
|
|
df78dc |
+ } else if (!running && !pids[i]) {
|
|
|
df78dc |
+ pids[i] = start_program(script->commands[i],
|
|
|
df78dc |
+ &old_mask);
|
|
|
df78dc |
+ if (!pids[i])
|
|
|
df78dc |
+ kill(getpid(), SIGTERM);
|
|
|
df78dc |
+
|
|
|
df78dc |
+ /* limit restarting rate */
|
|
|
df78dc |
+ sleep(1);
|
|
|
df78dc |
+ }
|
|
|
df78dc |
+ }
|
|
|
df78dc |
}
|
|
|
df78dc |
}
|
|
|
df78dc |
|
|
|
df78dc |
@@ -1154,6 +1261,7 @@ static int script_run(struct script *script)
|
|
|
df78dc |
static void script_print(struct script *script)
|
|
|
df78dc |
{
|
|
|
df78dc |
char ***commands, **command;
|
|
|
df78dc |
+ int **groups;
|
|
|
df78dc |
struct config_file *config, **configs;
|
|
|
df78dc |
|
|
|
df78dc |
for (configs = script->configs; *configs; configs++) {
|
|
|
df78dc |
@@ -1162,7 +1270,9 @@ static void script_print(struct script *script)
|
|
|
df78dc |
}
|
|
|
df78dc |
|
|
|
df78dc |
fprintf(stderr, "commands:\n\n");
|
|
|
df78dc |
- for (commands = script->commands; *commands; commands++) {
|
|
|
df78dc |
+ for (commands = script->commands, groups = script->command_groups;
|
|
|
df78dc |
+ *commands; commands++, groups++) {
|
|
|
df78dc |
+ fprintf(stderr, "[%d] ", **groups);
|
|
|
df78dc |
for (command = *commands; *command; command++)
|
|
|
df78dc |
fprintf(stderr, "%s ", *command);
|
|
|
df78dc |
fprintf(stderr, "\n");
|