2020-07-26 07:58:15 +00:00
|
|
|
/*
|
|
|
|
* Odyssey.
|
|
|
|
*
|
|
|
|
* Scalable PostgreSQL connection pooler.
|
|
|
|
*/
|
|
|
|
|
2020-11-27 18:03:42 +00:00
|
|
|
#include <kiwi.h>
|
|
|
|
#include <machinarium.h>
|
|
|
|
#include <odyssey.h>
|
2020-07-26 07:58:15 +00:00
|
|
|
|
2020-12-28 10:43:31 +00:00
|
|
|
void od_watchdog_worker(void *arg)
|
2020-07-26 07:58:15 +00:00
|
|
|
{
|
|
|
|
od_instance_t *instance = arg;
|
|
|
|
|
|
|
|
int fd_ctrl = od_get_control_lock(instance->config.locks_dir);
|
|
|
|
if (fd_ctrl == -1) {
|
2021-02-18 22:26:40 +00:00
|
|
|
od_error(
|
|
|
|
&instance->logger, "watchdog", NULL, NULL,
|
|
|
|
"failed to create ctrl lock file in %s (errno: %d) try to "
|
|
|
|
"specify another locks dir or disable online restart feature",
|
|
|
|
instance->config.locks_dir == NULL ?
|
|
|
|
ODYSSEY_DEFAULT_LOCK_DIR :
|
|
|
|
instance->config.locks_dir,
|
|
|
|
errno);
|
2020-07-26 07:58:15 +00:00
|
|
|
|
|
|
|
if (instance->config.graceful_die_on_errors) {
|
|
|
|
kill(instance->pid.pid, OD_SIG_GRACEFUL_SHUTDOWN);
|
|
|
|
} else {
|
|
|
|
kill(instance->pid.pid, SIGKILL);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
int fd_exec = od_get_execution_lock(instance->config.locks_dir);
|
|
|
|
if (fd_exec == -1) {
|
2020-12-28 10:43:31 +00:00
|
|
|
od_log(&instance->logger, "watchdog", NULL, NULL,
|
2020-07-26 07:58:15 +00:00
|
|
|
"failed to create exec lock file in %s (errno: %d) try to "
|
|
|
|
"specify another locks dir or disable online restart feature",
|
2020-12-28 10:43:31 +00:00
|
|
|
instance->config.locks_dir == NULL ?
|
|
|
|
ODYSSEY_DEFAULT_LOCK_DIR :
|
|
|
|
instance->config.locks_dir,
|
2020-07-26 07:58:15 +00:00
|
|
|
errno);
|
|
|
|
if (instance->config.graceful_die_on_errors) {
|
|
|
|
kill(instance->pid.pid, OD_SIG_GRACEFUL_SHUTDOWN);
|
|
|
|
} else {
|
|
|
|
kill(instance->pid.pid, SIGKILL);
|
|
|
|
}
|
2020-10-22 11:01:18 +00:00
|
|
|
close(fd_ctrl);
|
2020-07-26 07:58:15 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
od_dbg_printf_on_dvl_lvl(1, "try to acquire ctrl lock %d\n", fd_ctrl);
|
2020-08-08 16:00:31 +00:00
|
|
|
while (1) {
|
|
|
|
if (flock(fd_ctrl, LOCK_EX | LOCK_NB) == 0) {
|
2020-12-28 10:43:31 +00:00
|
|
|
od_dbg_printf_on_dvl_lvl(1, "acquire ctrl lock ok %d\n",
|
|
|
|
fd_ctrl);
|
2020-08-08 16:00:31 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
machine_sleep(ODYSSEY_WATCHDOG_ITER_INTERVAL);
|
|
|
|
}
|
2020-07-26 07:58:15 +00:00
|
|
|
|
2020-08-08 16:00:31 +00:00
|
|
|
od_dbg_printf_on_dvl_lvl(1, "try to acquire exec lock %d\n", fd_exec);
|
|
|
|
while (1) {
|
|
|
|
if (flock(fd_exec, LOCK_EX | LOCK_NB) == 0) {
|
2020-12-28 10:43:31 +00:00
|
|
|
od_dbg_printf_on_dvl_lvl(1, "acquire exec lock ok %d\n",
|
|
|
|
fd_exec);
|
2020-08-08 16:00:31 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
machine_sleep(ODYSSEY_WATCHDOG_ITER_INTERVAL);
|
2020-07-26 07:58:15 +00:00
|
|
|
}
|
|
|
|
|
2020-08-08 16:00:31 +00:00
|
|
|
flock(fd_ctrl, LOCK_UN | LOCK_NB);
|
2020-07-26 07:58:15 +00:00
|
|
|
while (1) {
|
|
|
|
if (flock(fd_ctrl, LOCK_EX | LOCK_NB) == -1) {
|
2020-12-28 10:43:31 +00:00
|
|
|
od_dbg_printf_on_dvl_lvl(1, "release exec lock %d\n",
|
|
|
|
fd_exec);
|
2020-07-26 07:58:15 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
flock(fd_ctrl, LOCK_UN | LOCK_NB);
|
|
|
|
|
2021-02-11 16:52:36 +00:00
|
|
|
od_dbg_printf_on_dvl_lvl(1, "watchdog worker sleep for %d ms\n",
|
2020-12-28 10:43:31 +00:00
|
|
|
ODYSSEY_WATCHDOG_ITER_INTERVAL);
|
2020-07-26 07:58:15 +00:00
|
|
|
machine_sleep(ODYSSEY_WATCHDOG_ITER_INTERVAL);
|
|
|
|
}
|
|
|
|
flock(fd_exec, LOCK_UN | LOCK_NB);
|
|
|
|
|
2020-10-22 11:01:18 +00:00
|
|
|
/* request our own process to shutdown */
|
2020-07-26 07:58:15 +00:00
|
|
|
kill(instance->pid.pid, OD_SIG_GRACEFUL_SHUTDOWN);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-12-28 10:43:31 +00:00
|
|
|
/*
 * Start the watchdog worker for the instance that owns `system`.
 *
 * Launches od_watchdog_worker() through machine_create() under the name
 * "watchdog", passing system->global->instance as its argument.
 *
 * Returns OK_RESPONSE on success. If machine_create() reports failure (-1),
 * logs an error and returns NOT_OK_RESPONSE.
 */
od_retcode_t od_watchdog_invoke(od_system_t *system)
{
	od_instance_t *instance = system->global->instance;

	int64_t id = machine_create("watchdog", od_watchdog_worker, instance);
	if (id == -1) {
		/* use the "watchdog" context like every other message in this file */
		od_error(&instance->logger, "watchdog", NULL, NULL,
			 "failed to start watchdog coroutine");
		return NOT_OK_RESPONSE;
	}

	return OK_RESPONSE;
}
|