|
|
3280a9 |
From 3c74f736c657d007770fe866842b08d0a74772ca Mon Sep 17 00:00:00 2001
|
|
|
3280a9 |
From: Mark Reynolds <mreynolds@redhat.com>
|
|
|
3280a9 |
Date: Wed, 9 Dec 2020 15:21:11 -0500
|
|
|
3280a9 |
Subject: [PATCH 6/6] Issue 4414 - disk monitoring - prevent division by zero
|
|
|
3280a9 |
crash
|
|
|
3280a9 |
|
|
|
3280a9 |
Bug Description: If a disk mount has zero total space or zero used
|
|
|
3280a9 |
space then a division by zero can occur and the
|
|
|
3280a9 |
server will crash.
|
|
|
3280a9 |
|
|
|
3280a9 |
It has also been observed that sometimes a system
|
|
|
3280a9 |
can return the wrong disk entirely, and when that
|
|
|
3280a9 |
happens the incorrect disk also has zero available
|
|
|
3280a9 |
space which triggers the disk monitoring thread to
|
|
|
3280a9 |
immediately shut the server down.
|
|
|
3280a9 |
|
|
|
3280a9 |
Fix Description: Check the total and used space for zero and do not
|
|
|
3280a9 |
divide, just ignore it. As a preemptive measure
|
|
|
3280a9 |
ignore disks from /dev, /proc, /sys (except /dev/shm).
|
|
|
3280a9 |
Yes it's a bit hacky, but the true underlying cause
|
|
|
3280a9 |
is not known yet. So better to be safe than sorry.
|
|
|
3280a9 |
|
|
|
3280a9 |
Relates: https://github.com/389ds/389-ds-base/issues/4414
|
|
|
3280a9 |
|
|
|
3280a9 |
Reviewed by: firstyear (Thanks!)
|
|
|
3280a9 |
---
|
|
|
3280a9 |
ldap/servers/slapd/daemon.c | 22 +++++++++++++++++++++-
|
|
|
3280a9 |
ldap/servers/slapd/monitor.c | 13 +++++--------
|
|
|
3280a9 |
2 files changed, 26 insertions(+), 9 deletions(-)
|
|
|
3280a9 |
|
|
|
3280a9 |
diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c
|
|
|
3280a9 |
index 691f77570..bfd965263 100644
|
|
|
3280a9 |
--- a/ldap/servers/slapd/daemon.c
|
|
|
3280a9 |
+++ b/ldap/servers/slapd/daemon.c
|
|
|
3280a9 |
@@ -221,7 +221,27 @@ disk_mon_get_mount_point(char *dir)
|
|
|
3280a9 |
}
|
|
|
3280a9 |
if (s.st_dev == dev_id) {
|
|
|
3280a9 |
endmntent(fp);
|
|
|
3280a9 |
- return (slapi_ch_strdup(mnt->mnt_dir));
|
|
|
3280a9 |
+
|
|
|
3280a9 |
+ if ((strncmp(mnt->mnt_dir, "/dev", 4) == 0 && strncmp(mnt->mnt_dir, "/dev/shm", 8) != 0) ||
|
|
|
3280a9 |
+ strncmp(mnt->mnt_dir, "/proc", 4) == 0 ||
|
|
|
3280a9 |
+ strncmp(mnt->mnt_dir, "/sys", 4) == 0)
|
|
|
3280a9 |
+ {
|
|
|
3280a9 |
+ /*
|
|
|
3280a9 |
+ * Ignore "mount directories" starting with /dev (except
|
|
|
3280a9 |
+ * /dev/shm), /proc, /sys For some reason these mounts are
|
|
|
3280a9 |
+ * occasionally/incorrectly returned. Only seen this at a
|
|
|
3280a9 |
+ * customer site once. When it happens it causes disk
|
|
|
3280a9 |
+ * monitoring to think the server has 0 disk space left, and
|
|
|
3280a9 |
+ * it abruptly/unexpectedly shuts the server down. At this
|
|
|
3280a9 |
+ * point it looks like a bug in stat(), setmntent(), or
|
|
|
3280a9 |
+ * getmntent(), but there is no way to prove that since there
|
|
|
3280a9 |
+ * is no way to reproduce the original issue. For now just
|
|
|
3280a9 |
+ * return NULL to be safe.
|
|
|
3280a9 |
+ */
|
|
|
3280a9 |
+ return NULL;
|
|
|
3280a9 |
+ } else {
|
|
|
3280a9 |
+ return (slapi_ch_strdup(mnt->mnt_dir));
|
|
|
3280a9 |
+ }
|
|
|
3280a9 |
}
|
|
|
3280a9 |
}
|
|
|
3280a9 |
endmntent(fp);
|
|
|
3280a9 |
diff --git a/ldap/servers/slapd/monitor.c b/ldap/servers/slapd/monitor.c
|
|
|
3280a9 |
index 562721bed..65f082986 100644
|
|
|
3280a9 |
--- a/ldap/servers/slapd/monitor.c
|
|
|
3280a9 |
+++ b/ldap/servers/slapd/monitor.c
|
|
|
3280a9 |
@@ -131,7 +131,6 @@ monitor_disk_info (Slapi_PBlock *pb __attribute__((unused)),
|
|
|
3280a9 |
{
|
|
|
3280a9 |
int32_t rc = LDAP_SUCCESS;
|
|
|
3280a9 |
char **dirs = NULL;
|
|
|
3280a9 |
- char buf[BUFSIZ];
|
|
|
3280a9 |
struct berval val;
|
|
|
3280a9 |
struct berval *vals[2];
|
|
|
3280a9 |
uint64_t total_space;
|
|
|
3280a9 |
@@ -143,15 +142,13 @@ monitor_disk_info (Slapi_PBlock *pb __attribute__((unused)),
|
|
|
3280a9 |
|
|
|
3280a9 |
disk_mon_get_dirs(&dirs);
|
|
|
3280a9 |
|
|
|
3280a9 |
- for (uint16_t i = 0; dirs && dirs[i]; i++) {
|
|
|
3280a9 |
+ for (size_t i = 0; dirs && dirs[i]; i++) {
|
|
|
3280a9 |
+ char buf[BUFSIZ] = {0};
|
|
|
3280a9 |
rc = disk_get_info(dirs[i], &total_space, &avail_space, &used_space);
|
|
|
3280a9 |
- if (rc) {
|
|
|
3280a9 |
- slapi_log_err(SLAPI_LOG_WARNING, "monitor_disk_info",
|
|
|
3280a9 |
- "Unable to get 'cn=disk space,cn=monitor' stats for %s\n", dirs[i]);
|
|
|
3280a9 |
- } else {
|
|
|
3280a9 |
+ if (rc == 0 && total_space > 0 && used_space > 0) {
|
|
|
3280a9 |
val.bv_len = snprintf(buf, sizeof(buf),
|
|
|
3280a9 |
- "partition=\"%s\" size=\"%" PRIu64 "\" used=\"%" PRIu64 "\" available=\"%" PRIu64 "\" use%%=\"%" PRIu64 "\"",
|
|
|
3280a9 |
- dirs[i], total_space, used_space, avail_space, used_space * 100 / total_space);
|
|
|
3280a9 |
+ "partition=\"%s\" size=\"%" PRIu64 "\" used=\"%" PRIu64 "\" available=\"%" PRIu64 "\" use%%=\"%" PRIu64 "\"",
|
|
|
3280a9 |
+ dirs[i], total_space, used_space, avail_space, used_space * 100 / total_space);
|
|
|
3280a9 |
val.bv_val = buf;
|
|
|
3280a9 |
attrlist_merge(&e->e_attrs, "dsDisk", vals);
|
|
|
3280a9 |
}
|
|
|
3280a9 |
--
|
|
|
3280a9 |
2.26.2
|
|
|
3280a9 |
|