From 53cecf3abcc08724ae9b21fd02b8423fd39e7086 Mon Sep 17 00:00:00 2001 From: progier389 Date: Fri, 17 Nov 2023 14:41:51 +0100 Subject: [PATCH] Issue 5984 - Crash when paged result search are abandoned - fix + fix2 (#5985 and #5987) Notice: This cherry-pick include two commit: df7dd8320 Issue 5984 - Crash when paged result search are abandoned - fix2 (#5987) 06bd08629 Issue 5984 - Crash when paged result search are abandoned (#5985) The reason is that cherry pick of #5985 generates lots of conflict in __init.py and #5987 only revert that file ==> So it is easier and safer to keep the original file. * Issue 5984 - Crash when paged result search are abandoned Problem: Fix #4551 has changed the lock that protects the paged result data within a connection. But the abandon operation attempts to free the paged search result with the connection lock. This leads to race condition and double free causing an heap corruption and a SIGSEGV. Solution: - Get a copy of the operation data that needs to be logged. - Unlock the connection mutex (to avoid deadlock risk) - Free the paged result while holding the paged result lock. Issue: 5984 Reviewed by: @tbordaz (Thanks!) (cherry picked from commit 06bd0862956672eb76276cab5c1dd906fe5a7eec) --- .../paged_results/paged_results_test.py | 758 +++++++++--------- ldap/servers/slapd/abandon.c | 23 +- ldap/servers/slapd/opshared.c | 4 +- ldap/servers/slapd/pagedresults.c | 8 +- ldap/servers/slapd/proto-slap.h | 2 +- 5 files changed, 392 insertions(+), 403 deletions(-) diff --git a/dirsrvtests/tests/suites/paged_results/paged_results_test.py b/dirsrvtests/tests/suites/paged_results/paged_results_test.py index ae2627b75..a030824c6 100644 --- a/dirsrvtests/tests/suites/paged_results/paged_results_test.py +++ b/dirsrvtests/tests/suites/paged_results/paged_results_test.py @@ -1,21 +1,32 @@ # --- BEGIN COPYRIGHT BLOCK --- -# Copyright (C) 2016 Red Hat, Inc. +# Copyright (C) 2020 Red Hat, Inc. # All rights reserved. # # License: GPL (version 3 or any later version). # See LICENSE for details. # --- END COPYRIGHT BLOCK --- # -from random import sample +import socket +from random import sample, randrange import pytest from ldap.controls import SimplePagedResultsControl, GetEffectiveRightsControl from lib389.tasks import * from lib389.utils import * from lib389.topologies import topology_st -from lib389._constants import DN_LDBM, DN_DM, DEFAULT_SUFFIX, BACKEND_NAME, PASSWORD +from lib389._constants import DN_LDBM, DN_DM, DEFAULT_SUFFIX +from lib389._controls import SSSRequestControl +from lib389.idm.user import UserAccount, UserAccounts +from lib389.cli_base import FakeArgs +from lib389.config import LDBMConfig +from lib389.dbgen import dbgen_users -from sss_control import SSSRequestControl +from lib389.idm.organization import Organization +from lib389.idm.organizationalunit import OrganizationalUnit +from lib389.backend import Backends +from lib389._mapped_object import DSLdapObject + +pytestmark = pytest.mark.tier1 DEBUGGING = os.getenv('DEBUGGING', False) @@ -26,9 +37,8 @@ else: log = logging.getLogger(__name__) -TEST_USER_NAME = 'simplepaged_test' -TEST_USER_DN = 'uid={},{}'.format(TEST_USER_NAME, DEFAULT_SUFFIX) TEST_USER_PWD = 'simplepaged_test' + NEW_SUFFIX_1_NAME = 'test_parent' NEW_SUFFIX_1 = 'o={}'.format(NEW_SUFFIX_1_NAME) NEW_SUFFIX_2_NAME = 'child' @@ -36,34 +46,90 @@ NEW_SUFFIX_2 = 'ou={},{}'.format(NEW_SUFFIX_2_NAME, NEW_SUFFIX_1) NEW_BACKEND_1 = 'parent_base' NEW_BACKEND_2 = 'child_base' +OLD_HOSTNAME = socket.gethostname() +if os.getuid() == 0: + socket.sethostname('localhost') +HOSTNAME = socket.gethostname() +IP_ADDRESS = socket.gethostbyname(HOSTNAME) +OLD_IP_ADDRESS = socket.gethostbyname(OLD_HOSTNAME) + @pytest.fixture(scope="module") -def test_user(topology_st, request): +def create_40k_users(topology_st, request): + inst = topology_st.standalone + + # Prepare return value + retval = FakeArgs() + retval.inst = inst + retval.bename = '40k' + retval.suffix = 'o=%s' % retval.bename + ldifdir = inst.get_ldif_dir() + retval.ldif_file = '%s/%s.ldif' % (ldifdir, retval.bename) + + # Create new backend + bes = Backends(inst) + be_1 = bes.create(properties={ + 'cn': retval.bename, + 'nsslapd-suffix': retval.suffix, + }) + + # Set paged search lookthrough limit + ldbmconfig = LDBMConfig(inst) + ldbmconfig.replace('nsslapd-pagedlookthroughlimit', b'100000') + + # Create ldif and import it. + dbgen_users(inst, 40000, retval.ldif_file, retval.suffix) + # tasks = Tasks(inst) + # args = {TASK_WAIT: True} + # tasks.importLDIF(retval.suffix, None, retval.ldif_file, args) + inst.stop() + assert inst.ldif2db(retval.bename, None, None, None, retval.ldif_file, None) + inst.start() + + # And set an aci allowing anonymous read + log.info('Adding ACI to allow our test user to search') + ACI_TARGET = '(targetattr != "userPassword || aci")' + ACI_ALLOW = '(version 3.0; acl "Enable anonymous access";allow (read, search, compare)' + ACI_SUBJECT = '(userdn = "ldap:///anyone");)' + ACI_BODY = ACI_TARGET + ACI_ALLOW + ACI_SUBJECT + o_1 = Organization(inst, retval.suffix) + o_1.set('aci', ACI_BODY) + + return retval + + +@pytest.fixture(scope="module") +def create_user(topology_st, request): """User for binding operation""" - log.info('Adding user {}'.format(TEST_USER_DN)) - try: - topology_st.standalone.add_s(Entry((TEST_USER_DN, { - 'objectclass': 'top person'.split(), - 'objectclass': 'organizationalPerson', - 'objectclass': 'inetorgperson', - 'cn': TEST_USER_NAME, - 'sn': TEST_USER_NAME, - 'userpassword': TEST_USER_PWD, - 'mail': '%s@redhat.com' % TEST_USER_NAME, - 'uid': TEST_USER_NAME - }))) - except ldap.LDAPError as e: - log.error('Failed to add user (%s): error (%s)' % (TEST_USER_DN, - e.message['desc'])) - raise e + log.info('Adding user simplepaged_test') + new_uri = topology_st.standalone.ldapuri.replace(OLD_HOSTNAME, HOSTNAME) + topology_st.standalone.ldapuri = new_uri + users = UserAccounts(topology_st.standalone, DEFAULT_SUFFIX) + user = users.create(properties={ + 'uid': 'simplepaged_test', + 'cn': 'simplepaged_test', + 'sn': 'simplepaged_test', + 'uidNumber': '1234', + 'gidNumber': '1234', + 'homeDirectory': '/home/simplepaged_test', + 'userPassword': TEST_USER_PWD, + }) + + # Now add the ACI so simplepage_test can read the users ... + ACI_BODY = ensure_bytes('(targetattr= "uid || sn || dn")(version 3.0; acl "Allow read for user"; allow (read,search,compare) userdn = "ldap:///all";)') + topology_st.standalone.modify_s(DEFAULT_SUFFIX, [(ldap.MOD_REPLACE, 'aci', ACI_BODY)]) def fin(): - log.info('Deleting user {}'.format(TEST_USER_DN)) - topology_st.standalone.delete_s(TEST_USER_DN) + log.info('Deleting user simplepaged_test') + if not DEBUGGING: + user.delete() + if os.getuid() == 0: + socket.sethostname(OLD_HOSTNAME) request.addfinalizer(fin) + return user @pytest.fixture(scope="module") def new_suffixes(topology_st): @@ -72,47 +138,40 @@ def new_suffixes(topology_st): """ log.info('Adding suffix:{} and backend: {}'.format(NEW_SUFFIX_1, NEW_BACKEND_1)) - topology_st.standalone.backend.create(NEW_SUFFIX_1, - {BACKEND_NAME: NEW_BACKEND_1}) - topology_st.standalone.mappingtree.create(NEW_SUFFIX_1, - bename=NEW_BACKEND_1) - try: - topology_st.standalone.add_s(Entry((NEW_SUFFIX_1, { - 'objectclass': 'top', - 'objectclass': 'organization', - 'o': NEW_SUFFIX_1_NAME - }))) - except ldap.LDAPError as e: - log.error('Failed to add suffix ({}): error ({})'.format(NEW_SUFFIX_1, - e.message['desc'])) - raise - log.info('Adding suffix:{} and backend: {}'.format(NEW_SUFFIX_2, NEW_BACKEND_2)) - topology_st.standalone.backend.create(NEW_SUFFIX_2, - {BACKEND_NAME: NEW_BACKEND_2}) - topology_st.standalone.mappingtree.create(NEW_SUFFIX_2, - bename=NEW_BACKEND_2, - parent=NEW_SUFFIX_1) - - try: - topology_st.standalone.add_s(Entry((NEW_SUFFIX_2, { - 'objectclass': 'top', - 'objectclass': 'organizationalunit', - 'ou': NEW_SUFFIX_2_NAME - }))) - except ldap.LDAPError as e: - log.error('Failed to add suffix ({}): error ({})'.format(NEW_SUFFIX_2, - e.message['desc'])) - raise + bes = Backends(topology_st.standalone) + bes.create(properties={ + 'cn': 'NEW_BACKEND_1', + 'nsslapd-suffix': NEW_SUFFIX_1, + }) + # Create the root objects with their ACI log.info('Adding ACI to allow our test user to search') ACI_TARGET = '(targetattr != "userPassword || aci")' ACI_ALLOW = '(version 3.0; acl "Enable anonymous access";allow (read, search, compare)' ACI_SUBJECT = '(userdn = "ldap:///anyone");)' ACI_BODY = ACI_TARGET + ACI_ALLOW + ACI_SUBJECT - mod = [(ldap.MOD_ADD, 'aci', ACI_BODY)] - topology_st.standalone.modify_s(NEW_SUFFIX_1, mod) + o_1 = Organization(topology_st.standalone, NEW_SUFFIX_1) + o_1.create(properties={ + 'o': NEW_SUFFIX_1_NAME, + 'aci': ACI_BODY, + }) + + log.info('Adding suffix:{} and backend: {}'.format(NEW_SUFFIX_2, NEW_BACKEND_2)) + be_2 = bes.create(properties={ + 'cn': 'NEW_BACKEND_2', + 'nsslapd-suffix': NEW_SUFFIX_2, + }) + + # We have to adjust the MT to say that BE_1 is a parent. + mt = be_2.get_mapping_tree() + mt.set_parent(NEW_SUFFIX_1) + + ou_2 = OrganizationalUnit(topology_st.standalone, NEW_SUFFIX_2) + ou_2.create(properties={ + 'ou': NEW_SUFFIX_2_NAME + }) def add_users(topology_st, users_num, suffix): @@ -122,72 +181,54 @@ def add_users(topology_st, users_num, suffix): """ users_list = [] + users = UserAccounts(topology_st.standalone, suffix, rdn=None) + log.info('Adding %d users' % users_num) for num in sample(range(1000), users_num): num_ran = int(round(num)) USER_NAME = 'test%05d' % num_ran - USER_DN = 'uid=%s,%s' % (USER_NAME, suffix) - users_list.append(USER_DN) - try: - topology_st.standalone.add_s(Entry((USER_DN, { - 'objectclass': 'top person'.split(), - 'objectclass': 'organizationalPerson', - 'objectclass': 'inetorgperson', - 'cn': USER_NAME, - 'sn': USER_NAME, - 'userpassword': 'pass%s' % num_ran, - 'mail': '%s@redhat.com' % USER_NAME, - 'uid': USER_NAME}))) - except ldap.LDAPError as e: - log.error('Failed to add user (%s): error (%s)' % (USER_DN, - e.message['desc'])) - raise e + + user = users.create(properties={ + 'uid': USER_NAME, + 'sn': USER_NAME, + 'cn': USER_NAME, + 'uidNumber': '%s' % num_ran, + 'gidNumber': '%s' % num_ran, + 'homeDirectory': '/home/%s' % USER_NAME, + 'mail': '%s@redhat.com' % USER_NAME, + 'userpassword': 'pass%s' % num_ran, + }) + users_list.append(user) return users_list -def del_users(topology_st, users_list): +def del_users(users_list): """Delete users with DNs from given list""" log.info('Deleting %d users' % len(users_list)) - for user_dn in users_list: - try: - topology_st.standalone.delete_s(user_dn) - except ldap.LDAPError as e: - log.error('Failed to delete user (%s): error (%s)' % (user_dn, - e.message['desc'])) - raise e + for user in users_list: + user.delete() def change_conf_attr(topology_st, suffix, attr_name, attr_value): - """Change configurational attribute in the given suffix. + """Change configuration attribute in the given suffix. Returns previous attribute value. """ - try: - entries = topology_st.standalone.search_s(suffix, ldap.SCOPE_BASE, - 'objectclass=top', - [attr_name]) - attr_value_bck = entries[0].data.get(attr_name) - log.info('Set %s to %s. Previous value - %s. Modified suffix - %s.' % ( - attr_name, attr_value, attr_value_bck, suffix)) - if attr_value is None: - topology_st.standalone.modify_s(suffix, [(ldap.MOD_DELETE, - attr_name, - attr_value)]) - else: - topology_st.standalone.modify_s(suffix, [(ldap.MOD_REPLACE, - attr_name, - attr_value)]) - except ldap.LDAPError as e: - log.error('Failed to change attr value (%s): error (%s)' % (attr_name, - e.message['desc'])) - raise e + entry = DSLdapObject(topology_st.standalone, suffix) + attr_value_bck = entry.get_attr_val_bytes(attr_name) + log.info('Set %s to %s. Previous value - %s. Modified suffix - %s.' % ( + attr_name, attr_value, attr_value_bck, suffix)) + if attr_value is None: + entry.remove_all(attr_name) + else: + entry.replace(attr_name, attr_value) return attr_value_bck -def paged_search(topology_st, suffix, controls, search_flt, searchreq_attrlist): +def paged_search(conn, suffix, controls, search_flt, searchreq_attrlist, abandon_rate=0): """Search at the DEFAULT_SUFFIX with ldap.SCOPE_SUBTREE using Simple Paged Control(should the first item in the list controls. @@ -206,14 +247,17 @@ def paged_search(topology_st, suffix, controls, search_flt, searchreq_attrlist): searchreq_attrlist, req_pr_ctrl.size, str(controls))) - msgid = topology_st.standalone.search_ext(suffix, - ldap.SCOPE_SUBTREE, - search_flt, - searchreq_attrlist, - serverctrls=controls) + msgid = conn.search_ext(suffix, ldap.SCOPE_SUBTREE, search_flt, searchreq_attrlist, serverctrls=controls) + log.info('Getting page %d' % (pages,)) while True: - log.info('Getting page %d' % (pages,)) - rtype, rdata, rmsgid, rctrls = topology_st.standalone.result3(msgid) + try: + rtype, rdata, rmsgid, rctrls = conn.result3(msgid, timeout=0.001) + except ldap.TIMEOUT: + if pages > 0 and abandon_rate>0 and randrange(100)c_mutex); for (o = pb_conn->c_ops; o != NULL; o = o->o_next) { - if (o->o_msgid == id && o != pb_op) + if (o->o_msgid == id && o != pb_op) { + slapi_operation_time_elapsed(o, &o_copy.hr_time_end); + o_copy.nentries = o->o_results.r.r_search.nentries; + o_copy.opid = o->o_opid; break; + } } if (o != NULL) { @@ -130,7 +140,8 @@ do_abandon(Slapi_PBlock *pb) slapi_log_err(SLAPI_LOG_TRACE, "do_abandon", "op not found\n"); } - if (0 == pagedresults_free_one_msgid_nolock(pb_conn, id)) { + PR_ExitMonitor(pb_conn->c_mutex); + if (0 == pagedresults_free_one_msgid(pb_conn, id, pageresult_lock_get_addr(pb_conn))) { slapi_log_access(LDAP_DEBUG_STATS, "conn=%" PRIu64 " op=%d ABANDON targetop=Simple Paged Results msgid=%d\n", pb_conn->c_connid, pb_op->o_opid, id); @@ -143,15 +154,11 @@ do_abandon(Slapi_PBlock *pb) " targetop=SUPPRESSED-BY-PLUGIN msgid=%d\n", pb_conn->c_connid, pb_op->o_opid, id); } else { - struct timespec o_hr_time_end; - slapi_operation_time_elapsed(o, &o_hr_time_end); slapi_log_access(LDAP_DEBUG_STATS, "conn=%" PRIu64 " op=%d ABANDON" " targetop=%d msgid=%d nentries=%d etime=%" PRId64 ".%010" PRId64 "\n", - pb_conn->c_connid, pb_op->o_opid, o->o_opid, id, - o->o_results.r.r_search.nentries, (int64_t)o_hr_time_end.tv_sec, (int64_t)o_hr_time_end.tv_nsec); + pb_conn->c_connid, pb_op->o_opid, o_copy.opid, id, + o_copy.nentries, (int64_t)o_copy.hr_time_end.tv_sec, (int64_t)o_copy.hr_time_end.tv_nsec); } - - PR_ExitMonitor(pb_conn->c_mutex); /* * Wake up the persistent searches, so they * can notice if they've been abandoned. diff --git a/ldap/servers/slapd/opshared.c b/ldap/servers/slapd/opshared.c index ec316dfb9..80c670d7b 100644 --- a/ldap/servers/slapd/opshared.c +++ b/ldap/servers/slapd/opshared.c @@ -889,9 +889,7 @@ op_shared_search(Slapi_PBlock *pb, int send_result) next_be = NULL; /* to break the loop */ if (operation->o_status & SLAPI_OP_STATUS_ABANDONED) { /* It turned out this search was abandoned. */ - pthread_mutex_lock(pagedresults_mutex); - pagedresults_free_one_msgid_nolock(pb_conn, operation->o_msgid); - pthread_mutex_unlock(pagedresults_mutex); + pagedresults_free_one_msgid(pb_conn, operation->o_msgid, pagedresults_mutex); /* paged-results-request was abandoned; making an empty cookie. */ pagedresults_set_response_control(pb, 0, estimate, -1, pr_idx); send_ldap_result(pb, 0, NULL, "Simple Paged Results Search abandoned", 0, NULL); diff --git a/ldap/servers/slapd/pagedresults.c b/ldap/servers/slapd/pagedresults.c index 8c043f6af..dc9db2c60 100644 --- a/ldap/servers/slapd/pagedresults.c +++ b/ldap/servers/slapd/pagedresults.c @@ -34,6 +34,10 @@ pageresult_lock_cleanup() slapi_ch_free((void**)&lock_hash); } +/* Beware to the lock order with c_mutex: + * c_mutex is sometime locked while holding pageresult_lock + * ==> Do not lock pageresult_lock when holing c_mutex + */ pthread_mutex_t * pageresult_lock_get_addr(Connection *conn) { @@ -350,7 +354,7 @@ pagedresults_free_one(Connection *conn, Operation *op, int index) * Used for abandoning - pageresult_lock_get_addr(conn) is already locked in do_abandone. */ int -pagedresults_free_one_msgid_nolock(Connection *conn, ber_int_t msgid) +pagedresults_free_one_msgid(Connection *conn, ber_int_t msgid, pthread_mutex_t *mutex) { int rc = -1; int i; @@ -361,6 +365,7 @@ pagedresults_free_one_msgid_nolock(Connection *conn, ber_int_t msgid) } else { slapi_log_err(SLAPI_LOG_TRACE, "pagedresults_free_one_msgid_nolock", "=> msgid=%d\n", msgid); + pthread_mutex_lock(mutex); for (i = 0; i < conn->c_pagedresults.prl_maxlen; i++) { if (conn->c_pagedresults.prl_list[i].pr_msgid == msgid) { PagedResults *prp = conn->c_pagedresults.prl_list + i; @@ -375,6 +380,7 @@ pagedresults_free_one_msgid_nolock(Connection *conn, ber_int_t msgid) break; } } + pthread_mutex_unlock(mutex); slapi_log_err(SLAPI_LOG_TRACE, "pagedresults_free_one_msgid_nolock", "<= %d\n", rc); } diff --git a/ldap/servers/slapd/proto-slap.h b/ldap/servers/slapd/proto-slap.h index b8f736107..a02605774 100644 --- a/ldap/servers/slapd/proto-slap.h +++ b/ldap/servers/slapd/proto-slap.h @@ -1525,7 +1525,7 @@ int pagedresults_is_timedout_nolock(Connection *conn); int pagedresults_reset_timedout_nolock(Connection *conn); int pagedresults_in_use_nolock(Connection *conn); int pagedresults_free_one(Connection *conn, Operation *op, int index); -int pagedresults_free_one_msgid_nolock(Connection *conn, ber_int_t msgid); +int pagedresults_free_one_msgid(Connection *conn, ber_int_t msgid, pthread_mutex_t *mutex); int op_is_pagedresults(Operation *op); int pagedresults_cleanup_all(Connection *conn, int needlock); void op_set_pagedresults(Operation *op); -- 2.41.0